Commit 38865823 authored by Loren Merritt

h->mc.copy()



git-svn-id: svn://svn.videolan.org/x264/trunk@442 df754926-b1dd-0310-bc7b-ec298dee348c
parent 34cbb917
......@@ -288,20 +288,20 @@ x264_pixel_avg_weight_4x4_mmxext:
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
; uint8_t *dst, int i_dst_stride, int i_height )
; void x264_mc_copy_w4_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w4_mmxext:
mov eax, parm5d ; i_height
ALIGN 4
.height_loop
mov r10d, [parm1q]
mov r11d, [parm1q+parm2q]
mov [parm3q], r10d
mov [parm3q+parm4q], r11d
lea parm1q, [parm1q+parm2q*2]
mov r10d, [parm3q]
mov r11d, [parm3q+parm4q]
mov [parm1q], r10d
mov [parm1q+parm2q], r11d
lea parm3q, [parm3q+parm4q*2]
lea parm1q, [parm1q+parm2q*2]
dec eax
dec eax
jne .height_loop
......@@ -310,27 +310,27 @@ ALIGN 4
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
; uint8_t *dst, int i_dst_stride, int i_height )
; void x264_mc_copy_w8_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w8_mmxext:
mov eax, parm5d ; i_height
lea r10, [parm2q+parm2q*2] ; 3 * i_src_stride
lea r11, [parm4q+parm4q*2] ; 3 * i_dst_stride
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
lea r11, [parm2q+parm2q*2] ; 3 * i_dst_stride
ALIGN 4
.height_loop
movq mm0, [parm1q]
movq mm1, [parm1q+parm2q]
movq mm2, [parm1q+parm2q*2]
movq mm3, [parm1q+r10]
movq [parm3q], mm0
movq [parm3q+parm4q], mm1
movq [parm3q+parm4q*2], mm2
movq [parm3q+r11], mm3
lea parm1q, [parm1q+parm2q*4]
movq mm0, [parm3q]
movq mm1, [parm3q+parm4q]
movq mm2, [parm3q+parm4q*2]
movq mm3, [parm3q+r10]
movq [parm1q], mm0
movq [parm1q+parm2q], mm1
movq [parm1q+parm2q*2], mm2
movq [parm1q+r11], mm3
lea parm3q, [parm3q+parm4q*4]
lea parm1q, [parm1q+parm2q*4]
sub eax, byte 4
jnz .height_loop
......@@ -339,35 +339,35 @@ ALIGN 4
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
; uint8_t *dst, int i_dst_stride, int i_height )
; void x264_mc_copy_w16_mmxext( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w16_mmxext:
mov eax, parm5d ; i_height
lea r10, [parm2q+parm2q*2] ; 3 * i_src_stride
lea r11, [parm4q+parm4q*2] ; 3 * i_dst_stride
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
lea r11, [parm2q+parm2q*2] ; 3 * i_dst_stride
ALIGN 4
.height_loop
movq mm0, [parm1q]
movq mm1, [parm1q+8]
movq mm2, [parm1q+parm2q]
movq mm3, [parm1q+parm2q+8]
movq mm4, [parm1q+parm2q*2]
movq mm5, [parm1q+parm2q*2+8]
movq mm6, [parm1q+r10]
movq mm7, [parm1q+r10+8]
movq [parm3q], mm0
movq [parm3q+8], mm1
movq [parm3q+parm4q], mm2
movq [parm3q+parm4q+8], mm3
movq [parm3q+parm4q*2], mm4
movq [parm3q+parm4q*2+8], mm5
movq [parm3q+r11], mm6
movq [parm3q+r11+8], mm7
lea parm1q, [parm1q+parm2q*4]
movq mm0, [parm3q]
movq mm1, [parm3q+8]
movq mm2, [parm3q+parm4q]
movq mm3, [parm3q+parm4q+8]
movq mm4, [parm3q+parm4q*2]
movq mm5, [parm3q+parm4q*2+8]
movq mm6, [parm3q+r10]
movq mm7, [parm3q+r10+8]
movq [parm1q], mm0
movq [parm1q+8], mm1
movq [parm1q+parm2q], mm2
movq [parm1q+parm2q+8], mm3
movq [parm1q+parm2q*2], mm4
movq [parm1q+parm2q*2+8], mm5
movq [parm1q+r11], mm6
movq [parm1q+r11+8], mm7
lea parm3q, [parm3q+parm4q*4]
lea parm1q, [parm1q+parm2q*4]
sub eax, byte 4
jnz .height_loop
......@@ -376,21 +376,21 @@ ALIGN 4
ALIGN 16
;-----------------------------------------------------------------------------
; void x264_mc_copy_w16_sse2( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_height )
; void x264_mc_copy_w16_sse2( uint8_t *dst, int i_dst_stride, uint8_t *src, int i_src_stride, int i_height )
;-----------------------------------------------------------------------------
x264_mc_copy_w16_sse2:
mov eax, parm5d ; i_height
ALIGN 4
.height_loop
movdqu xmm0, [parm1q]
movdqu xmm1, [parm1q+parm2q]
movdqu [parm3q], xmm0
movdqu [parm3q+parm4q], xmm1
movdqu xmm0, [parm3q]
movdqu xmm1, [parm3q+parm4q]
movdqu [parm1q], xmm0
movdqu [parm1q+parm2q], xmm1
dec eax
dec eax
lea parm1q, [parm1q+parm2q*2]
lea parm3q, [parm3q+parm4q*2]
lea parm1q, [parm1q+parm2q*2]
jnz .height_loop
ret
......
......@@ -357,10 +357,10 @@ x264_mc_copy_w4_mmxext:
push esi
push edi
mov esi, [esp+16] ; src
mov edi, [esp+24] ; dst
mov ebx, [esp+20] ; i_src_stride
mov edx, [esp+28] ; i_dst_stride
mov esi, [esp+24] ; src
mov edi, [esp+16] ; dst
mov ebx, [esp+28] ; i_src_stride
mov edx, [esp+20] ; i_dst_stride
mov ecx, [esp+32] ; i_height
ALIGN 4
.height_loop
......@@ -389,10 +389,10 @@ x264_mc_copy_w8_mmxext:
push esi
push edi
mov esi, [esp+16] ; src
mov edi, [esp+24] ; dst
mov ebx, [esp+20] ; i_src_stride
mov edx, [esp+28] ; i_dst_stride
mov esi, [esp+24] ; src
mov edi, [esp+16] ; dst
mov ebx, [esp+28] ; i_src_stride
mov edx, [esp+20] ; i_dst_stride
mov ecx, [esp+32] ; i_height
ALIGN 4
.height_loop
......@@ -427,10 +427,10 @@ x264_mc_copy_w16_mmxext:
push esi
push edi
mov esi, [esp+16] ; src
mov edi, [esp+24] ; dst
mov ebx, [esp+20] ; i_src_stride
mov edx, [esp+28] ; i_dst_stride
mov esi, [esp+24] ; src
mov edi, [esp+16] ; dst
mov ebx, [esp+28] ; i_src_stride
mov edx, [esp+20] ; i_dst_stride
mov ecx, [esp+32] ; i_height
ALIGN 4
......@@ -473,10 +473,10 @@ x264_mc_copy_w16_sse2:
push esi
push edi
mov esi, [esp+16] ; src
mov edi, [esp+24] ; dst
mov ebx, [esp+20] ; i_src_stride
mov edx, [esp+28] ; i_dst_stride
mov esi, [esp+24] ; src
mov edi, [esp+16] ; dst
mov ebx, [esp+28] ; i_src_stride
mov edx, [esp+20] ; i_dst_stride
mov ecx, [esp+32] ; i_height
ALIGN 4
......
......@@ -106,13 +106,13 @@ void mc_luma_mmx( uint8_t *src[4], int i_src_stride,
{
switch(i_width) {
case 4:
x264_mc_copy_w4_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
x264_mc_copy_w4_mmxext( dst, i_dst_stride, src1, i_src_stride, i_height );
break;
case 8:
x264_mc_copy_w8_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
x264_mc_copy_w8_mmxext( dst, i_dst_stride, src1, i_src_stride, i_height );
break;
case 16:
x264_mc_copy_w16_mmxext( src1, i_src_stride, dst, i_dst_stride, i_height );
x264_mc_copy_w16_mmxext( dst, i_dst_stride, src1, i_src_stride, i_height );
break;
}
}
......@@ -186,6 +186,10 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf )
pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_mmxext;
pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext;
// avg_weight_4x8 is rare and 4x2 is not used
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmxext;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmxext;
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmxext;
}
void x264_mc_sse2_init( x264_mc_functions_t *pf )
{
......
......@@ -343,6 +343,15 @@ static void motion_compensation_chroma_mmxext( uint8_t *src, int i_src_stride,
}
#endif
#define MC_COPY(W) \
static void mc_copy_w##W( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_height ) \
{ \
mc_copy( src, i_src, dst, i_dst, W, i_height ); \
}
MC_COPY( 16 )
MC_COPY( 8 )
MC_COPY( 4 )
void x264_mc_init( int cpu, x264_mc_functions_t *pf )
{
pf->mc_luma = mc_luma;
......@@ -371,6 +380,10 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->avg_weight[PIXEL_2x4] = pixel_avg_weight_2x4;
pf->avg_weight[PIXEL_2x2] = pixel_avg_weight_2x2;
pf->copy[PIXEL_16x16] = mc_copy_w16;
pf->copy[PIXEL_8x8] = mc_copy_w8;
pf->copy[PIXEL_4x4] = mc_copy_w4;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT ) {
x264_mc_mmxext_init( pf );
......
......@@ -47,6 +47,9 @@ typedef struct
void (*avg[10])( uint8_t *dst, int, uint8_t *src, int );
void (*avg_weight[10])( uint8_t *dst, int, uint8_t *src, int, int i_weight );
/* only 16x16, 8x8, and 4x4 defined */
void (*copy[7])( uint8_t *dst, int, uint8_t *src, int, int i_height );
} x264_mc_functions_t;
void x264_mc_init( int cpu, x264_mc_functions_t *pf );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment