Commit fdb64099 authored by Loren Merritt

Some functions named *_mmxext really only require MMX; rename them to *_mmx.



git-svn-id: svn://svn.videolan.org/x264/trunk@470 df754926-b1dd-0310-bc7b-ec298dee348c
parent abffd18f
...@@ -157,13 +157,13 @@ pw_32: times 8 dw 32 ...@@ -157,13 +157,13 @@ pw_32: times 8 dw 32
SECTION .text SECTION .text
cglobal x264_dct4x4dc_mmxext cglobal x264_dct4x4dc_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void dct4x4dc( int16_t d[4][4] ) ; void x264_dct4x4dc_mmx( int16_t d[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_dct4x4dc_mmxext: x264_dct4x4dc_mmx:
movq mm0, [parm1q+ 0] movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8] movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16] movq mm2, [parm1q+16]
...@@ -192,13 +192,13 @@ x264_dct4x4dc_mmxext: ...@@ -192,13 +192,13 @@ x264_dct4x4dc_mmxext:
movq [parm1q+24],mm4 movq [parm1q+24],mm4
ret ret
cglobal x264_idct4x4dc_mmxext cglobal x264_idct4x4dc_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_idct4x4dc_mmxext( int16_t d[4][4] ) ; void x264_idct4x4dc_mmx( int16_t d[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_idct4x4dc_mmxext: x264_idct4x4dc_mmx:
movq mm0, [parm1q+ 0] movq mm0, [parm1q+ 0]
movq mm1, [parm1q+ 8] movq mm1, [parm1q+ 8]
movq mm2, [parm1q+16] movq mm2, [parm1q+16]
...@@ -218,13 +218,13 @@ x264_idct4x4dc_mmxext: ...@@ -218,13 +218,13 @@ x264_idct4x4dc_mmxext:
movq [parm1q+24], mm4 movq [parm1q+24], mm4
ret ret
cglobal x264_sub4x4_dct_mmxext cglobal x264_sub4x4_dct_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ) ; void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_sub4x4_dct_mmxext: x264_sub4x4_dct_mmx:
firstpush rbx firstpush rbx
pushreg rbx pushreg rbx
endprolog endprolog
...@@ -272,13 +272,13 @@ x264_sub4x4_dct_mmxext: ...@@ -272,13 +272,13 @@ x264_sub4x4_dct_mmxext:
ret ret
endfunc endfunc
cglobal x264_add4x4_idct_mmxext cglobal x264_add4x4_idct_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] ) ; void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_add4x4_idct_mmxext: x264_add4x4_idct_mmx:
; Load dct coeffs ; Load dct coeffs
movq mm0, [parm3q+ 0] ; dct movq mm0, [parm3q+ 0] ; dct
movq mm1, [parm3q+ 8] movq mm1, [parm3q+ 8]
......
...@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext ...@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext
cglobal x264_pixel_avg_weight_w8_mmxext cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext cglobal x264_pixel_avg_weight_w16_mmxext
cglobal x264_mc_copy_w4_mmxext cglobal x264_mc_copy_w4_mmx
cglobal x264_mc_copy_w8_mmxext cglobal x264_mc_copy_w8_mmx
cglobal x264_mc_copy_w16_mmxext cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2 cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_mmxext cglobal x264_mc_chroma_mmxext
...@@ -288,10 +288,10 @@ x264_pixel_avg_weight_4x4_mmxext: ...@@ -288,10 +288,10 @@ x264_pixel_avg_weight_4x4_mmxext:
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_mc_copy_w4_mmxext( uint8_t *dst, int i_dst_stride, ; void x264_mc_copy_w4_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height ) ; uint8_t *src, int i_src_stride, int i_height )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_mc_copy_w4_mmxext: x264_mc_copy_w4_mmx:
mov eax, parm5d ; i_height mov eax, parm5d ; i_height
ALIGN 4 ALIGN 4
...@@ -310,10 +310,10 @@ ALIGN 4 ...@@ -310,10 +310,10 @@ ALIGN 4
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_mc_copy_w8_mmxext( uint8_t *dst, int i_dst_stride, ; void x264_mc_copy_w8_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height ) ; uint8_t *src, int i_src_stride, int i_height )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_mc_copy_w8_mmxext: x264_mc_copy_w8_mmx:
mov eax, parm5d ; i_height mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
...@@ -339,10 +339,10 @@ ALIGN 4 ...@@ -339,10 +339,10 @@ ALIGN 4
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_mc_copy_w16_mmxext( uint8_t *dst, int i_dst_stride, ; void x264_mc_copy_w16_mmx( uint8_t *dst, int i_dst_stride,
; uint8_t *src, int i_src_stride, int i_height ) ; uint8_t *src, int i_src_stride, int i_height )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_mc_copy_w16_mmxext: x264_mc_copy_w16_mmx:
mov eax, parm5d ; i_height mov eax, parm5d ; i_height
lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
......
...@@ -276,9 +276,7 @@ x264_center_filter_mmxext : ...@@ -276,9 +276,7 @@ x264_center_filter_mmxext :
jnz .loopcx2 jnz .loopcx2
add r10, r11 ; dst2 += dst2_stride add r10, r11 ; dst2 += dst2_stride
dec r15 ; height dec r15 ; height
test r15, r15
jnz .loopcy jnz .loopcy
lea rsp, [rbp] lea rsp, [rbp]
...@@ -326,7 +324,6 @@ x264_horizontal_filter_mmxext : ...@@ -326,7 +324,6 @@ x264_horizontal_filter_mmxext :
loophy: loophy:
dec rcx
xor rax, rax xor rax, rax
loophx: loophx:
...@@ -365,7 +362,7 @@ loophx: ...@@ -365,7 +362,7 @@ loophx:
add rdx, r11 ; src_pitch add rdx, r11 ; src_pitch
add r9, r10 ; dst_pitch add r9, r10 ; dst_pitch
test rcx, rcx dec rcx
jnz loophy jnz loophy
ret ret
...@@ -266,13 +266,13 @@ cglobal x264_pixel_sad_pde_16x16_mmxext ...@@ -266,13 +266,13 @@ cglobal x264_pixel_sad_pde_16x16_mmxext
cglobal x264_pixel_sad_pde_16x8_mmxext cglobal x264_pixel_sad_pde_16x8_mmxext
cglobal x264_pixel_sad_pde_8x16_mmxext cglobal x264_pixel_sad_pde_8x16_mmxext
cglobal x264_pixel_ssd_16x16_mmxext cglobal x264_pixel_ssd_16x16_mmx
cglobal x264_pixel_ssd_16x8_mmxext cglobal x264_pixel_ssd_16x8_mmx
cglobal x264_pixel_ssd_8x16_mmxext cglobal x264_pixel_ssd_8x16_mmx
cglobal x264_pixel_ssd_8x8_mmxext cglobal x264_pixel_ssd_8x8_mmx
cglobal x264_pixel_ssd_8x4_mmxext cglobal x264_pixel_ssd_8x4_mmx
cglobal x264_pixel_ssd_4x8_mmxext cglobal x264_pixel_ssd_4x8_mmx
cglobal x264_pixel_ssd_4x4_mmxext cglobal x264_pixel_ssd_4x4_mmx
cglobal x264_pixel_satd_4x4_mmxext cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext cglobal x264_pixel_satd_4x8_mmxext
...@@ -470,22 +470,22 @@ x264_pixel_sad_pde_8x16_mmxext: ...@@ -470,22 +470,22 @@ x264_pixel_sad_pde_8x16_mmxext:
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; int x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int ) ; int x264_pixel_ssd_16x16_mmx (uint8_t *, int, uint8_t *, int )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_pixel_ssd_16x16_mmxext: x264_pixel_ssd_16x16_mmx:
SSD_START SSD_START
SSD_INC_8x16P SSD_INC_8x16P
SSD_INC_8x16P SSD_INC_8x16P
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_16x8_mmxext: x264_pixel_ssd_16x8_mmx:
SSD_START SSD_START
SSD_INC_8x16P SSD_INC_8x16P
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_8x16_mmxext: x264_pixel_ssd_8x16_mmx:
SSD_START SSD_START
SSD_INC_4x8P SSD_INC_4x8P
SSD_INC_4x8P SSD_INC_4x8P
...@@ -494,27 +494,27 @@ x264_pixel_ssd_8x16_mmxext: ...@@ -494,27 +494,27 @@ x264_pixel_ssd_8x16_mmxext:
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_8x8_mmxext: x264_pixel_ssd_8x8_mmx:
SSD_START SSD_START
SSD_INC_4x8P SSD_INC_4x8P
SSD_INC_4x8P SSD_INC_4x8P
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_8x4_mmxext: x264_pixel_ssd_8x4_mmx:
SSD_START SSD_START
SSD_INC_4x8P SSD_INC_4x8P
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_4x8_mmxext: x264_pixel_ssd_4x8_mmx:
SSD_START SSD_START
SSD_INC_4x4P SSD_INC_4x4P
SSD_INC_4x4P SSD_INC_4x4P
SSD_END SSD_END
ALIGN 16 ALIGN 16
x264_pixel_ssd_4x4_mmxext: x264_pixel_ssd_4x4_mmx:
SSD_START SSD_START
SSD_INC_4x4P SSD_INC_4x4P
SSD_END SSD_END
......
...@@ -397,23 +397,20 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) ...@@ -397,23 +397,20 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->idct2x2dc = dct2x2dc; dctf->idct2x2dc = dct2x2dc;
#ifdef HAVE_MMXEXT #ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT ) if( cpu&X264_CPU_MMX )
{ {
dctf->sub4x4_dct = x264_sub4x4_dct_mmxext; dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
dctf->sub8x8_dct = x264_sub8x8_dct_mmxext; dctf->sub8x8_dct = x264_sub8x8_dct_mmx;
dctf->sub16x16_dct = x264_sub16x16_dct_mmxext; dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
dctf->add4x4_idct = x264_add4x4_idct_mmxext; dctf->add4x4_idct = x264_add4x4_idct_mmx;
dctf->add8x8_idct = x264_add8x8_idct_mmxext; dctf->add8x8_idct = x264_add8x8_idct_mmx;
dctf->add16x16_idct = x264_add16x16_idct_mmxext; dctf->add16x16_idct = x264_add16x16_idct_mmx;
dctf->dct4x4dc = x264_dct4x4dc_mmxext; dctf->dct4x4dc = x264_dct4x4dc_mmx;
dctf->idct4x4dc = x264_idct4x4dc_mmxext; dctf->idct4x4dc = x264_idct4x4dc_mmx;
}
#ifndef ARCH_X86_64 #ifndef ARCH_X86_64
if( cpu&X264_CPU_MMX )
{
dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx; dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx; dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
...@@ -433,6 +430,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) ...@@ -433,6 +430,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->add16x16_idct8= x264_add16x16_idct8_sse2; dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
} }
#endif #endif
/* FIXME altivec dct is not transposed yet /* FIXME altivec dct is not transposed yet
#ifdef ARCH_PPC #ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC ) if( cpu&X264_CPU_ALTIVEC )
......
...@@ -143,13 +143,13 @@ x264_mmx_p2n2p1p1: dw 2, -2, 1, 1 ...@@ -143,13 +143,13 @@ x264_mmx_p2n2p1p1: dw 2, -2, 1, 1
SECTION .text SECTION .text
cglobal x264_dct4x4dc_mmxext cglobal x264_dct4x4dc_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void __cdecl dct4x4dc( int16_t d[4][4] ) ; void __cdecl x264_dct4x4dc_mmx( int16_t d[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_dct4x4dc_mmxext: x264_dct4x4dc_mmx:
mov eax, [esp+ 4] mov eax, [esp+ 4]
movq mm0, [eax+ 0] movq mm0, [eax+ 0]
movq mm1, [eax+ 8] movq mm1, [eax+ 8]
...@@ -183,13 +183,13 @@ x264_dct4x4dc_mmxext: ...@@ -183,13 +183,13 @@ x264_dct4x4dc_mmxext:
picpop ebx picpop ebx
ret ret
cglobal x264_idct4x4dc_mmxext cglobal x264_idct4x4dc_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void __cdecl x264_idct4x4dc_mmxext( int16_t d[4][4] ) ; void __cdecl x264_idct4x4dc_mmx( int16_t d[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_idct4x4dc_mmxext: x264_idct4x4dc_mmx:
mov eax, [esp+ 4] mov eax, [esp+ 4]
movq mm0, [eax+ 0] movq mm0, [eax+ 0]
movq mm1, [eax+ 8] movq mm1, [eax+ 8]
...@@ -210,13 +210,13 @@ x264_idct4x4dc_mmxext: ...@@ -210,13 +210,13 @@ x264_idct4x4dc_mmxext:
movq [eax+24], mm4 movq [eax+24], mm4
ret ret
cglobal x264_sub4x4_dct_mmxext cglobal x264_sub4x4_dct_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void __cdecl x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ) ; void __cdecl x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_sub4x4_dct_mmxext: x264_sub4x4_dct_mmx:
push ebx push ebx
mov eax, [esp+12] ; pix1 mov eax, [esp+12] ; pix1
mov ebx, [esp+16] ; i_pix1 mov ebx, [esp+16] ; i_pix1
...@@ -255,13 +255,13 @@ x264_sub4x4_dct_mmxext: ...@@ -255,13 +255,13 @@ x264_sub4x4_dct_mmxext:
pop ebx pop ebx
ret ret
cglobal x264_add4x4_idct_mmxext cglobal x264_add4x4_idct_mmx
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void __cdecl x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] ) ; void __cdecl x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_add4x4_idct_mmxext: x264_add4x4_idct_mmx:
; Load dct coeffs ; Load dct coeffs
mov eax, [esp+12] ; dct mov eax, [esp+12] ; dct
movq mm0, [eax+ 0] movq mm0, [eax+ 0]
......
...@@ -34,20 +34,20 @@ ...@@ -34,20 +34,20 @@
#include "dct.h" #include "dct.h"
void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ) void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{ {
x264_sub4x4_dct_mmxext( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 ); x264_sub4x4_dct_mmx( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
x264_sub4x4_dct_mmxext( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 ); x264_sub4x4_dct_mmx( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 );
x264_sub4x4_dct_mmxext( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 ); x264_sub4x4_dct_mmx( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 );
x264_sub4x4_dct_mmxext( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 ); x264_sub4x4_dct_mmx( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 );
} }
void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ) void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{ {
x264_sub8x8_dct_mmxext( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 ); x264_sub8x8_dct_mmx( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
x264_sub8x8_dct_mmxext( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 ); x264_sub8x8_dct_mmx( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 );
x264_sub8x8_dct_mmxext( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 ); x264_sub8x8_dct_mmx( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
x264_sub8x8_dct_mmxext( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 ); x264_sub8x8_dct_mmx( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
} }
...@@ -56,20 +56,20 @@ void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, ...@@ -56,20 +56,20 @@ void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1,
* addXxX_idct: * addXxX_idct:
****************************************************************************/ ****************************************************************************/
void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] ) void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] )
{ {
x264_add4x4_idct_mmxext( p_dst, i_dst, dct[0] ); x264_add4x4_idct_mmx( p_dst, i_dst, dct[0] );
x264_add4x4_idct_mmxext( &p_dst[4], i_dst, dct[1] ); x264_add4x4_idct_mmx( &p_dst[4], i_dst, dct[1] );
x264_add4x4_idct_mmxext( &p_dst[4*i_dst+0], i_dst, dct[2] ); x264_add4x4_idct_mmx( &p_dst[4*i_dst+0], i_dst, dct[2] );
x264_add4x4_idct_mmxext( &p_dst[4*i_dst+4], i_dst, dct[3] ); x264_add4x4_idct_mmx( &p_dst[4*i_dst+4], i_dst, dct[3] );
} }
void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] ) void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
{ {
x264_add8x8_idct_mmxext( &p_dst[0], i_dst, &dct[0] ); x264_add8x8_idct_mmx( &p_dst[0], i_dst, &dct[0] );
x264_add8x8_idct_mmxext( &p_dst[8], i_dst, &dct[4] ); x264_add8x8_idct_mmx( &p_dst[8], i_dst, &dct[4] );
x264_add8x8_idct_mmxext( &p_dst[8*i_dst], i_dst, &dct[8] ); x264_add8x8_idct_mmx( &p_dst[8*i_dst], i_dst, &dct[8] );
x264_add8x8_idct_mmxext( &p_dst[8*i_dst+8], i_dst, &dct[12] ); x264_add8x8_idct_mmx( &p_dst[8*i_dst+8], i_dst, &dct[12] );
} }
/*********************** /***********************
......
...@@ -24,16 +24,16 @@ ...@@ -24,16 +24,16 @@
#ifndef _I386_DCT_H #ifndef _I386_DCT_H
#define _I386_DCT_H 1 #define _I386_DCT_H 1
void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] ); void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] );
void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] ); void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] );
void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] ); void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
void x264_dct4x4dc_mmxext( int16_t d[4][4] ); void x264_dct4x4dc_mmx( int16_t d[4][4] );
void x264_idct4x4dc_mmxext( int16_t d[4][4] ); void x264_idct4x4dc_mmx( int16_t d[4][4] );
void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void x264_sub16x16_dct8_mmx( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void x264_sub16x16_dct8_mmx( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
......
...@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext ...@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext
cglobal x264_pixel_avg_weight_w8_mmxext cglobal x264_pixel_avg_weight_w8_mmxext
cglobal x264_pixel_avg_weight_w16_mmxext cglobal x264_pixel_avg_weight_w16_mmxext
cglobal x264_mc_copy_w4_mmxext cglobal x264_mc_copy_w4_mmx
cglobal x264_mc_copy_w8_mmxext cglobal x264_mc_copy_w8_mmx
cglobal x264_mc_copy_w16_mmxext cglobal x264_mc_copy_w16_mmx
cglobal x264_mc_copy_w16_sse2 cglobal x264_mc_copy_w16_sse2
cglobal x264_mc_chroma_mmxext cglobal x264_mc_chroma_mmxext
...@@ -345,10 +345,10 @@ x264_pixel_avg_weight_4x4_mmxext: ...@@ -345,10 +345,10 @@ x264_pixel_avg_weight_4x4_mmxext:
ALIGN 16 ALIGN 16
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride, ; void x264_mc_copy_w4_mmx( uint8_t *src, int i_src_stride,
; uint8_t *dst, int i_dst_stride, int i_height ) ; uint8_t *dst, int i_dst_stride, int i_height )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
x264_mc_copy_w4_mmxext: x264_mc_copy_w4_mmx:
push ebx push ebx
push esi push esi
push edi push edi
...@@ -377,10 +377,10 @@ ALIGN 4 ...@@ -377,10 +377,10 @@ ALIGN 4
ALIGN 16