Commit fdb64099 authored by Loren Merritt

some mmxext functions really only required mmx.



git-svn-id: svn://svn.videolan.org/x264/trunk@470 df754926-b1dd-0310-bc7b-ec298dee348c
parent abffd18f
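For context: MMXEXT only adds a handful of integer instructions on top of baseline MMX (pshufw, pavgb, psadbw, movntq, and so on). The routines renamed in this commit use none of them, so the _mmxext suffix, and the X264_CPU_MMXEXT gate in the init code, were stricter than the code actually required. Below is a minimal, self-contained C sketch of the dispatch pattern the rename feeds into; the X264_CPU_* flag names appear in the diff itself, while the flag values, the struct, and the stub routine are illustrative assumptions, not x264's real definitions.

/* Hedged sketch of x264-style CPU dispatch (illustrative names/values). */
#include <stdint.h>
#include <stdio.h>

#define X264_CPU_MMX    0x01   /* assumed values; only the names are from the diff */
#define X264_CPU_MMXEXT 0x02   /* every MMXEXT CPU also has MMX */

typedef struct { void (*dct4x4dc)( int16_t d[4][4] ); } dct_funcs_t;

/* stand-in for the renamed asm routine x264_dct4x4dc_mmx */
static void dct4x4dc_mmx_stub( int16_t d[4][4] ) { (void)d; }

static void dct_init_sketch( int cpu, dct_funcs_t *dctf )
{
    /* The routine uses only baseline MMX instructions, so after this
     * commit the MMX flag, not MMXEXT, is the correct gate. */
    if( cpu & X264_CPU_MMX )
        dctf->dct4x4dc = dct4x4dc_mmx_stub;
}

int main( void )
{
    dct_funcs_t f = { 0 };
    dct_init_sketch( X264_CPU_MMX, &f );   /* a plain-MMX CPU, no MMXEXT */
    printf( "dispatched: %s\n", f.dct4x4dc ? "mmx" : "C fallback" );
    return 0;
}

Before the rename, the equivalent check against X264_CPU_MMXEXT would have left such a CPU on the C fallback for no reason.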
@@ -157,13 +157,13 @@ pw_32: times 8 dw 32
 SECTION .text
 
-cglobal x264_dct4x4dc_mmxext
+cglobal x264_dct4x4dc_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void dct4x4dc( int16_t d[4][4] )
+; void x264_dct4x4dc_mmx( int16_t d[4][4] )
 ;-----------------------------------------------------------------------------
-x264_dct4x4dc_mmxext:
+x264_dct4x4dc_mmx:
     movq mm0, [parm1q+ 0]
     movq mm1, [parm1q+ 8]
     movq mm2, [parm1q+16]
@@ -192,13 +192,13 @@ x264_dct4x4dc_mmxext:
     movq [parm1q+24],mm4
     ret
 
-cglobal x264_idct4x4dc_mmxext
+cglobal x264_idct4x4dc_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_idct4x4dc_mmxext( int16_t d[4][4] )
+; void x264_idct4x4dc_mmx( int16_t d[4][4] )
 ;-----------------------------------------------------------------------------
-x264_idct4x4dc_mmxext:
+x264_idct4x4dc_mmx:
     movq mm0, [parm1q+ 0]
     movq mm1, [parm1q+ 8]
     movq mm2, [parm1q+16]
@@ -218,13 +218,13 @@ x264_idct4x4dc_mmxext:
     movq [parm1q+24], mm4
     ret
 
-cglobal x264_sub4x4_dct_mmxext
+cglobal x264_sub4x4_dct_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+; void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 ;-----------------------------------------------------------------------------
-x264_sub4x4_dct_mmxext:
+x264_sub4x4_dct_mmx:
     firstpush rbx
     pushreg rbx
     endprolog
@@ -272,13 +272,13 @@ x264_sub4x4_dct_mmxext:
     ret
     endfunc
 
-cglobal x264_add4x4_idct_mmxext
+cglobal x264_add4x4_idct_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
+; void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
 ;-----------------------------------------------------------------------------
-x264_add4x4_idct_mmxext:
+x264_add4x4_idct_mmx:
     ; Load dct coeffs
     movq mm0, [parm3q+ 0] ; dct
     movq mm1, [parm3q+ 8]
......
@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext
 cglobal x264_pixel_avg_weight_w8_mmxext
 cglobal x264_pixel_avg_weight_w16_mmxext
-cglobal x264_mc_copy_w4_mmxext
-cglobal x264_mc_copy_w8_mmxext
-cglobal x264_mc_copy_w16_mmxext
+cglobal x264_mc_copy_w4_mmx
+cglobal x264_mc_copy_w8_mmx
+cglobal x264_mc_copy_w16_mmx
 cglobal x264_mc_copy_w16_sse2
 cglobal x264_mc_chroma_mmxext
@@ -288,10 +288,10 @@ x264_pixel_avg_weight_4x4_mmxext:
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w4_mmxext( uint8_t *dst, int i_dst_stride,
+; void x264_mc_copy_w4_mmx( uint8_t *dst, int i_dst_stride,
 ;                              uint8_t *src, int i_src_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w4_mmxext:
+x264_mc_copy_w4_mmx:
     mov eax, parm5d ; i_height
 
 ALIGN 4
@@ -310,10 +310,10 @@ ALIGN 4
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w8_mmxext( uint8_t *dst, int i_dst_stride,
+; void x264_mc_copy_w8_mmx( uint8_t *dst, int i_dst_stride,
 ;                              uint8_t *src, int i_src_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w8_mmxext:
+x264_mc_copy_w8_mmx:
     mov eax, parm5d ; i_height
     lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
@@ -339,10 +339,10 @@ ALIGN 4
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w16_mmxext( uint8_t *dst, int i_dst_stride,
+; void x264_mc_copy_w16_mmx( uint8_t *dst, int i_dst_stride,
 ;                               uint8_t *src, int i_src_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w16_mmxext:
+x264_mc_copy_w16_mmx:
     mov eax, parm5d ; i_height
     lea r10, [parm4q+parm4q*2] ; 3 * i_src_stride
......
@@ -276,9 +276,7 @@ x264_center_filter_mmxext :
     jnz .loopcx2
 
     add r10, r11 ; dst2 += dst2_stride
 
     dec r15 ; height
-    test r15, r15
     jnz .loopcy
 
     lea rsp, [rbp]
@@ -326,7 +324,6 @@ x264_horizontal_filter_mmxext :
 loophy:
-    dec rcx
     xor rax, rax
 
 loophx:
@@ -365,7 +362,7 @@ loophx:
     add rdx, r11 ; src_pitch
     add r9, r10 ; dst_pitch
 
-    test rcx, rcx
+    dec rcx
     jnz loophy
 
     ret
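The two filter hunks above (and their 32-bit counterparts later in this commit) also tighten the loop control: `dec` already sets the zero flag, so the separate `test` before `jnz` was redundant, and moving the `dec` from the top of the loop to just before the branch lets one instruction both count down and decide the jump. A rough, self-contained C analogue of the tightened loop tail (all names hypothetical):

#include <stdio.h>

int main( void )
{
    int height = 16;              /* loop counter, like r15 or rcx above */
    do {
        /* ... filter one row here ... */
        printf( "row %d\n", height );
    } while( --height != 0 );     /* the decrement itself supplies the
                                     zero test; no separate compare */
    return 0;
}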
@@ -266,13 +266,13 @@ cglobal x264_pixel_sad_pde_16x16_mmxext
 cglobal x264_pixel_sad_pde_16x8_mmxext
 cglobal x264_pixel_sad_pde_8x16_mmxext
-cglobal x264_pixel_ssd_16x16_mmxext
-cglobal x264_pixel_ssd_16x8_mmxext
-cglobal x264_pixel_ssd_8x16_mmxext
-cglobal x264_pixel_ssd_8x8_mmxext
-cglobal x264_pixel_ssd_8x4_mmxext
-cglobal x264_pixel_ssd_4x8_mmxext
-cglobal x264_pixel_ssd_4x4_mmxext
+cglobal x264_pixel_ssd_16x16_mmx
+cglobal x264_pixel_ssd_16x8_mmx
+cglobal x264_pixel_ssd_8x16_mmx
+cglobal x264_pixel_ssd_8x8_mmx
+cglobal x264_pixel_ssd_8x4_mmx
+cglobal x264_pixel_ssd_4x8_mmx
+cglobal x264_pixel_ssd_4x4_mmx
 cglobal x264_pixel_satd_4x4_mmxext
 cglobal x264_pixel_satd_4x8_mmxext
@@ -470,22 +470,22 @@ x264_pixel_sad_pde_8x16_mmxext:
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; int x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
+; int x264_pixel_ssd_16x16_mmx (uint8_t *, int, uint8_t *, int )
 ;-----------------------------------------------------------------------------
-x264_pixel_ssd_16x16_mmxext:
+x264_pixel_ssd_16x16_mmx:
     SSD_START
     SSD_INC_8x16P
     SSD_INC_8x16P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_16x8_mmxext:
+x264_pixel_ssd_16x8_mmx:
     SSD_START
     SSD_INC_8x16P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x16_mmxext:
+x264_pixel_ssd_8x16_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_INC_4x8P
@@ -494,27 +494,27 @@ x264_pixel_ssd_8x16_mmxext:
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x8_mmxext:
+x264_pixel_ssd_8x8_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_INC_4x8P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x4_mmxext:
+x264_pixel_ssd_8x4_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_4x8_mmxext:
+x264_pixel_ssd_4x8_mmx:
     SSD_START
     SSD_INC_4x4P
     SSD_INC_4x4P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_4x4_mmxext:
+x264_pixel_ssd_4x4_mmx:
     SSD_START
     SSD_INC_4x4P
     SSD_END
......
@@ -397,23 +397,20 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
     dctf->idct2x2dc = dct2x2dc;
 
 #ifdef HAVE_MMXEXT
-    if( cpu&X264_CPU_MMXEXT )
+    if( cpu&X264_CPU_MMX )
     {
-        dctf->sub4x4_dct = x264_sub4x4_dct_mmxext;
-        dctf->sub8x8_dct = x264_sub8x8_dct_mmxext;
-        dctf->sub16x16_dct = x264_sub16x16_dct_mmxext;
+        dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
+        dctf->sub8x8_dct = x264_sub8x8_dct_mmx;
+        dctf->sub16x16_dct = x264_sub16x16_dct_mmx;
 
-        dctf->add4x4_idct = x264_add4x4_idct_mmxext;
-        dctf->add8x8_idct = x264_add8x8_idct_mmxext;
-        dctf->add16x16_idct = x264_add16x16_idct_mmxext;
+        dctf->add4x4_idct = x264_add4x4_idct_mmx;
+        dctf->add8x8_idct = x264_add8x8_idct_mmx;
+        dctf->add16x16_idct = x264_add16x16_idct_mmx;
 
-        dctf->dct4x4dc = x264_dct4x4dc_mmxext;
-        dctf->idct4x4dc = x264_idct4x4dc_mmxext;
-    }
+        dctf->dct4x4dc = x264_dct4x4dc_mmx;
+        dctf->idct4x4dc = x264_idct4x4dc_mmx;
 
 #ifndef ARCH_X86_64
-    if( cpu&X264_CPU_MMX )
-    {
         dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
         dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
@@ -433,6 +430,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
         dctf->add16x16_idct8= x264_add16x16_idct8_sse2;
     }
 #endif
+/* FIXME altivec dct is not transposed yet
 #ifdef ARCH_PPC
     if( cpu&X264_CPU_ALTIVEC )
......
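One structural side effect in x264_dct_init(): with the gate loosened to X264_CPU_MMX, the x86-32-only dct8 assignments no longer need their own `if( cpu&X264_CPU_MMX )` block, so it was folded into the outer one. As far as the truncated hunk shows, the resulting shape is roughly the following (an inferred excerpt, not a verbatim quote; the closing brace falls outside the visible context):

#ifdef HAVE_MMXEXT
    if( cpu&X264_CPU_MMX )
    {
        dctf->sub4x4_dct = x264_sub4x4_dct_mmx;
        /* ... remaining 4x4 sub/add/dc assignments ... */
#ifndef ARCH_X86_64
        /* the 8x8 dct asm is 32-bit-only at this point */
        dctf->sub8x8_dct8 = x264_sub8x8_dct8_mmx;
        dctf->sub16x16_dct8 = x264_sub16x16_dct8_mmx;
        /* ... */
#endif
    }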
@@ -143,13 +143,13 @@ x264_mmx_p2n2p1p1: dw 2, -2, 1, 1
 SECTION .text
 
-cglobal x264_dct4x4dc_mmxext
+cglobal x264_dct4x4dc_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void __cdecl dct4x4dc( int16_t d[4][4] )
+; void __cdecl x264_dct4x4dc_mmx( int16_t d[4][4] )
 ;-----------------------------------------------------------------------------
-x264_dct4x4dc_mmxext:
+x264_dct4x4dc_mmx:
     mov eax, [esp+ 4]
     movq mm0, [eax+ 0]
    movq mm1, [eax+ 8]
@@ -183,13 +183,13 @@ x264_dct4x4dc_mmxext:
     picpop ebx
     ret
 
-cglobal x264_idct4x4dc_mmxext
+cglobal x264_idct4x4dc_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void __cdecl x264_idct4x4dc_mmxext( int16_t d[4][4] )
+; void __cdecl x264_idct4x4dc_mmx( int16_t d[4][4] )
 ;-----------------------------------------------------------------------------
-x264_idct4x4dc_mmxext:
+x264_idct4x4dc_mmx:
     mov eax, [esp+ 4]
     movq mm0, [eax+ 0]
     movq mm1, [eax+ 8]
@@ -210,13 +210,13 @@ x264_idct4x4dc_mmxext:
     movq [eax+24], mm4
     ret
 
-cglobal x264_sub4x4_dct_mmxext
+cglobal x264_sub4x4_dct_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void __cdecl x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+; void __cdecl x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 ;-----------------------------------------------------------------------------
-x264_sub4x4_dct_mmxext:
+x264_sub4x4_dct_mmx:
     push ebx
     mov eax, [esp+12] ; pix1
     mov ebx, [esp+16] ; i_pix1
@@ -255,13 +255,13 @@ x264_sub4x4_dct_mmxext:
     pop ebx
     ret
 
-cglobal x264_add4x4_idct_mmxext
+cglobal x264_add4x4_idct_mmx
 
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void __cdecl x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
+; void __cdecl x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
 ;-----------------------------------------------------------------------------
-x264_add4x4_idct_mmxext:
+x264_add4x4_idct_mmx:
     ; Load dct coeffs
     mov eax, [esp+12] ; dct
     movq mm0, [eax+ 0]
......
@@ -34,20 +34,20 @@
 #include "dct.h"
 
-void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 {
-    x264_sub4x4_dct_mmxext( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
-    x264_sub4x4_dct_mmxext( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 );
-    x264_sub4x4_dct_mmxext( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 );
-    x264_sub4x4_dct_mmxext( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 );
+    x264_sub4x4_dct_mmx( dct[0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
+    x264_sub4x4_dct_mmx( dct[1], &pix1[4], i_pix1, &pix2[4], i_pix2 );
+    x264_sub4x4_dct_mmx( dct[2], &pix1[4*i_pix1+0], i_pix1, &pix2[4*i_pix2+0], i_pix2 );
+    x264_sub4x4_dct_mmx( dct[3], &pix1[4*i_pix1+4], i_pix1, &pix2[4*i_pix2+4], i_pix2 );
 }
 
-void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
+void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
 {
-    x264_sub8x8_dct_mmxext( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
-    x264_sub8x8_dct_mmxext( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 );
-    x264_sub8x8_dct_mmxext( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
-    x264_sub8x8_dct_mmxext( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
+    x264_sub8x8_dct_mmx( &dct[ 0], &pix1[0], i_pix1, &pix2[0], i_pix2 );
+    x264_sub8x8_dct_mmx( &dct[ 4], &pix1[8], i_pix1, &pix2[8], i_pix2 );
+    x264_sub8x8_dct_mmx( &dct[ 8], &pix1[8*i_pix1], i_pix1, &pix2[8*i_pix2], i_pix2 );
+    x264_sub8x8_dct_mmx( &dct[12], &pix1[8*i_pix1+8], i_pix1, &pix2[8*i_pix2+8], i_pix2 );
 }
@@ -56,20 +56,20 @@ void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1,
  * addXxX_idct:
  ****************************************************************************/
 
-void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] )
+void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] )
 {
-    x264_add4x4_idct_mmxext( p_dst, i_dst, dct[0] );
-    x264_add4x4_idct_mmxext( &p_dst[4], i_dst, dct[1] );
-    x264_add4x4_idct_mmxext( &p_dst[4*i_dst+0], i_dst, dct[2] );
-    x264_add4x4_idct_mmxext( &p_dst[4*i_dst+4], i_dst, dct[3] );
+    x264_add4x4_idct_mmx( p_dst, i_dst, dct[0] );
+    x264_add4x4_idct_mmx( &p_dst[4], i_dst, dct[1] );
+    x264_add4x4_idct_mmx( &p_dst[4*i_dst+0], i_dst, dct[2] );
+    x264_add4x4_idct_mmx( &p_dst[4*i_dst+4], i_dst, dct[3] );
 }
 
-void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
+void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
 {
-    x264_add8x8_idct_mmxext( &p_dst[0], i_dst, &dct[0] );
-    x264_add8x8_idct_mmxext( &p_dst[8], i_dst, &dct[4] );
-    x264_add8x8_idct_mmxext( &p_dst[8*i_dst], i_dst, &dct[8] );
-    x264_add8x8_idct_mmxext( &p_dst[8*i_dst+8], i_dst, &dct[12] );
+    x264_add8x8_idct_mmx( &p_dst[0], i_dst, &dct[0] );
+    x264_add8x8_idct_mmx( &p_dst[8], i_dst, &dct[4] );
+    x264_add8x8_idct_mmx( &p_dst[8*i_dst], i_dst, &dct[8] );
+    x264_add8x8_idct_mmx( &p_dst[8*i_dst+8], i_dst, &dct[12] );
 }
 
 /***********************
......
@@ -24,16 +24,16 @@
 #ifndef _I386_DCT_H
 #define _I386_DCT_H 1
 
-void x264_sub4x4_dct_mmxext( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
-void x264_sub8x8_dct_mmxext( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
-void x264_sub16x16_dct_mmxext( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub4x4_dct_mmx( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub8x8_dct_mmx( int16_t dct[4][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
+void x264_sub16x16_dct_mmx( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
 
-void x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] );
-void x264_add8x8_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] );
-void x264_add16x16_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
+void x264_add4x4_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4] );
+void x264_add8x8_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[4][4][4] );
+void x264_add16x16_idct_mmx( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
 
-void x264_dct4x4dc_mmxext( int16_t d[4][4] );
-void x264_idct4x4dc_mmxext( int16_t d[4][4] );
+void x264_dct4x4dc_mmx( int16_t d[4][4] );
+void x264_idct4x4dc_mmx( int16_t d[4][4] );
 
 void x264_sub8x8_dct8_mmx( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
 void x264_sub16x16_dct8_mmx( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
......
@@ -65,9 +65,9 @@ cglobal x264_pixel_avg_weight_4x4_mmxext
 cglobal x264_pixel_avg_weight_w8_mmxext
 cglobal x264_pixel_avg_weight_w16_mmxext
-cglobal x264_mc_copy_w4_mmxext
-cglobal x264_mc_copy_w8_mmxext
-cglobal x264_mc_copy_w16_mmxext
+cglobal x264_mc_copy_w4_mmx
+cglobal x264_mc_copy_w8_mmx
+cglobal x264_mc_copy_w16_mmx
 cglobal x264_mc_copy_w16_sse2
 cglobal x264_mc_chroma_mmxext
@@ -345,10 +345,10 @@ x264_pixel_avg_weight_4x4_mmxext:
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w4_mmxext( uint8_t *src, int i_src_stride,
+; void x264_mc_copy_w4_mmx( uint8_t *src, int i_src_stride,
 ;                              uint8_t *dst, int i_dst_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w4_mmxext:
+x264_mc_copy_w4_mmx:
     push ebx
     push esi
     push edi
@@ -377,10 +377,10 @@ ALIGN 4
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w8_mmxext( uint8_t *src, int i_src_stride,
+; void x264_mc_copy_w8_mmx( uint8_t *src, int i_src_stride,
 ;                              uint8_t *dst, int i_dst_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w8_mmxext:
+x264_mc_copy_w8_mmx:
     push ebx
     push esi
     push edi
@@ -415,10 +415,10 @@ ALIGN 4
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; void x264_mc_copy_w16_mmxext( uint8_t *src, int i_src_stride,
+; void x264_mc_copy_w16_mmx( uint8_t *src, int i_src_stride,
 ;                               uint8_t *dst, int i_dst_stride, int i_height )
 ;-----------------------------------------------------------------------------
-x264_mc_copy_w16_mmxext:
+x264_mc_copy_w16_mmx:
     push ebx
     push esi
     push edi
......
@@ -287,10 +287,7 @@ loopcx2:
     add edi, [picesp + tdstp2]
     mov [picesp + tdst2], edi
-    mov ebp, [picesp + theight]
-    dec ebp
-    test ebp, ebp
-    mov [picesp + theight], ebp
+    dec dword [picesp + theight]
     jnz loopcy
 
     picpop ebx
@@ -332,7 +329,6 @@ x264_horizontal_filter_mmxext :
 loophy:
-    dec ecx
     xor eax, eax
 
 loophx:
@@ -371,7 +367,7 @@ loophx:
     add esi, [esp + 24] ; src_pitch
     add edi, [esp + 16] ; dst_pitch
 
-    test ecx, ecx
+    dec ecx
     jnz loophy
 
     pop esi
......
@@ -35,9 +35,9 @@ extern void x264_pixel_avg_w16_sse2( uint8_t *, int, uint8_t *, int, uint8_t *,
 extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
 extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
 extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
-extern void x264_mc_copy_w4_mmxext( uint8_t *, int, uint8_t *, int, int );
-extern void x264_mc_copy_w8_mmxext( uint8_t *, int, uint8_t *, int, int );
-extern void x264_mc_copy_w16_mmxext( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w4_mmx( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w8_mmx( uint8_t *, int, uint8_t *, int, int );
+extern void x264_mc_copy_w16_mmx( uint8_t *, int, uint8_t *, int, int );
 extern void x264_mc_copy_w16_sse2( uint8_t *, int, uint8_t *, int, int );
 
 #define AVG(W,H) \
@@ -73,13 +73,13 @@ static void (* const x264_pixel_avg_wtab_mmxext[5])( uint8_t *, int, uint8_t *,
     NULL,
     x264_pixel_avg_w16_mmxext
 };
 
-static void (* const x264_mc_copy_wtab_mmxext[5])( uint8_t *, int, uint8_t *, int, int ) =
+static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int, int ) =
 {
     NULL,
-    x264_mc_copy_w4_mmxext,
-    x264_mc_copy_w8_mmxext,
+    x264_mc_copy_w4_mmx,
+    x264_mc_copy_w8_mmx,
     NULL,
-    x264_mc_copy_w16_mmxext
+    x264_mc_copy_w16_mmx
 };
 
 static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
 static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
@@ -102,7 +102,7 @@ void mc_luma_mmx( uint8_t *src[4], int i_src_stride,
     }
     else
     {
-        x264_mc_copy_wtab_mmxext[i_width>>2](
+        x264_mc_copy_wtab_mmx[i_width>>2](
             dst, i_dst_stride, src1, i_src_stride, i_height );
     }
 }
@@ -155,9 +155,9 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf )
     pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext;
     // avg_weight_4x8 is rare and 4x2 is not used
 
-    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmxext;
-    pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmxext;
-    pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmxext;
+    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
+    pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
+    pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
 }
 
 void x264_mc_sse2_init( x264_mc_functions_t *pf )
 {
......
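The mc-c.c hunk above renames the copy table but keeps its width-indexed dispatch: i_width>>2 maps widths 4, 8 and 16 to slots 1, 2 and 4, and the NULL slots (0 and 3) are never reachable because no 0- or 12-pixel-wide copy exists. A self-contained toy version of that indexing scheme (all names hypothetical):

#include <stdio.h>

static void copy_w4( void )  { puts( "w4" ); }
static void copy_w8( void )  { puts( "w8" ); }
static void copy_w16( void ) { puts( "w16" ); }

/* slots 0 and 3 stay NULL: no 0- or 12-pixel-wide copy exists */
static void (* const copy_wtab[5])( void ) =
    { NULL, copy_w4, copy_w8, NULL, copy_w16 };

int main( void )
{
    for( int i_width = 4; i_width <= 16; i_width *= 2 )
        copy_wtab[i_width>>2]();   /* 4 -> slot 1, 8 -> 2, 16 -> 4 */
    return 0;
}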
@@ -266,13 +266,13 @@ cglobal x264_pixel_sad_pde_16x16_mmxext
 cglobal x264_pixel_sad_pde_16x8_mmxext
 cglobal x264_pixel_sad_pde_8x16_mmxext
-cglobal x264_pixel_ssd_16x16_mmxext
-cglobal x264_pixel_ssd_16x8_mmxext
-cglobal x264_pixel_ssd_8x16_mmxext
-cglobal x264_pixel_ssd_8x8_mmxext
-cglobal x264_pixel_ssd_8x4_mmxext
-cglobal x264_pixel_ssd_4x8_mmxext
-cglobal x264_pixel_ssd_4x4_mmxext
+cglobal x264_pixel_ssd_16x16_mmx
+cglobal x264_pixel_ssd_16x8_mmx
+cglobal x264_pixel_ssd_8x16_mmx
+cglobal x264_pixel_ssd_8x8_mmx
+cglobal x264_pixel_ssd_8x4_mmx
+cglobal x264_pixel_ssd_4x8_mmx
+cglobal x264_pixel_ssd_4x4_mmx
 cglobal x264_pixel_satd_4x4_mmxext
 cglobal x264_pixel_satd_4x8_mmxext
@@ -473,22 +473,22 @@ x264_pixel_sad_pde_8x16_mmxext:
 ALIGN 16
 ;-----------------------------------------------------------------------------
-; int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
+; int __cdecl x264_pixel_ssd_16x16_mmx (uint8_t *, int, uint8_t *, int )
 ;-----------------------------------------------------------------------------
-x264_pixel_ssd_16x16_mmxext:
+x264_pixel_ssd_16x16_mmx:
     SSD_START
     SSD_INC_8x16P
     SSD_INC_8x16P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_16x8_mmxext:
+x264_pixel_ssd_16x8_mmx:
     SSD_START
     SSD_INC_8x16P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x16_mmxext:
+x264_pixel_ssd_8x16_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_INC_4x8P
@@ -497,27 +497,27 @@ x264_pixel_ssd_8x16_mmxext:
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x8_mmxext:
+x264_pixel_ssd_8x8_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_INC_4x8P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_8x4_mmxext:
+x264_pixel_ssd_8x4_mmx:
     SSD_START
     SSD_INC_4x8P
     SSD_END
 
 ALIGN 16
-x264_pixel_ssd_4x8_mmxext:
+x264_pixel_ssd_4x8_mmx:
     SSD_START
     SSD_INC_4x4P
     SSD_INC_4x4P