Commit 73522c84 authored by Loren Merritt

SSD comparison function (not yet used).

Cosmetics in mmx SAD.


git-svn-id: svn://svn.videolan.org/x264/trunk@165 df754926-b1dd-0310-bc7b-ec298dee348c
parent c68f34e5
......@@ -91,6 +91,102 @@ BITS 32
lea ecx, [ecx+2*edx]
%endmacro
; SSD_INC_1x16P: add the squared differences of one 16-pixel row to mm0.
; In:    eax = pix1 row, ecx = pix2 row
;        ebx, edx = values added to eax / ecx after the row (the strides
;                   loaded by SSD_START)
;        mm7 = zero (cleared by SSD_START), mm0 = running sum
; Out:   mm0 += squared differences, kept as two packed dword partial sums
;        eax += ebx, ecx += edx
; Clobb: mm1-mm6, flags
%macro SSD_INC_1x16P 0
movq mm1, [eax] ; mm1 = pix1[0..7]
movq mm2, [ecx] ; mm2 = pix2[0..7]
movq mm3, [eax+8] ; mm3 = pix1[8..15]
movq mm4, [ecx+8] ; mm4 = pix2[8..15]
movq mm5, mm2 ; keep copies of pix2: psubusb below overwrites mm2/mm4
movq mm6, mm4
psubusb mm2, mm1 ; mm2 = pix2-pix1, unsigned-saturated (0 where pix1 > pix2)
psubusb mm4, mm3
psubusb mm1, mm5 ; mm1 = pix1-pix2, unsigned-saturated (0 where pix2 > pix1)
psubusb mm3, mm6
por mm1, mm2 ; one of each pair is 0, so the OR is the 8bit abs diff
por mm3, mm4
movq mm2, mm1 ; duplicate so low and high halves can be unpacked separately
movq mm4, mm3
punpcklbw mm1, mm7 ; low 4 bytes -> words (high byte taken from mm7)
punpcklbw mm3, mm7
punpckhbw mm2, mm7 ; high 4 bytes -> words
punpckhbw mm4, mm7
pmaddwd mm1, mm1 ; square each word and add adjacent pairs -> 2 dwords
pmaddwd mm2, mm2
pmaddwd mm3, mm3
pmaddwd mm4, mm4
add eax, ebx ; step both pointers to the next row
add ecx, edx
paddd mm0, mm1 ; accumulate the four partial results
paddd mm0, mm2
paddd mm0, mm3
paddd mm0, mm4
%endmacro
; SSD_INC_1x8P: add the squared differences of one 8-pixel row to mm0.
; In:    eax = pix1 row, ecx = pix2 row, ebx / edx = values added to them
;        afterwards, mm7 = zero (cleared by SSD_START), mm0 = running sum
; Out:   mm0 += squared differences (two packed dword partial sums)
;        eax += ebx, ecx += edx
; Clobb: mm1, mm2, mm5, flags
%macro SSD_INC_1x8P 0
movq mm1, [eax] ; mm1 = pix1[0..7]
movq mm2, [ecx] ; mm2 = pix2[0..7]
movq mm5, mm2 ; keep a copy of pix2: the next psubusb overwrites mm2
psubusb mm2, mm1 ; mm2 = pix2-pix1, unsigned-saturated
psubusb mm1, mm5 ; mm1 = pix1-pix2, unsigned-saturated
por mm1, mm2 ; mm1 = 8bit abs diff (one operand of each OR is 0)
movq mm2, mm1
punpcklbw mm1, mm7
punpckhbw mm2, mm7 ; (mm1,mm2) = 16bit abs diff
pmaddwd mm1, mm1 ; square each word and add adjacent pairs -> 2 dwords
pmaddwd mm2, mm2
add eax, ebx ; step both pointers to the next row
add ecx, edx
paddd mm0, mm1 ; accumulate
paddd mm0, mm2
%endmacro
; SSD_INC_1x4P: add the squared differences of one 4-pixel row to mm0.
; In:    eax = pix1 row, ecx = pix2 row, ebx / edx = values added to them
;        afterwards, mm7 = zero (cleared by SSD_START), mm0 = running sum
; Out:   mm0 += squared differences (two packed dword partial sums)
;        eax += ebx, ecx += edx
; Clobb: mm1, mm2, mm5, flags
%macro SSD_INC_1x4P 0
movd mm1, [eax] ; mm1 = pix1[0..3] in the low dword
movd mm2, [ecx] ; mm2 = pix2[0..3] in the low dword
movq mm5, mm2 ; keep a copy of pix2: the next psubusb overwrites mm2
psubusb mm2, mm1 ; mm2 = pix2-pix1, unsigned-saturated
psubusb mm1, mm5 ; mm1 = pix1-pix2, unsigned-saturated
por mm1, mm2 ; mm1 = 8bit abs diff (one operand of each OR is 0)
punpcklbw mm1, mm7 ; 4 bytes -> 4 words
pmaddwd mm1, mm1 ; square each word and add adjacent pairs -> 2 dwords
add eax, ebx ; step both pointers to the next row
add ecx, edx
paddd mm0, mm1 ; accumulate
%endmacro
; SSD_INC_8x16P: accumulate eight consecutive 16-pixel rows into mm0.
; Expands to eight back-to-back copies of SSD_INC_1x16P, which also steps
; eax/ecx to the following row each time.
%macro SSD_INC_8x16P 0
%rep 8
    SSD_INC_1x16P
%endrep
%endmacro
; SSD_INC_4x8P: accumulate four consecutive 8-pixel rows into mm0.
; Expands to four back-to-back copies of SSD_INC_1x8P.
%macro SSD_INC_4x8P 0
%rep 4
    SSD_INC_1x8P
%endrep
%endmacro
; SSD_INC_4x4P: accumulate four consecutive 4-pixel rows into mm0.
; Expands to four back-to-back copies of SSD_INC_1x4P.
%macro SSD_INC_4x4P 0
%rep 4
    SSD_INC_1x4P
%endrep
%endmacro
%macro LOAD_DIFF_4P 5 ; MMP, MMT, MMZ, [pix1], [pix2]
movd %1, %4
punpcklbw %1, %3
......@@ -203,6 +299,14 @@ cglobal x264_pixel_sad_8x4_mmxext
cglobal x264_pixel_sad_4x8_mmxext
cglobal x264_pixel_sad_4x4_mmxext
; SSD (sum of squared differences) entry points, one per block size (WxH).
cglobal x264_pixel_ssd_16x16_mmxext
cglobal x264_pixel_ssd_16x8_mmxext
cglobal x264_pixel_ssd_8x16_mmxext
cglobal x264_pixel_ssd_8x8_mmxext
cglobal x264_pixel_ssd_8x4_mmxext
cglobal x264_pixel_ssd_4x8_mmxext
cglobal x264_pixel_ssd_4x4_mmxext
cglobal x264_pixel_satd_4x4_mmxext
cglobal x264_pixel_satd_4x8_mmxext
cglobal x264_pixel_satd_8x4_mmxext
......@@ -211,11 +315,7 @@ cglobal x264_pixel_satd_16x8_mmxext
cglobal x264_pixel_satd_8x16_mmxext
cglobal x264_pixel_satd_16x16_mmxext
;-----------------------------------------------------------------------------
; SAD_START: common prologue of the x264_pixel_sad_WxH_mmxext functions.
; Must be the first thing expanded after the function label, because the
; argument offsets assume exactly one push on top of the return address.
; Out:   eax = pix1, ebx = stride1, ecx = pix2, edx = stride2
;        mm0 = 0 (accumulator returned by SAD_END)
; Stack: pushes ebx (callee-saved); SAD_END pops it again.
;-----------------------------------------------------------------------------
%macro SAD_START 0
    push ebx
    mov eax, [esp+ 8] ; pix1
    mov ebx, [esp+12] ; stride1
    mov ecx, [esp+16] ; pix2
    mov edx, [esp+20] ; stride2
    pxor mm0, mm0
%endmacro
; SAD_END: common epilogue matching SAD_START.
; Returns the low 32 bits of the mm0 accumulator in eax.
%macro SAD_END 0
movd eax, mm0 ; return value = low dword of mm0
pop ebx ; restore the register pushed by SAD_START
ret
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   16 rows are processed as 8 x SAD_INC_2x16P.
;-----------------------------------------------------------------------------
x264_pixel_sad_16x16_mmxext:
    SAD_START
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_16x8_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   8 rows are processed as 4 x SAD_INC_2x16P.
;-----------------------------------------------------------------------------
x264_pixel_sad_16x8_mmxext:
    SAD_START           ; the only prologue: pushes ebx once, loads the args
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_INC_2x16P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x16_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   16 rows are processed as 8 x SAD_INC_2x8P.
;-----------------------------------------------------------------------------
x264_pixel_sad_8x16_mmxext:
    SAD_START           ; the only prologue: pushes ebx once, loads the args
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x8_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   8 rows are processed as 4 x SAD_INC_2x8P.
;-----------------------------------------------------------------------------
x264_pixel_sad_8x8_mmxext:
    SAD_START           ; the only prologue: pushes ebx once, loads the args
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_8x4_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   4 rows are processed as 2 x SAD_INC_2x8P.
;-----------------------------------------------------------------------------
x264_pixel_sad_8x4_mmxext:
    SAD_START           ; the only prologue: pushes ebx once, loads the args
    SAD_INC_2x8P
    SAD_INC_2x8P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_4x8_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;   8 rows are processed as 4 x SAD_INC_2x4P.
;-----------------------------------------------------------------------------
x264_pixel_sad_4x8_mmxext:
    SAD_START           ; the only prologue: pushes ebx once, loads the args
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_INC_2x4P
    SAD_END             ; movd eax, mm0 / pop ebx / ret
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_sad_4x4_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SAD_START).
;   Returns in eax the low dword of the mm0 accumulator (see SAD_END).
;-----------------------------------------------------------------------------
x264_pixel_sad_4x4_mmxext:
    SAD_START
%rep 2                  ; 4 rows, two per SAD_INC_2x4P
    SAD_INC_2x4P
%endrep
    SAD_END
; SSD_START: common prologue of the x264_pixel_ssd_WxH_mmxext functions.
; Must be the first thing expanded after the function label, because the
; argument offsets assume exactly one push on top of the return address.
; Out:   eax = pix1, ebx = stride1, ecx = pix2, edx = stride2
;        mm7 = 0 (unpack operand used by the SSD_INC_* macros)
;        mm0 = 0 (accumulator folded and returned by SSD_END)
; Stack: pushes ebx (callee-saved); SSD_END pops it again.
%macro SSD_START 0
    push ebx
    mov eax, [esp+ 8] ; pix1
    mov ebx, [esp+12] ; stride1
    mov ecx, [esp+16] ; pix2
    mov edx, [esp+20] ; stride2
    pxor mm7, mm7 ; zero
    pxor mm0, mm0 ; mm0 holds the sum
%endmacro
; SSD_END: common epilogue matching SSD_START.
; mm0 holds two packed dword partial sums; add them and return the total
; in eax.
%macro SSD_END 0
movq mm1, mm0
psrlq mm1, 32 ; low dword of mm1 = high dword of mm0
paddd mm0, mm1 ; low dword of mm0 = sum of both partial sums
movd eax, mm0 ; return value
pop ebx ; restore the register pushed by SSD_START
ret
%endmacro
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_16x16_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SSD_START).
;   Returns in eax the accumulated sum of squared differences.
;-----------------------------------------------------------------------------
x264_pixel_ssd_16x16_mmxext:
    SSD_START
%rep 2                  ; 16 rows, eight per SSD_INC_8x16P
    SSD_INC_8x16P
%endrep
    SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_16x8_mmxext (uint8_t *, int, uint8_t *, int )
;   8 rows of 16 pixels: one SSD_INC_8x16P.
;-----------------------------------------------------------------------------
x264_pixel_ssd_16x8_mmxext:
SSD_START
SSD_INC_8x16P
SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_8x16_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SSD_START).
;   Returns in eax the accumulated sum of squared differences.
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x16_mmxext:
    SSD_START
%rep 4                  ; 16 rows, four per SSD_INC_4x8P
    SSD_INC_4x8P
%endrep
    SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_8x8_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SSD_START).
;   Returns in eax the accumulated sum of squared differences.
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x8_mmxext:
    SSD_START
%rep 2                  ; 8 rows, four per SSD_INC_4x8P
    SSD_INC_4x8P
%endrep
    SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_8x4_mmxext (uint8_t *, int, uint8_t *, int )
;   4 rows of 8 pixels: one SSD_INC_4x8P.
;-----------------------------------------------------------------------------
x264_pixel_ssd_8x4_mmxext:
SSD_START
SSD_INC_4x8P
SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_4x8_mmxext (uint8_t *, int, uint8_t *, int )
;
;   Stack args: pix1, stride1, pix2, stride2 (loaded by SSD_START).
;   Returns in eax the accumulated sum of squared differences.
;-----------------------------------------------------------------------------
x264_pixel_ssd_4x8_mmxext:
    SSD_START
%rep 2                  ; 8 rows, four per SSD_INC_4x4P
    SSD_INC_4x4P
%endrep
    SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
;   int __cdecl x264_pixel_ssd_4x4_mmxext (uint8_t *, int, uint8_t *, int )
;   4 rows of 4 pixels: one SSD_INC_4x4P.
;-----------------------------------------------------------------------------
x264_pixel_ssd_4x4_mmxext:
SSD_START
SSD_INC_4x4P
SSD_END
ALIGN 16
;-----------------------------------------------------------------------------
......@@ -402,7 +511,6 @@ x264_pixel_satd_4x4_mmxext:
pxor mm7, mm7
LOAD_DIFF_4P mm0, mm6, mm7, [eax], [ecx]
LOAD_DIFF_4P mm1, mm6, mm7, [eax+ebx], [ecx+edx]
LOAD_DIFF_4P mm2, mm6, mm7, [eax+2*ebx], [ecx+2*edx]
......@@ -416,8 +524,6 @@ x264_pixel_satd_4x4_mmxext:
pop ebx
ret
ALIGN 16
;-----------------------------------------------------------------------------
; int __cdecl x264_pixel_satd_4x8_mmxext (uint8_t *, int, uint8_t *, int )
......
......@@ -32,6 +32,14 @@ int x264_pixel_sad_8x4_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_sad_4x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_sad_4x4_mmxext( uint8_t *, int, uint8_t *, int );
/* Sum of squared differences over a WxH block (assembly implementations).
 * Arguments are ( pix1, stride1, pix2, stride2 ). */
int x264_pixel_ssd_16x16_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_16x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_8x16_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_8x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_8x4_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_4x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_ssd_4x4_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x16_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_16x8_mmxext( uint8_t *, int, uint8_t *, int );
int x264_pixel_satd_8x16_mmxext( uint8_t *, int, uint8_t *, int );
......
......@@ -72,6 +72,38 @@ PIXEL_SAD_C( pixel_sad_8x4, 8, 4 )
PIXEL_SAD_C( pixel_sad_4x8, 4, 8 )
PIXEL_SAD_C( pixel_sad_4x4, 4, 4 )
/****************************************************************************
* pixel_ssd_WxH
****************************************************************************/
/* PIXEL_SSD_C( name, lx, ly ) defines
 *     static int name( pix1, i_stride_pix1, pix2, i_stride_pix2 )
 * which returns the sum over an lx-wide, ly-high block of
 * (pix1[x] - pix2[x])^2.  Each pointer is advanced by its own stride
 * after every row. */
#define PIXEL_SSD_C( name, lx, ly ) \
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_ssd = 0;                                  \
    int i_rows_left;                                \
    for( i_rows_left = ly; i_rows_left > 0; i_rows_left-- ) \
    {                                               \
        int i_col = 0;                              \
        while( i_col < lx )                         \
        {                                           \
            const int i_diff = pix1[i_col] - pix2[i_col]; \
            i_ssd += i_diff * i_diff;               \
            i_col++;                                \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_ssd;                                   \
}

PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 )
PIXEL_SSD_C( pixel_ssd_16x8,  16,  8 )
PIXEL_SSD_C( pixel_ssd_8x16,   8, 16 )
PIXEL_SSD_C( pixel_ssd_8x8,    8,  8 )
PIXEL_SSD_C( pixel_ssd_8x4,    8,  4 )
PIXEL_SSD_C( pixel_ssd_4x8,    4,  8 )
PIXEL_SSD_C( pixel_ssd_4x4,    4,  4 )
static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
int y, x;
......@@ -243,6 +275,14 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sad[PIXEL_4x8] = pixel_sad_4x8;
pixf->sad[PIXEL_4x4] = pixel_sad_4x4;
pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16;
pixf->ssd[PIXEL_16x8] = pixel_ssd_16x8;
pixf->ssd[PIXEL_8x16] = pixel_ssd_8x16;
pixf->ssd[PIXEL_8x8] = pixel_ssd_8x8;
pixf->ssd[PIXEL_8x4] = pixel_ssd_8x4;
pixf->ssd[PIXEL_4x8] = pixel_ssd_4x8;
pixf->ssd[PIXEL_4x4] = pixel_ssd_4x4;
pixf->satd[PIXEL_16x16]= pixel_satd_16x16;
pixf->satd[PIXEL_16x8] = pixel_satd_16x8;
pixf->satd[PIXEL_8x16] = pixel_satd_8x16;
......@@ -284,6 +324,14 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sad[PIXEL_4x8 ] = x264_pixel_sad_4x8_mmxext;
pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_mmxext;
pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext;
pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_mmxext;
pixf->ssd[PIXEL_8x16] = x264_pixel_ssd_8x16_mmxext;
pixf->ssd[PIXEL_8x8] = x264_pixel_ssd_8x8_mmxext;
pixf->ssd[PIXEL_8x4] = x264_pixel_ssd_8x4_mmxext;
pixf->ssd[PIXEL_4x8] = x264_pixel_ssd_4x8_mmxext;
pixf->ssd[PIXEL_4x4] = x264_pixel_ssd_4x4_mmxext;
pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext;
pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext;
pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext;
......
......@@ -25,6 +25,7 @@
#define _PIXEL_H 1
/* Block comparison functions: two pixel pointers, each followed by an int
 * (the pixel.c implementations name these pix1, i_stride_pix1, pix2,
 * i_stride_pix2), returning an int score. */
typedef int (*x264_pixel_sad_t) ( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_ssd_t) ( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_satd_t)( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_weight_t) ( uint8_t *, int, uint8_t *, int, int );
......@@ -65,6 +66,7 @@ static const int x264_size2pixel[5][5] = {
typedef struct
{
x264_pixel_sad_t sad[7];
x264_pixel_ssd_t ssd[7];
x264_pixel_satd_t satd[7];
x264_pixel_avg_t avg[10];
x264_pixel_avg_weight_t avg_weight[10];
......
......@@ -45,6 +45,7 @@
#endif
/* MSVC builds: map the float math functions onto their double-precision
 * counterparts (presumably because this toolchain's runtime does not
 * provide exp2f/sqrtf -- confirm before removing). */
#ifdef _MSC_VER
#define exp2f(x) pow( 2, (x) )
#define sqrtf sqrt
#endif
#ifdef WIN32 // POSIX says that rename() removes the destination, but win32 doesn't.
#define rename(src,dst) (unlink(dst), rename(src,dst))
......
......@@ -770,6 +770,7 @@ static int Encode( x264_param_t *param, hnd_t hin, FILE *fout )
/* Do not force any parameters */
pic.i_type = X264_TYPE_AUTO;
pic.i_qpplus1 = 0;
if( x264_encoder_encode( h, &nal, &i_nal, &pic, &pic ) < 0 )
{
fprintf( stderr, "x264_encoder_encode failed\n" );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment