Commit 7760f1b2 authored by Loren Merritt's avatar Loren Merritt

SSIM computation. (default on, disable with --no-ssim)


git-svn-id: svn://svn.videolan.org/x264/trunk@554 df754926-b1dd-0310-bc7b-ec298dee348c
parent 127e2fbf
......@@ -30,9 +30,12 @@ BITS 64
SECTION .rodata align=16
pd_0000ffff: times 4 dd 0x0000ffff
pb_1: times 16 db 1
pw_1: times 8 dw 1
ssim_c1: times 4 dd 416 ; .01*.01*255*255*64
ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63
mask_ff: times 16 db 0xff
times 16 db 0
SECTION .text
......@@ -49,6 +52,20 @@ cglobal x264_pixel_satd_16x16_sse2
cglobal x264_pixel_sa8d_8x8_sse2
cglobal x264_pixel_sa8d_16x16_sse2
cglobal x264_intra_sa8d_x3_8x8_core_sse2
cglobal x264_pixel_ssim_4x4x2_core_sse2
cglobal x264_pixel_ssim_end4_sse2
%macro HADDD 2 ; sum junk
movhlps %2, %1
paddd %1, %2
pshuflw %2, %1, 0xE
paddd %1, %2
%endmacro
%macro HADDW 2
pmaddwd %1, [pw_1 GLOBAL]
HADDD %1, %2
%endmacro
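; what HADDW computes, traced on word lanes w0..w7 (HADDD is the same
; reduction on dword lanes):
;   pmaddwd pw_1        -> {w0+w1, w2+w3, w4+w5, w6+w7}
;   movhlps + paddd     -> {w0+w1+w4+w5, w2+w3+w6+w7, x, x}
;   pshuflw 0xE + paddd -> low dword = w0+...+w7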
%macro SAD_INC_4x16P_SSE2 0
movdqu xmm1, [rdx]
......@@ -217,15 +234,8 @@ x264_pixel_sad_16x8_sse2:
%endmacro
%macro SSD_END_SSE2 0
movdqa xmm1, xmm0
psrldq xmm1, 8
paddd xmm0, xmm1
movdqa xmm1, xmm0
psrldq xmm1, 4
paddd xmm0, xmm1
movd eax, xmm0
HADDD xmm0, xmm1
movd eax, xmm0
ret
%endmacro
......@@ -399,20 +409,6 @@ x264_pixel_ssd_16x8_sse2:
paddusw %7, %4
%endmacro
%macro SUM_MM_SSE2 2 ; sum junk
movdqa %2, %1
psrldq %1, 2
paddusw %1, %2
pand %1, [pd_0000ffff GLOBAL]
movdqa %2, %1
psrldq %1, 4
paddd %1, %2
movdqa %2, %1
psrldq %1, 8
paddd %1, %2
movd eax,%1
%endmacro
%macro SATD_TWO_SSE2 0
LOAD4x8_DIFF_SSE2
HADAMARD4x4_TWO_SSE2 xmm0, xmm1, xmm2, xmm4, xmm5, xmm3
......@@ -430,8 +426,9 @@ x264_pixel_ssd_16x8_sse2:
%endmacro
%macro SATD_END 0
psrlw xmm6, 1
SUM_MM_SSE2 xmm6, xmm7
psrlw xmm6, 1
HADDW xmm6, xmm7
movd eax, xmm6
ret
%endmacro
......@@ -531,6 +528,13 @@ x264_pixel_satd_8x4_sse2:
punpckh%2 %5, %4
%endmacro
%macro TRANSPOSE4x4D 5 ; abcd-t -> adtc
SBUTTERFLY dqa, dq, %1, %2, %5
SBUTTERFLY dqa, dq, %3, %4, %2
SBUTTERFLY dqa, qdq, %1, %3, %4
SBUTTERFLY dqa, qdq, %5, %2, %3
%endmacro
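; tracing the four butterflies on input rows a,b,c,d (t = temporary):
;   SBUTTERFLY dq  a,b,t -> a={a0,b0,a1,b1}, t={a2,b2,a3,b3}
;   SBUTTERFLY dq  c,d,b -> c={c0,d0,c1,d1}, b={c2,d2,c3,d3}
;   SBUTTERFLY qdq a,c,d -> a=col0, d=col1
;   SBUTTERFLY qdq t,b,c -> t=col2, c=col3
; so the transposed rows land in a,d,t,c, hence "abcd-t -> adtc"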
;-----------------------------------------------------------------------------
; input ABCDEFGH output AFHDTECB
;-----------------------------------------------------------------------------
......@@ -593,7 +597,8 @@ x264_pixel_sa8d_8x8_sse2:
SUM4x4_TWO_SSE2 xmm0, xmm1, xmm6, xmm2, xmm3, xmm9, xmm10
SUM4x4_TWO_SSE2 xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10
psrlw xmm10, 1
SUM_MM_SSE2 xmm10, xmm0
HADDW xmm10, xmm0
movd eax, xmm10
add r8d, eax ; preserve rounding for 16x16
add eax, 1
shr eax, 1
......@@ -695,17 +700,128 @@ x264_intra_sa8d_x3_8x8_core_sse2:
psubw xmm0, xmm1 ; 8x1 sum
SUM1x8_SSE2 xmm0, xmm1, xmm2
SUM_MM_SSE2 xmm14, xmm3
HADDW xmm14, xmm3
movd eax, xmm14
add eax, 2
shr eax, 2
mov [parm3q+4], eax ; i8x8_h sa8d
SUM_MM_SSE2 xmm15, xmm4
HADDW xmm15, xmm4
movd eax, xmm15
add eax, 2
shr eax, 2
mov [parm3q+8], eax ; i8x8_dc sa8d
SUM_MM_SSE2 xmm2, xmm5
HADDW xmm2, xmm5
movd eax, xmm2
add eax, 2
shr eax, 2
mov [parm3q+0], eax ; i8x8_v sa8d
ret
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_sse2( const uint8_t *pix1, int stride1,
; const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssim_4x4x2_core_sse2:
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
movdqa xmm8, [pw_1 GLOBAL]
%rep 4
movq xmm5, [parm1q]
movq xmm6, [parm3q]
punpcklbw xmm5, xmm0
punpcklbw xmm6, xmm0
paddw xmm1, xmm5
paddw xmm2, xmm6
movdqa xmm7, xmm5
pmaddwd xmm5, xmm5
pmaddwd xmm7, xmm6
pmaddwd xmm6, xmm6
paddd xmm3, xmm5
paddd xmm4, xmm7
paddd xmm3, xmm6
add parm1q, parm2q
add parm3q, parm4q
%endrep
; the shuffles and adds below emulate:
; PHADDW xmm1, xmm2
; PHADDD xmm3, xmm4
pshufd xmm5, xmm3, 0xB1
pmaddwd xmm1, xmm8
pmaddwd xmm2, xmm8
pshufd xmm6, xmm4, 0xB1
packssdw xmm1, xmm2
paddd xmm3, xmm5
pmaddwd xmm1, xmm8
paddd xmm4, xmm6
pshufd xmm1, xmm1, 0xD8
movdqa xmm5, xmm3
punpckldq xmm3, xmm4
punpckhdq xmm5, xmm4
movq [parm5q+ 0], xmm1
movq [parm5q+ 8], xmm3
psrldq xmm1, 8
movq [parm5q+16], xmm1
movq [parm5q+24], xmm5
ret
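; note on the stores above: each 4x4 block's results are laid out as
; sums[z] = { s1, s2, ss, s12 }, matching the C reference
; ssim_4x4x2_core added to pixel.c in this commit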
;-----------------------------------------------------------------------------
; float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width )
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssim_end4_sse2:
movdqa xmm0, [parm1q+ 0]
movdqa xmm1, [parm1q+16]
movdqa xmm2, [parm1q+32]
movdqa xmm3, [parm1q+48]
movdqa xmm4, [parm1q+64]
paddd xmm0, [parm2q+ 0]
paddd xmm1, [parm2q+16]
paddd xmm2, [parm2q+32]
paddd xmm3, [parm2q+48]
paddd xmm4, [parm2q+64]
paddd xmm0, xmm1
paddd xmm1, xmm2
paddd xmm2, xmm3
paddd xmm3, xmm4
movdqa xmm5, [ssim_c1 GLOBAL]
movdqa xmm6, [ssim_c2 GLOBAL]
TRANSPOSE4x4D xmm0, xmm1, xmm2, xmm3, xmm4
; s1=mm0, s2=mm3, ss=mm4, s12=mm2
movdqa xmm1, xmm3
pslld xmm3, 16
pmaddwd xmm1, xmm0 ; s1*s2
por xmm0, xmm3
pmaddwd xmm0, xmm0 ; s1*s1 + s2*s2
pslld xmm1, 1
pslld xmm2, 7
pslld xmm4, 6
psubd xmm2, xmm1 ; covar*2
psubd xmm4, xmm0 ; vars
paddd xmm0, xmm5
paddd xmm1, xmm5
paddd xmm2, xmm6
paddd xmm4, xmm6
cvtdq2ps xmm0, xmm0 ; (float)(s1*s1 + s2*s2 + ssim_c1)
cvtdq2ps xmm1, xmm1 ; (float)(s1*s2*2 + ssim_c1)
cvtdq2ps xmm2, xmm2 ; (float)(covar*2 + ssim_c2)
cvtdq2ps xmm4, xmm4 ; (float)(vars + ssim_c2)
mulps xmm1, xmm2
mulps xmm0, xmm4
divps xmm1, xmm0 ; ssim
neg parm3d
movdqu xmm3, [mask_ff + parm3d*4 + 16 GLOBAL]
pand xmm1, xmm3
movhlps xmm0, xmm1
addps xmm0, xmm1
pshuflw xmm1, xmm0, 0xE
addss xmm0, xmm1
ret
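The width handling at the end relies on a sliding load from the mask_ff table. A minimal C model of the trick (mask_ff_c and lane_mask are illustrative names, not part of the commit):

/* mask_ff is 16 bytes of 0xff followed by 16 bytes of 0x00.  Loading
 * 16 bytes at offset 16 - 4*width (width in 1..4) yields exactly
 * `width` all-ones dwords, so the pand zeroes the SSIM lanes beyond
 * the requested width before the horizontal add. */
static const unsigned char mask_ff_c[32] = {
    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
static const unsigned char *lane_mask( int width )
{
    /* same address arithmetic as "neg parm3d" + [mask_ff + parm3d*4 + 16] */
    return &mask_ff_c[16 - 4*width];
}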
......@@ -123,6 +123,7 @@ void x264_param_default( x264_param_t *param )
param->analyse.b_fast_pskip = 1;
param->analyse.b_dct_decimate = 1;
param->analyse.b_psnr = 1;
param->analyse.b_ssim = 1;
param->i_cqm_preset = X264_CQM_FLAT;
memset( param->cqm_4iy, 16, 16 );
......@@ -460,6 +461,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->rc.psz_zones = strdup(value);
OPT("psnr")
p->analyse.b_psnr = atobool(value);
OPT("ssim")
p->analyse.b_ssim = atobool(value);
OPT("aud")
p->b_aud = atobool(value);
OPT("sps-id")
......
......@@ -546,6 +546,7 @@ struct x264_t
float f_psnr_mean_y[5];
float f_psnr_mean_u[5];
float f_psnr_mean_v[5];
float f_ssim_mean_y[5];
/* */
int64_t i_mb_count[5][19];
int64_t i_mb_count_8x8dct[2];
......
......@@ -490,6 +490,7 @@ cglobal x264_intra_satd_x3_8x8c_mmxext
cglobal x264_intra_satd_x3_16x16_mmxext
cglobal x264_intra_sa8d_x3_8x8_core_mmxext
cglobal x264_pixel_ssim_4x4x2_core_mmxext
%macro SAD_START 0
push ebx
......@@ -1571,3 +1572,66 @@ x264_intra_sa8d_x3_8x8_core_mmxext:
%undef trans
%undef sum
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_mmxext( const uint8_t *pix1, int stride1,
; const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssim_4x4x2_core_mmxext:
push ebx
push edi
mov ebx, [esp+16]
mov edx, [esp+24]
mov edi, 4
pxor mm0, mm0
.loop:
mov eax, [esp+12]
mov ecx, [esp+20]
add eax, edi
add ecx, edi
pxor mm1, mm1
pxor mm2, mm2
pxor mm3, mm3
pxor mm4, mm4
%rep 4
movd mm5, [eax]
movd mm6, [ecx]
punpcklbw mm5, mm0
punpcklbw mm6, mm0
paddw mm1, mm5
paddw mm2, mm6
movq mm7, mm5
pmaddwd mm5, mm5
pmaddwd mm7, mm6
pmaddwd mm6, mm6
paddd mm3, mm5
paddd mm4, mm7
paddd mm3, mm6
add eax, ebx
add ecx, edx
%endrep
mov eax, [esp+28]
lea eax, [eax+edi*4]
pshufw mm5, mm1, 0xE
pshufw mm6, mm2, 0xE
paddusw mm1, mm5
paddusw mm2, mm6
punpcklwd mm1, mm2
pshufw mm2, mm1, 0xE
pshufw mm5, mm3, 0xE
pshufw mm6, mm4, 0xE
paddusw mm1, mm2
paddd mm3, mm5
paddd mm4, mm6
punpcklwd mm1, mm0
punpckldq mm3, mm4
movq [eax+0], mm1
movq [eax+8], mm3
sub edi, 4
jge .loop
pop edi
pop ebx
emms
ret
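; note: same sums[2][4] = {s1,s2,ss,s12} layout as the sse2 version;
; the loop runs twice (edi = 4, then 0) to cover the two adjacent 4x4
; blocks, storing to sums[1] first and then sums[0]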
......@@ -30,7 +30,11 @@ BITS 32
SECTION_RODATA
pd_0000ffff: times 4 dd 0x0000ffff
pw_1: times 8 dw 1
ssim_c1: times 4 dd 416 ; .01*.01*255*255*64
ssim_c2: times 4 dd 235963 ; .03*.03*255*255*64*63
mask_ff: times 16 db 0xff
times 16 db 0
SECTION .text
......@@ -49,6 +53,23 @@ cglobal x264_pixel_satd_8x8_sse2
cglobal x264_pixel_satd_16x8_sse2
cglobal x264_pixel_satd_8x16_sse2
cglobal x264_pixel_satd_16x16_sse2
cglobal x264_pixel_ssim_4x4x2_core_sse2
cglobal x264_pixel_ssim_end4_sse2
%macro SBUTTERFLY 5
mov%1 %5, %3
punpckl%2 %3, %4
punpckh%2 %5, %4
%endmacro
%macro TRANSPOSE4x4D 5 ; abcd-t -> adtc
SBUTTERFLY dqa, dq, %1, %2, %5
SBUTTERFLY dqa, dq, %3, %4, %2
SBUTTERFLY dqa, qdq, %1, %3, %4
SBUTTERFLY dqa, qdq, %5, %2, %3
%endmacro
%macro SAD_INC_4x16P_SSE2 0
movdqu xmm1, [ecx]
......@@ -548,22 +569,14 @@ x264_pixel_ssd_16x8_sse2:
paddusw %7, %4
%endmacro
%macro SUM_MM_SSE2 2 ; sum junk
%macro HADDW 2 ; sum junk
; ebx is no longer used at this point, so no push needed
picgetgot ebx
; each column sum of SATD is necessarily even, so we don't lose any precision by shifting first.
psrlw %1, 1
movdqa %2, %1
psrldq %1, 2
paddusw %1, %2
pand %1, [pd_0000ffff GOT_ebx]
movdqa %2, %1
psrldq %1, 4
pmaddwd %1, [pw_1 GOT_ebx]
movhlps %2, %1
paddd %1, %2
movdqa %2, %1
psrldq %1, 8
pshuflw %2, %1, 0xE
paddd %1, %2
movd eax,%1
%endmacro
%macro SATD_TWO_SSE2 0
......@@ -586,8 +599,10 @@ x264_pixel_ssd_16x8_sse2:
%endmacro
%macro SATD_END 0
SUM_MM_SSE2 xmm6, xmm7
; each column sum of SATD is necessarily even, so we don't lose any precision by shifting first.
psrlw xmm6, 1
HADDW xmm6, xmm7
movd eax, xmm6
pop ebx
ret
%endmacro
......@@ -673,3 +688,127 @@ x264_pixel_satd_8x4_sse2:
SATD_END
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_sse2( const uint8_t *pix1, int stride1,
; const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssim_4x4x2_core_sse2:
push ebx
mov eax, [esp+ 8]
mov ebx, [esp+12]
mov ecx, [esp+16]
mov edx, [esp+20]
pxor xmm0, xmm0
pxor xmm1, xmm1
pxor xmm2, xmm2
pxor xmm3, xmm3
pxor xmm4, xmm4
%rep 4
movq xmm5, [eax]
movq xmm6, [ecx]
punpcklbw xmm5, xmm0
punpcklbw xmm6, xmm0
paddw xmm1, xmm5
paddw xmm2, xmm6
movdqa xmm7, xmm5
pmaddwd xmm5, xmm5
pmaddwd xmm7, xmm6
pmaddwd xmm6, xmm6
paddd xmm3, xmm5
paddd xmm4, xmm7
paddd xmm3, xmm6
add eax, ebx
add ecx, edx
%endrep
; the shuffles and adds below emulate:
; PHADDW xmm1, xmm2
; PHADDD xmm3, xmm4
mov eax, [esp+24]
picgetgot ebx
movdqa xmm7, [pw_1 GOT_ebx]
pshufd xmm5, xmm3, 0xB1
pmaddwd xmm1, xmm7
pmaddwd xmm2, xmm7
pshufd xmm6, xmm4, 0xB1
packssdw xmm1, xmm2
paddd xmm3, xmm5
pmaddwd xmm1, xmm7
paddd xmm4, xmm6
pshufd xmm1, xmm1, 0xD8
movdqa xmm5, xmm3
punpckldq xmm3, xmm4
punpckhdq xmm5, xmm4
movq [eax+ 0], xmm1
movq [eax+ 8], xmm3
psrldq xmm1, 8
movq [eax+16], xmm1
movq [eax+24], xmm5
pop ebx
ret
;-----------------------------------------------------------------------------
; float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width )
;-----------------------------------------------------------------------------
ALIGN 16
x264_pixel_ssim_end4_sse2:
mov eax, [esp+ 4]
mov ecx, [esp+ 8]
mov edx, [esp+12]
picpush ebx
picgetgot ebx
movdqa xmm0, [eax+ 0]
movdqa xmm1, [eax+16]
movdqa xmm2, [eax+32]
movdqa xmm3, [eax+48]
movdqa xmm4, [eax+64]
paddd xmm0, [ecx+ 0]
paddd xmm1, [ecx+16]
paddd xmm2, [ecx+32]
paddd xmm3, [ecx+48]
paddd xmm4, [ecx+64]
paddd xmm0, xmm1
paddd xmm1, xmm2
paddd xmm2, xmm3
paddd xmm3, xmm4
movdqa xmm5, [ssim_c1 GOT_ebx]
movdqa xmm6, [ssim_c2 GOT_ebx]
TRANSPOSE4x4D xmm0, xmm1, xmm2, xmm3, xmm4
; s1=mm0, s2=mm3, ss=mm4, s12=mm2
movdqa xmm1, xmm3
pslld xmm3, 16
pmaddwd xmm1, xmm0 ; s1*s2
por xmm0, xmm3
pmaddwd xmm0, xmm0 ; s1*s1 + s2*s2
pslld xmm1, 1
pslld xmm2, 7
pslld xmm4, 6
psubd xmm2, xmm1 ; covar*2
psubd xmm4, xmm0 ; vars
paddd xmm0, xmm5
paddd xmm1, xmm5
paddd xmm2, xmm6
paddd xmm4, xmm6
cvtdq2ps xmm0, xmm0 ; (float)(s1*s1 + s2*s2 + ssim_c1)
cvtdq2ps xmm1, xmm1 ; (float)(s1*s2*2 + ssim_c1)
cvtdq2ps xmm2, xmm2 ; (float)(covar*2 + ssim_c2)
cvtdq2ps xmm4, xmm4 ; (float)(vars + ssim_c2)
mulps xmm1, xmm2
mulps xmm0, xmm4
divps xmm1, xmm0 ; ssim
neg edx
movdqu xmm3, [mask_ff + edx*4 + 16 GOT_ebx]
pand xmm1, xmm3
movhlps xmm0, xmm1
addps xmm0, xmm1
pshuflw xmm1, xmm0, 0xE
addss xmm0, xmm1
movd [picesp+4], xmm0
fld dword [picesp+4]
picpop ebx
ret
......@@ -98,4 +98,10 @@ void x264_intra_sa8d_x3_8x8_mmxext( uint8_t *, uint8_t *, int * );
void x264_intra_sa8d_x3_8x8_core_sse2( uint8_t *, int16_t [2][8], int * );
void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
void x264_pixel_ssim_4x4x2_core_mmxext( const uint8_t *pix1, int stride1,
const uint8_t *pix2, int stride2, int sums[2][4] );
void x264_pixel_ssim_4x4x2_core_sse2( const uint8_t *pix1, int stride1,
const uint8_t *pix2, int stride2, int sums[2][4] );
float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width );
#endif
......@@ -322,6 +322,84 @@ SAD_X( 8x16_vis )
SAD_X( 8x8_vis )
#endif
static void ssim_4x4x2_core( const uint8_t *pix1, int stride1,
const uint8_t *pix2, int stride2,
int sums[2][4])
{
int x, y, z;
for(z=0; z<2; z++)
{
uint32_t s1=0, s2=0, ss=0, s12=0;
for(y=0; y<4; y++)
for(x=0; x<4; x++)
{
int a = pix1[x+y*stride1];
int b = pix2[x+y*stride2];
s1 += a;
s2 += b;
ss += a*a;
ss += b*b;
s12 += a*b;
}
sums[z][0] = s1;
sums[z][1] = s2;
sums[z][2] = ss;
sums[z][3] = s12;
pix1 += 4;
pix2 += 4;
}
}
static float ssim_end1( int s1, int s2, int ss, int s12 )
{
static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
int vars = ss*64 - s1*s1 - s2*s2;
int covar = s12*64 - s1*s2;
return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
/ ((float)(s1*s1 + s2*s2 + ssim_c1) * (float)(vars + ssim_c2));
}
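Each ssim_end1 call receives sums over four 4x4 blocks, i.e. one 8x8 window of 64 samples, hence the factor 64 in the constants (and 64*63, presumably n*(n-1) for the variance term). A standalone sanity check of the rounded values (not part of the commit):

#include <assert.h>
int main( void )
{
    /* .01*.01*255*255*64 = 416.16 and .03*.03*255*255*64*63 = 235962.72 */
    assert( (int)(.01*.01*255*255*64 + .5) == 416 );
    assert( (int)(.03*.03*255*255*64*63 + .5) == 235963 );
    return 0;
}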
static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
{
int i;
float ssim = 0.0;
for( i = 0; i < width; i++ )
ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
return ssim;
}
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
uint8_t *pix1, int stride1,
uint8_t *pix2, int stride2,
int width, int height )
{
int x, y, z;
float ssim = 0.0;
int sums[2][width/4+3][4];
int (*sum0)[4] = sums[0];
int (*sum1)[4] = sums[1];
width >>= 2;
height >>= 2;
z = 0;
for( y = 1; y < height; y++ )
{
for( ; z <= y; z++ )
{
XCHG( void*, sum0, sum1 );
for( x = 0; x < width; x+=2 )
pf->ssim_4x4x2_core( &pix1[4*(x+z*stride1)], stride1, &pix2[4*(x+z*stride2)], stride2, &sum0[x] );
}
for( x = 0; x < width-1; x += 4 )
ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
}
return ssim / ((width-1) * (height-1));
}
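A sketch of a plausible call site (the encoder wiring is outside this hunk; field names follow x264 conventions and the f_ssim_mean_y counter added above):

if( h->param.analyse.b_ssim )
{
    /* accumulate mean luma SSIM per slice type, alongside the PSNR stats */
    h->stat.f_ssim_mean_y[h->sh.i_type] +=
        x264_pixel_ssim_wxh( &h->pixf,
                             h->fdec->plane[0], h->fdec->i_stride[0],
                             h->fenc->plane[0], h->fenc->i_stride[0],
                             h->param.i_width, h->param.i_height );
}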
/****************************************************************************
* x264_pixel_init:
****************************************************************************/
......@@ -348,6 +426,8 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sa8d[PIXEL_16x8] = x264_pixel_sa8d_16x8;
pixf->sa8d[PIXEL_8x16] = x264_pixel_sa8d_8x16;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8;
pixf->ssim_4x4x2_core = ssim_4x4x2_core;
pixf->ssim_end4 = ssim_end4;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMX )
......@@ -370,6 +450,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_mmxext;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_mmxext;
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_mmxext;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_mmxext;
#endif
pixf->intra_satd_x3_16x16 = x264_intra_satd_x3_16x16_mmxext;
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmxext;
......@@ -403,6 +484,8 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_sse2;
pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_sse2;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_sse2;
pixf->ssim_end4 = x264_pixel_ssim_end4_sse2;
#ifdef ARCH_X86_64
pixf->sa8d[PIXEL_16x16] = x264_pixel_sa8d_16x16_sse2;
......
......@@ -67,8 +67,14 @@ typedef struct
x264_pixel_cmp_t sad[7];
x264_pixel_cmp_t ssd[7];
x264_pixel_cmp_t satd[7];
x264_pixel_cmp_t ssim[7];
x264_pixel_cmp_t sa8d[4];
x264_pixel_cmp_t mbcmp[7]; /* either satd or sad for subpel refine and mode decision */
x264_pixel_cmp_t rdcmp[7]; /* either ssd or ssim for rate-distortion */
void (*ssim_4x4x2_core)( const uint8_t *pix1, int stride1,
const uint8_t *pix2, int stride2, int sums[2][4] );
float (*ssim_end4)( int sum0[5][4], int sum1[5][4], int width );
/* partial distortion elimination:
* terminate early if partial score is worse than a threshold.
......@@ -89,5 +95,6 @@ typedef struct
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
int64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height );
#endif
......@@ -363,6 +363,7 @@ static int x264_validate_parameters( x264_t *h )
h->param.rc.f_pb_factor = 1;
h->param.analyse.b_transform_8x8 = 0;
h->param.analyse.b_psnr = 0;
h->param.analyse.b_ssim = 0;
h->param.analyse.i_chroma_qp_offset = 0;
h->param.analyse.i_trellis = 0;
h->param.analyse.b_fast_pskip = 0;
......@@ -447,6 +448,12 @@ static int x264_validate_parameters( x264_t *h )
h->param.i_sps_id &= 31;
if( h->param.i_log_level < X264_LOG_INFO )
{
h->param.analyse.b_psnr = 0;
h->param.analyse.b_ssim = 0;
}
/* ensure the booleans are 0 or 1 so they can be used in math */
#define BOOLIFY(x) h->param.x = !!h->param.x
BOOLIFY( b_cabac );
......@@ -462,6 +469,13 @@ static int x264_validate_parameters( x264_t *h )
return 0;
}
static void mbcmp_init( x264_t *h )
{
memcpy( h->pixf.mbcmp,
( h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1 ) ? h->pixf.sad : h->pixf.satd,
sizeof(h->pixf.mbcmp) );
}
/****************************************************************************
* x264_encoder_open:
****************************************************************************/
......@@ -603,9 +617,7 @@ x264_t *x264_encoder_open ( x264_param_t *param )
x264_quant_init( h, h->param.cpu, &h->quantf );
x264_deblock_init( h->param.cpu, &h->loopf );
memcpy( h->pixf.mbcmp,
( h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1 ) ? h->pixf.sad : h->pixf.satd,
sizeof(h->pixf.mbcmp) );
mbcmp_init( h );