Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Open sidebar
VideoLAN
x264
Commits
7760f1b2
Commit
7760f1b2
authored
Aug 17, 2006
by
Loren Merritt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
SSIM computation. (default on, disable by --no-ssim)
git-svn-id:
svn://svn.videolan.org/x264/trunk@554
df754926-b1dd-0310-bc7b-ec298dee348c
parent
127e2fbf
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
540 additions
and
73 deletions
+540
-73
common/amd64/pixel-sse2.asm
common/amd64/pixel-sse2.asm
+148
-32
common/common.c
common/common.c
+3
-0
common/common.h
common/common.h
+1
-0
common/i386/pixel-a.asm
common/i386/pixel-a.asm
+64
-0
common/i386/pixel-sse2.asm
common/i386/pixel-sse2.asm
+154
-15
common/i386/pixel.h
common/i386/pixel.h
+6
-0
common/pixel.c
common/pixel.c
+83
-0
common/pixel.h
common/pixel.h
+7
-0
encoder/encoder.c
encoder/encoder.c
+53
-23
tools/checkasm.c
tools/checkasm.c
+16
-0
x264.c
x264.c
+2
-1
x264.h
x264.h
+3
-2
No files found.
common/amd64/pixel-sse2.asm
View file @
7760f1b2
...
...
@@ -30,9 +30,12 @@ BITS 64
SECTION
.rodata
al
ign
=
16
pd_0000ffff:
times
4
dd
0x0000ffff
pb_1:
times
16
db
1
pb_1:
times
16
db
1
pw_1:
times
8
dw
1
ssim_c1:
times
4
dd
416
; .01*.01*255*255*64
ssim_c2:
times
4
dd
235963
; .03*.03*255*255*64*63
mask_ff:
times
16
db
0xff
times
16
db
0
SECTION
.text
...
...
@@ -49,6 +52,20 @@ cglobal x264_pixel_satd_16x16_sse2
cglobal
x264_pixel_sa8d_8x8_sse2
cglobal
x264_pixel_sa8d_16x16_sse2
cglobal
x264_intra_sa8d_x3_8x8_core_sse2
cglobal
x264_pixel_ssim_4x4x2_core_sse2
cglobal
x264_pixel_ssim_end4_sse2
; HADDD sum, junk
;   Horizontal add of the four dwords held in %1.  The total is left in the
;   low dword of %1; the remaining lanes of %1 hold partial sums and are not
;   meaningful.  %2 is a scratch xmm register and is overwritten.
%macro HADDD 2 ; sum junk
    movhlps   %2, %1          ; junk.lo64 = sum.hi64
    paddd     %1, %2          ; sum.d0 = d0+d2, sum.d1 = d1+d3
    pshuflw   %2, %1, 0xE     ; junk.d0 = sum.d1
    paddd     %1, %2          ; sum.d0 = d0+d1+d2+d3
%endmacro
; HADDW sum, junk
;   Horizontal add of the eight words held in %1.  pmaddwd against pw_1
;   multiplies each (signed) word by one and adds neighbouring pairs, giving
;   four dword partial sums; HADDD then folds those into the low dword of %1.
;   %2 is a scratch xmm register and is overwritten.
%macro HADDW 2 ; sum junk
    pmaddwd   %1, [pw_1 GLOBAL]
    HADDD     %1, %2
%endmacro
%macro SAD_INC_4x16P_SSE2 0
movdqu
xmm1
,
[
rdx
]
...
...
@@ -217,15 +234,8 @@ x264_pixel_sad_16x8_sse2:
%endmacro
%macro SSD_END_SSE2 0
movdqa
xmm1
,
xmm0
psrldq
xmm1
,
8
paddd
xmm0
,
xmm1
movdqa
xmm1
,
xmm0
psrldq
xmm1
,
4
paddd
xmm0
,
xmm1
movd
eax
,
xmm0
HADDD
xmm0
,
xmm1
movd
eax
,
xmm0
ret
%endmacro
...
...
@@ -399,20 +409,6 @@ x264_pixel_ssd_16x8_sse2:
paddusw
%
7
,
%
4
%endmacro
%macro SUM_MM_SSE2 2
; sum junk
movdqa
%
2
,
%
1
psrldq
%
1
,
2
paddusw
%
1
,
%
2
pand
%
1
,
[
pd_0000ffff
GLOBAL
]
movdqa
%
2
,
%
1
psrldq
%
1
,
4
paddd
%
1
,
%
2
movdqa
%
2
,
%
1
psrldq
%
1
,
8
paddd
%
1
,
%
2
movd
eax
,
%
1
%endmacro
%macro SATD_TWO_SSE2 0
LOAD4x8_DIFF_SSE2
HADAMARD4x4_TWO_SSE2
xmm0
,
xmm1
,
xmm2
,
xmm4
,
xmm5
,
xmm3
...
...
@@ -430,8 +426,9 @@ x264_pixel_ssd_16x8_sse2:
%endmacro
%macro SATD_END 0
psrlw
xmm6
,
1
SUM_MM_SSE2
xmm6
,
xmm7
psrlw
xmm6
,
1
HADDW
xmm6
,
xmm7
movd
eax
,
xmm6
ret
%endmacro
...
...
@@ -531,6 +528,13 @@ x264_pixel_satd_8x4_sse2:
punpckh
%
2
%
5
,
%
4
%endmacro
; TRANSPOSE4x4D a, b, c, d, t
;   Transposes a 4x4 matrix of dwords whose rows are in %1..%4, using %5 as a
;   temporary.  The transposed rows are not returned in the input registers:
;   row 0 ends up in %1, row 1 in %4, row 2 in %5 and row 3 in %3
;   ("abcd-t -> adtc").  %2 is left holding an intermediate value.
;   Relies on SBUTTERFLY (copy, punpckl, punpckh), defined earlier in the file.
%macro TRANSPOSE4x4D 5 ; abcd-t -> adtc
    SBUTTERFLY dqa, dq,  %1, %2, %5     ; interleave dwords of rows a,b
    SBUTTERFLY dqa, dq,  %3, %4, %2     ; interleave dwords of rows c,d
    SBUTTERFLY dqa, qdq, %1, %3, %4     ; low halves  -> rows 0 and 1
    SBUTTERFLY dqa, qdq, %5, %2, %3     ; high halves -> rows 2 and 3
%endmacro
;-----------------------------------------------------------------------------
; input ABCDEFGH output AFHDTECB
;-----------------------------------------------------------------------------
...
...
@@ -593,7 +597,8 @@ x264_pixel_sa8d_8x8_sse2:
SUM4x4_TWO_SSE2
xmm0
,
xmm1
,
xmm6
,
xmm2
,
xmm3
,
xmm9
,
xmm10
SUM4x4_TWO_SSE2
xmm4
,
xmm5
,
xmm6
,
xmm7
,
xmm8
,
xmm9
,
xmm10
psrlw
xmm10
,
1
SUM_MM_SSE2
xmm10
,
xmm0
HADDW
xmm10
,
xmm0
movd
eax
,
xmm10
add
r8d
,
eax
; preserve rounding for 16x16
add
eax
,
1
shr
eax
,
1
...
...
@@ -695,17 +700,128 @@ x264_intra_sa8d_x3_8x8_core_sse2:
psubw
xmm0
,
xmm1
; 8x1 sum
SUM1x8_SSE2
xmm0
,
xmm1
,
xmm2
SUM_MM_SSE2
xmm14
,
xmm3
HADDW
xmm14
,
xmm3
movd
eax
,
xmm14
add
eax
,
2
shr
eax
,
2
mov
[
parm3q
+
4
],
eax
; i8x8_h sa8d
SUM_MM_SSE2
xmm15
,
xmm4
HADDW
xmm15
,
xmm4
movd
eax
,
xmm15
add
eax
,
2
shr
eax
,
2
mov
[
parm3q
+
8
],
eax
; i8x8_dc sa8d
SUM_MM_SSE2
xmm2
,
xmm5
HADDW
xmm2
,
xmm5
movd
eax
,
xmm2
add
eax
,
2
shr
eax
,
2
mov
[
parm3q
+
0
],
eax
; i8x8_v sa8d
ret
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_sse2( const uint8_t *pix1, int stride1,
;                                       const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
; Accumulates the SSIM partial sums for two horizontally adjacent 4x4 blocks
; (8 pixels wide, 4 rows).  For each block z the routine stores
;   sums[z][0] = sum(a)         sums[z][1] = sum(b)
;   sums[z][2] = sum(a*a + b*b) sums[z][3] = sum(a*b)
; where a/b are the pixels of pix1/pix2.
;
; In:    parm1q = pix1, parm2d = stride1, parm3q = pix2, parm4d = stride2,
;        parm5q = sums
; Clobb: xmm0-xmm8, parm1q-parm4q, flags
; Note:  the strides are C ints, so they are sign-extended to 64 bits before
;        being added to the row pointers; the upper halves of the argument
;        registers are not guaranteed to be valid.
ALIGN 16
x264_pixel_ssim_4x4x2_core_sse2:
    movsxd    parm2q, parm2d            ; stride1 -> 64-bit
    movsxd    parm4q, parm4d            ; stride2 -> 64-bit
    pxor      xmm0, xmm0                ; zero, for byte->word unpacking
    pxor      xmm1, xmm1                ; per-column sums of a   (8 words)
    pxor      xmm2, xmm2                ; per-column sums of b   (8 words)
    pxor      xmm3, xmm3                ; pair sums of a*a + b*b (4 dwords)
    pxor      xmm4, xmm4                ; pair sums of a*b       (4 dwords)
    movdqa    xmm8, [pw_1 GLOBAL]
%rep 4
    movq      xmm5, [parm1q]            ; 8 pixels of pix1
    movq      xmm6, [parm3q]            ; 8 pixels of pix2
    punpcklbw xmm5, xmm0
    punpcklbw xmm6, xmm0
    paddw     xmm1, xmm5
    paddw     xmm2, xmm6
    movdqa    xmm7, xmm5
    pmaddwd   xmm5, xmm5                ; a*a
    pmaddwd   xmm7, xmm6                ; a*b
    pmaddwd   xmm6, xmm6                ; b*b
    paddd     xmm3, xmm5
    paddd     xmm4, xmm7
    paddd     xmm3, xmm6
    add       parm1q, parm2q
    add       parm3q, parm4q
%endrep
    ; The sequence below emulates horizontal adds:
    ; PHADDW xmm1, xmm2
    ; PHADDD xmm3, xmm4
    pshufd    xmm5, xmm3, 0xB1          ; swap neighbouring dwords
    pmaddwd   xmm1, xmm8                ; column pairs of a -> dwords
    pmaddwd   xmm2, xmm8                ; column pairs of b -> dwords
    pshufd    xmm6, xmm4, 0xB1
    packssdw  xmm1, xmm2                ; words: a01 a23 a45 a67 b01 b23 b45 b67
    paddd     xmm3, xmm5                ; dwords: ss0 ss0 ss1 ss1
    pmaddwd   xmm1, xmm8                ; dwords: s1[0] s1[1] s2[0] s2[1]
    paddd     xmm4, xmm6                ; dwords: s12_0 s12_0 s12_1 s12_1
    pshufd    xmm1, xmm1, 0xD8          ; dwords: s1[0] s2[0] s1[1] s2[1]
    movdqa    xmm5, xmm3
    punpckldq xmm3, xmm4                ; low qword:  ss0, s12_0
    punpckhdq xmm5, xmm4                ; low qword:  ss1, s12_1
    movq      [parm5q+ 0], xmm1         ; sums[0][0..1]
    movq      [parm5q+ 8], xmm3         ; sums[0][2..3]
    psrldq    xmm1, 8
    movq      [parm5q+16], xmm1         ; sums[1][0..1]
    movq      [parm5q+24], xmm5         ; sums[1][2..3]
    ret
;-----------------------------------------------------------------------------
; float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width )
;-----------------------------------------------------------------------------
; Computes up to four SSIM values at once and returns their sum.
; Value i is built from sum0[i] + sum0[i+1] + sum1[i] + sum1[i+1], where each
; row holds { s1, s2, ss, s12 }.  Only the first `width` values are added.
;
; In:    parm1q = sum0, parm2q = sum1, parm3d = width
; Out:   xmm0 (low float) = sum of the selected SSIM values
; Clobb: rax, parm3q, xmm0-xmm6, flags
; Notes: - sum0/sum1 are accessed with aligned 16-byte loads, and all five
;          rows are read regardless of width.
;        - the mask load below assumes 1 <= width <= 4 -- TODO confirm caller.
;        - pmaddwd treats s1/s2 as signed 16-bit values, so the summed s1/s2
;          are presumably expected to stay below 32768 -- TODO confirm.
ALIGN 16
x264_pixel_ssim_end4_sse2:
    movdqa    xmm0, [parm1q+ 0]
    movdqa    xmm1, [parm1q+16]
    movdqa    xmm2, [parm1q+32]
    movdqa    xmm3, [parm1q+48]
    movdqa    xmm4, [parm1q+64]
    paddd     xmm0, [parm2q+ 0]
    paddd     xmm1, [parm2q+16]
    paddd     xmm2, [parm2q+32]
    paddd     xmm3, [parm2q+48]
    paddd     xmm4, [parm2q+64]
    paddd     xmm0, xmm1                ; row i + row i+1, for i = 0..3
    paddd     xmm1, xmm2
    paddd     xmm2, xmm3
    paddd     xmm3, xmm4
    movdqa    xmm5, [ssim_c1 GLOBAL]
    movdqa    xmm6, [ssim_c2 GLOBAL]
    TRANSPOSE4x4D  xmm0, xmm1, xmm2, xmm3, xmm4

;   s1=xmm0, s2=xmm3, ss=xmm4, s12=xmm2

    movdqa    xmm1, xmm3
    pslld     xmm3, 16
    pmaddwd   xmm1, xmm0                ; s1*s2
    por       xmm0, xmm3                ; words: s1 in low half, s2 in high half
    pmaddwd   xmm0, xmm0                ; s1*s1 + s2*s2
    pslld     xmm1, 1                   ; 2*s1*s2
    pslld     xmm2, 7                   ; s12*128
    pslld     xmm4, 6                   ; ss*64
    psubd     xmm2, xmm1                ; covar*2
    psubd     xmm4, xmm0                ; vars
    paddd     xmm0, xmm5
    paddd     xmm1, xmm5
    paddd     xmm2, xmm6
    paddd     xmm4, xmm6
    cvtdq2ps  xmm0, xmm0                ; (float)(s1*s1 + s2*s2 + ssim_c1)
    cvtdq2ps  xmm1, xmm1                ; (float)(s1*s2*2 + ssim_c1)
    cvtdq2ps  xmm2, xmm2                ; (float)(covar*2 + ssim_c2)
    cvtdq2ps  xmm4, xmm4                ; (float)(vars + ssim_c2)
    mulps     xmm1, xmm2
    mulps     xmm0, xmm4
    divps     xmm1, xmm0                ; ssim

    ; Keep only the first `width` lanes: load 16 bytes starting width*4 bytes
    ; before the end of the 0xff run in mask_ff.  The index is computed in a
    ; full 64-bit register and the table base is taken with lea, so the
    ; address is neither truncated to 32 bits nor mixed with an index in the
    ; same operand as GLOBAL.
    movsxd    parm3q, parm3d
    neg       parm3q
    lea       rax, [mask_ff + 16 GLOBAL]
    movdqu    xmm3, [rax + parm3q*4]
    pand      xmm1, xmm3

    movhlps   xmm0, xmm1                ; xmm0.lo64 = lanes 2,3
    addps     xmm0, xmm1                ; lanes 0+2, 1+3
    pshuflw   xmm1, xmm0, 0xE           ; xmm1.d0 = xmm0.d1
    addss     xmm0, xmm1                ; total in the low float
    ret
common/common.c
View file @
7760f1b2
...
...
@@ -123,6 +123,7 @@ void x264_param_default( x264_param_t *param )
param
->
analyse
.
b_fast_pskip
=
1
;
param
->
analyse
.
b_dct_decimate
=
1
;
param
->
analyse
.
b_psnr
=
1
;
param
->
analyse
.
b_ssim
=
1
;
param
->
i_cqm_preset
=
X264_CQM_FLAT
;
memset
(
param
->
cqm_4iy
,
16
,
16
);
...
...
@@ -460,6 +461,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p
->
rc
.
psz_zones
=
strdup
(
value
);
OPT
(
"psnr"
)
p
->
analyse
.
b_psnr
=
atobool
(
value
);
OPT
(
"ssim"
)
p
->
analyse
.
b_ssim
=
atobool
(
value
);
OPT
(
"aud"
)
p
->
b_aud
=
atobool
(
value
);
OPT
(
"sps-id"
)
...
...
common/common.h
View file @
7760f1b2
...
...
@@ -546,6 +546,7 @@ struct x264_t
float
f_psnr_mean_y
[
5
];
float
f_psnr_mean_u
[
5
];
float
f_psnr_mean_v
[
5
];
float
f_ssim_mean_y
[
5
];
/* */
int64_t
i_mb_count
[
5
][
19
];
int64_t
i_mb_count_8x8dct
[
2
];
...
...
common/i386/pixel-a.asm
View file @
7760f1b2
...
...
@@ -490,6 +490,7 @@ cglobal x264_intra_satd_x3_8x8c_mmxext
cglobal
x264_intra_satd_x3_16x16_mmxext
cglobal
x264_intra_sa8d_x3_8x8_core_mmxext
cglobal
x264_pixel_ssim_4x4x2_core_mmxext
%macro SAD_START 0
push
ebx
...
...
@@ -1571,3 +1572,66 @@ x264_intra_sa8d_x3_8x8_core_mmxext:
%undef trans
%undef sum
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_mmxext( const uint8_t *pix1, int stride1,
;                                         const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
; MMX version: handles the two adjacent 4x4 blocks one after the other.
; For each block z it stores
;   sums[z][0] = sum(a)         sums[z][1] = sum(b)
;   sums[z][2] = sum(a*a + b*b) sums[z][3] = sum(a*b)
; The loop runs with edi = 4 (second block) and then edi = 0 (first block);
; edi is both the pixel offset of the block and, times 4, the byte offset of
; its row in sums.
;
; Stack after the two pushes: [esp+12] pix1, [esp+16] stride1, [esp+20] pix2,
;                             [esp+24] stride2, [esp+28] sums
; Clobb: eax, ecx, edx, mm0-mm7, flags (ebx and edi are saved/restored)
ALIGN 16
x264_pixel_ssim_4x4x2_core_mmxext:
    push      ebx
    push      edi
    mov       ebx, [esp+16]             ; stride1
    mov       edx, [esp+24]             ; stride2
    mov       edi, 4
    pxor      mm0, mm0                  ; zero, for unpacking
.loop:
    mov       eax, [esp+12]
    mov       ecx, [esp+20]
    add       eax, edi                  ; pix1 + block offset
    add       ecx, edi                  ; pix2 + block offset
    pxor      mm1, mm1                  ; per-column sums of a   (4 words)
    pxor      mm2, mm2                  ; per-column sums of b   (4 words)
    pxor      mm3, mm3                  ; pair sums of a*a + b*b (2 dwords)
    pxor      mm4, mm4                  ; pair sums of a*b       (2 dwords)
%rep 4
    movd      mm5, [eax]
    movd      mm6, [ecx]
    punpcklbw mm5, mm0
    punpcklbw mm6, mm0
    paddw     mm1, mm5
    paddw     mm2, mm6
    movq      mm7, mm5
    pmaddwd   mm5, mm5                  ; a*a
    pmaddwd   mm7, mm6                  ; a*b
    pmaddwd   mm6, mm6                  ; b*b
    paddd     mm3, mm5
    paddd     mm4, mm7
    paddd     mm3, mm6
    add       eax, ebx
    add       ecx, edx
%endrep
    mov       eax, [esp+28]
    lea       eax, [eax+edi*4]          ; &sums[edi/4][0]
    pshufw    mm5, mm1, 0xE             ; high dword -> low dword
    pshufw    mm6, mm2, 0xE
    paddusw   mm1, mm5                  ; words: a0+a2, a1+a3
    paddusw   mm2, mm6                  ; words: b0+b2, b1+b3
    punpcklwd mm1, mm2                  ; words: a02 b02 a13 b13
    pshufw    mm2, mm1, 0xE
    pshufw    mm5, mm3, 0xE
    pshufw    mm6, mm4, 0xE
    paddusw   mm1, mm2                  ; words: s1 s2
    paddd     mm3, mm5                  ; low dword: ss
    paddd     mm4, mm6                  ; low dword: s12
    punpcklwd mm1, mm0                  ; zero-extend s1, s2 to dwords
    punpckldq mm3, mm4                  ; dwords: ss, s12
    movq      [eax+0], mm1
    movq      [eax+8], mm3
    sub       edi, 4
    jge       .loop
    pop       edi
    pop       ebx
    emms
    ret
common/i386/pixel-sse2.asm
View file @
7760f1b2
...
...
@@ -30,7 +30,11 @@ BITS 32
SECTION
_RODATA
pd_0000ffff:
times
4
dd
0x0000ffff
pw_1:
times
8
dw
1
ssim_c1:
times
4
dd
416
; .01*.01*255*255*64
ssim_c2:
times
4
dd
235963
; .03*.03*255*255*64*63
mask_ff:
times
16
db
0xff
times
16
db
0
SECTION
.text
...
...
@@ -49,6 +53,23 @@ cglobal x264_pixel_satd_8x8_sse2
cglobal
x264_pixel_satd_16x8_sse2
cglobal
x264_pixel_satd_8x16_sse2
cglobal
x264_pixel_satd_16x16_sse2
cglobal
x264_pixel_ssim_4x4x2_core_sse2
cglobal
x264_pixel_ssim_end4_sse2
; SBUTTERFLY movsuffix, unpacksuffix, a, b, t
;   %1 = suffix appended to "mov"      (e.g. dqa -> movdqa)
;   %2 = suffix appended to "punpckl"/"punpckh" (e.g. dq, qdq)
;   Copies %3 to %5, then leaves the low-half interleave of (%3,%4) in %3 and
;   the high-half interleave in %5.  %4 is only read.
%macro SBUTTERFLY 5
    mov%1     %5, %3
    punpckl%2 %3, %4
    punpckh%2 %5, %4
%endmacro
; TRANSPOSE4x4D a, b, c, d, t
;   Transposes a 4x4 matrix of dwords whose rows are in %1..%4, using %5 as a
;   temporary.  The transposed rows are not returned in the input registers:
;   row 0 ends up in %1, row 1 in %4, row 2 in %5 and row 3 in %3
;   ("abcd-t -> adtc").  %2 is left holding an intermediate value.
%macro TRANSPOSE4x4D 5 ; abcd-t -> adtc
    SBUTTERFLY dqa, dq,  %1, %2, %5     ; interleave dwords of rows a,b
    SBUTTERFLY dqa, dq,  %3, %4, %2     ; interleave dwords of rows c,d
    SBUTTERFLY dqa, qdq, %1, %3, %4     ; low halves  -> rows 0 and 1
    SBUTTERFLY dqa, qdq, %5, %2, %3     ; high halves -> rows 2 and 3
%endmacro
%macro SAD_INC_4x16P_SSE2 0
movdqu
xmm1
,
[
ecx
]
...
...
@@ -548,22 +569,14 @@ x264_pixel_ssd_16x8_sse2:
paddusw
%
7
,
%
4
%endmacro
%macro
SUM_MM_SSE2
2
; sum junk
%macro
HADDW
2
; sum junk
; ebx is no longer used at this point, so no push needed
picgetgot
ebx
; each column sum of SATD is necessarily even, so we don't lose any precision by shifting first.
psrlw
%
1
,
1
movdqa
%
2
,
%
1
psrldq
%
1
,
2
paddusw
%
1
,
%
2
pand
%
1
,
[
pd_0000ffff
GOT_ebx
]
movdqa
%
2
,
%
1
psrldq
%
1
,
4
pmaddwd
%
1
,
[
pw_1
GOT_ebx
]
movhlps
%
2
,
%
1
paddd
%
1
,
%
2
movdqa
%
2
,
%
1
psrldq
%
1
,
8
pshuflw
%
2
,
%
1
,
0xE
paddd
%
1
,
%
2
movd
eax
,
%
1
%endmacro
%macro SATD_TWO_SSE2 0
...
...
@@ -586,8 +599,10 @@ x264_pixel_ssd_16x8_sse2:
%endmacro
%macro SATD_END 0
SUM_MM_SSE2
xmm6
,
xmm7
; each column sum of SATD is necessarily even, so we don't lose any precision by shifting first.
psrlw
xmm6
,
1
HADDW
xmm6
,
xmm7
movd
eax
,
xmm6
pop
ebx
ret
%endmacro
...
...
@@ -673,3 +688,127 @@ x264_pixel_satd_8x4_sse2:
SATD_END
;-----------------------------------------------------------------------------
; void x264_pixel_ssim_4x4x2_core_sse2( const uint8_t *pix1, int stride1,
;                                       const uint8_t *pix2, int stride2, int sums[2][4] )
;-----------------------------------------------------------------------------
; Accumulates the SSIM partial sums for two horizontally adjacent 4x4 blocks
; (8 pixels wide, 4 rows).  For each block z the routine stores
;   sums[z][0] = sum(a)         sums[z][1] = sum(b)
;   sums[z][2] = sum(a*a + b*b) sums[z][3] = sum(a*b)
; where a/b are the pixels of pix1/pix2.
;
; Stack after the push: [esp+8] pix1, [esp+12] stride1, [esp+16] pix2,
;                       [esp+20] stride2, [esp+24] sums
; ebx holds stride1 during the row loop and is then reused by picgetgot
; (a PIC helper macro defined outside this view) for the pw_1 load.
; Clobb: eax, ecx, edx, xmm0-xmm7, flags (ebx is saved/restored)
ALIGN 16
x264_pixel_ssim_4x4x2_core_sse2:
    push      ebx
    mov       eax, [esp+ 8]             ; pix1
    mov       ebx, [esp+12]             ; stride1
    mov       ecx, [esp+16]             ; pix2
    mov       edx, [esp+20]             ; stride2
    pxor      xmm0, xmm0                ; zero, for byte->word unpacking
    pxor      xmm1, xmm1                ; per-column sums of a   (8 words)
    pxor      xmm2, xmm2                ; per-column sums of b   (8 words)
    pxor      xmm3, xmm3                ; pair sums of a*a + b*b (4 dwords)
    pxor      xmm4, xmm4                ; pair sums of a*b       (4 dwords)
%rep 4
    movq      xmm5, [eax]
    movq      xmm6, [ecx]
    punpcklbw xmm5, xmm0
    punpcklbw xmm6, xmm0
    paddw     xmm1, xmm5
    paddw     xmm2, xmm6
    movdqa    xmm7, xmm5
    pmaddwd   xmm5, xmm5                ; a*a
    pmaddwd   xmm7, xmm6                ; a*b
    pmaddwd   xmm6, xmm6                ; b*b
    paddd     xmm3, xmm5
    paddd     xmm4, xmm7
    paddd     xmm3, xmm6
    add       eax, ebx
    add       ecx, edx
%endrep
    ; The sequence below emulates horizontal adds:
    ; PHADDW xmm1, xmm2
    ; PHADDD xmm3, xmm4
    mov       eax, [esp+24]             ; sums
    picgetgot ebx
    movdqa    xmm7, [pw_1 GOT_ebx]
    pshufd    xmm5, xmm3, 0xB1          ; swap neighbouring dwords
    pmaddwd   xmm1, xmm7                ; column pairs of a -> dwords
    pmaddwd   xmm2, xmm7                ; column pairs of b -> dwords
    pshufd    xmm6, xmm4, 0xB1
    packssdw  xmm1, xmm2                ; words: a01 a23 a45 a67 b01 b23 b45 b67
    paddd     xmm3, xmm5                ; dwords: ss0 ss0 ss1 ss1
    pmaddwd   xmm1, xmm7                ; dwords: s1[0] s1[1] s2[0] s2[1]
    paddd     xmm4, xmm6                ; dwords: s12_0 s12_0 s12_1 s12_1
    pshufd    xmm1, xmm1, 0xD8          ; dwords: s1[0] s2[0] s1[1] s2[1]
    movdqa    xmm5, xmm3
    punpckldq xmm3, xmm4                ; low qword:  ss0, s12_0
    punpckhdq xmm5, xmm4                ; low qword:  ss1, s12_1
    movq      [eax+ 0], xmm1            ; sums[0][0..1]
    movq      [eax+ 8], xmm3            ; sums[0][2..3]
    psrldq    xmm1, 8
    movq      [eax+16], xmm1            ; sums[1][0..1]
    movq      [eax+24], xmm5            ; sums[1][2..3]
    pop       ebx
    ret
;-----------------------------------------------------------------------------
; float x264_pixel_ssim_end4_sse2( int sum0[5][4], int sum1[5][4], int width )
;-----------------------------------------------------------------------------
; Computes up to four SSIM values at once and returns their sum.
; Value i is built from sum0[i] + sum0[i+1] + sum1[i] + sum1[i+1], where each
; row holds { s1, s2, ss, s12 }.  Only the first `width` values are added.
;
; In:    [esp+4] = sum0, [esp+8] = sum1, [esp+12] = width
; Out:   st0 = sum of the selected SSIM values
; Clobb: eax, ecx, edx, xmm0-xmm6, flags; the result is spilled through the
;        stack slot at [picesp+4] on its way to the x87 stack
; Notes: - picpush/picgetgot/picesp/picpop/GOT_ebx are PIC helper macros
;          defined outside this view; picesp presumably accounts for the
;          optional push -- TODO confirm.
;        - sum0/sum1 are accessed with aligned 16-byte loads, and all five
;          rows are read regardless of width.
;        - the mask load assumes 1 <= width <= 4 -- TODO confirm caller.
ALIGN 16
x264_pixel_ssim_end4_sse2:
    mov       eax, [esp+ 4]             ; sum0
    mov       ecx, [esp+ 8]             ; sum1
    mov       edx, [esp+12]             ; width
    picpush   ebx
    picgetgot ebx
    movdqa    xmm0, [eax+ 0]
    movdqa    xmm1, [eax+16]
    movdqa    xmm2, [eax+32]
    movdqa    xmm3, [eax+48]
    movdqa    xmm4, [eax+64]
    paddd     xmm0, [ecx+ 0]
    paddd     xmm1, [ecx+16]
    paddd     xmm2, [ecx+32]
    paddd     xmm3, [ecx+48]
    paddd     xmm4, [ecx+64]
    paddd     xmm0, xmm1                ; row i + row i+1, for i = 0..3
    paddd     xmm1, xmm2
    paddd     xmm2, xmm3
    paddd     xmm3, xmm4
    movdqa    xmm5, [ssim_c1 GOT_ebx]
    movdqa    xmm6, [ssim_c2 GOT_ebx]
    TRANSPOSE4x4D  xmm0, xmm1, xmm2, xmm3, xmm4

;   s1=xmm0, s2=xmm3, ss=xmm4, s12=xmm2

    movdqa    xmm1, xmm3
    pslld     xmm3, 16
    pmaddwd   xmm1, xmm0                ; s1*s2
    por       xmm0, xmm3                ; words: s1 in low half, s2 in high half
    pmaddwd   xmm0, xmm0                ; s1*s1 + s2*s2
    pslld     xmm1, 1                   ; 2*s1*s2
    pslld     xmm2, 7                   ; s12*128
    pslld     xmm4, 6                   ; ss*64
    psubd     xmm2, xmm1                ; covar*2
    psubd     xmm4, xmm0                ; vars
    paddd     xmm0, xmm5
    paddd     xmm1, xmm5
    paddd     xmm2, xmm6
    paddd     xmm4, xmm6
    cvtdq2ps  xmm0, xmm0                ; (float)(s1*s1 + s2*s2 + ssim_c1)
    cvtdq2ps  xmm1, xmm1                ; (float)(s1*s2*2 + ssim_c1)
    cvtdq2ps  xmm2, xmm2                ; (float)(covar*2 + ssim_c2)
    cvtdq2ps  xmm4, xmm4                ; (float)(vars + ssim_c2)
    mulps     xmm1, xmm2
    mulps     xmm0, xmm4
    divps     xmm1, xmm0                ; ssim

    ; Keep only the first `width` lanes: the 16 bytes starting width*4 bytes
    ; before the end of the 0xff run in mask_ff.
    neg       edx
    movdqu    xmm3, [mask_ff + edx*4 + 16 GOT_ebx]
    pand      xmm1, xmm3

    movhlps   xmm0, xmm1                ; xmm0.lo64 = lanes 2,3
    addps     xmm0, xmm1                ; lanes 0+2, 1+3
    pshuflw   xmm1, xmm0, 0xE           ; xmm1.d0 = xmm0.d1
    addss     xmm0, xmm1                ; total in the low float

    movd      [picesp+4], xmm0          ; move the result from SSE to x87
    fld       dword [picesp+4]
    picpop    ebx
    ret
common/i386/pixel.h
View file @
7760f1b2
...
...
@@ -98,4 +98,10 @@ void x264_intra_sa8d_x3_8x8_mmxext( uint8_t *, uint8_t *, int * );
void
x264_intra_sa8d_x3_8x8_core_sse2
(
uint8_t
*
,
int16_t
[
2
][
8
],
int
*
);
void
x264_intra_sa8d_x3_8x8_core_mmxext
(
uint8_t
*
,
int16_t
[
2
][
8
],
int
*
);
void
x264_pixel_ssim_4x4x2_core_mmxext
(
const
uint8_t
*
pix1
,
int
stride1
,
const
uint8_t
*
pix2
,
int
stride2
,
int
sums
[
2
][
4
]
);
void
x264_pixel_ssim_4x4x2_core_sse2
(
const
uint8_t
*
pix1
,
int
stride1
,
const
uint8_t
*
pix2
,
int
stride2
,
int
sums
[
2
][
4
]
);
float
x264_pixel_ssim_end4_sse2
(
int
sum0
[
5
][
4
],
int
sum1
[
5
][
4
],
int
width
);
#endif
common/pixel.c
View file @
7760f1b2
...
...
@@ -322,6 +322,84 @@ SAD_X( 8x16_vis )
SAD_X
(
8
x8_vis
)
#endif
/* Accumulate the SSIM partial sums of two horizontally adjacent 4x4 blocks.
 * For block b (0 = left, 1 = right, 4 pixels further along the row):
 *   sums[b][0] = sum of pix1 pixels
 *   sums[b][1] = sum of pix2 pixels
 *   sums[b][2] = sum of pix1^2 + pix2^2
 *   sums[b][3] = sum of pix1 * pix2
 * stride1/stride2 are the distances, in bytes, between consecutive rows. */
static void ssim_4x4x2_core( const uint8_t *pix1, int stride1,
                             const uint8_t *pix2, int stride2,
                             int sums[2][4] )
{
    int blk, row, col;
    for( blk = 0; blk < 2; blk++ )
    {
        const uint8_t *src1 = pix1 + 4*blk;
        const uint8_t *src2 = pix2 + 4*blk;
        uint32_t sum1 = 0, sum2 = 0, sum_sq = 0, sum_cross = 0;
        for( row = 0; row < 4; row++ )
        {
            for( col = 0; col < 4; col++ )
            {
                const int a = src1[col + row*stride1];
                const int b = src2[col + row*stride2];
                sum1      += a;
                sum2      += b;
                sum_sq    += a*a;
                sum_sq    += b*b;
                sum_cross += a*b;
            }
        }
        sums[blk][0] = sum1;
        sums[blk][1] = sum2;
        sums[blk][2] = sum_sq;
        sums[blk][3] = sum_cross;
    }
}
/* Turn one set of accumulated sums into an SSIM value.
 *   s1, s2 : sums of the pixels of the two images over the window
 *   ss     : sum of squares of both images' pixels
 *   s12    : sum of the pixel products
 * The factor 64 and the constants below correspond to a window of 64 pixels
 * (ssim_end4 feeds this with four 4x4 blocks added together).
 * All integer arithmetic is done in int; with sums of 64 8-bit pixels none of
 * the intermediate products overflow.
 * The float expression is kept as a single expression so the rounding points
 * are exactly the four int->float conversions. */
static float ssim_end1( int s1, int s2, int ss, int s12 )
{
    static const int ssim_c1 = (int)(.01*.01*255*255*64 + .5);
    static const int ssim_c2 = (int)(.03*.03*255*255*64*63 + .5);
    int vars  = ss*64 - s1*s1 - s2*s2;
    int covar = s12*64 - s1*s2;
    return (float)(2*s1*s2 + ssim_c1) * (float)(2*covar + ssim_c2)
         / ((float)(s1*s1 + s2*s2 + ssim_c1) * (float)(vars + ssim_c2));
}
/* Sum `width` SSIM values.  Value i combines the 2x2 group of 4x4-block sums
 * sum0[i], sum0[i+1], sum1[i], sum1[i+1]; each row holds { s1, s2, ss, s12 }.
 * Rows 0..width of both arrays are read, so width must not exceed 4.
 * The values are accumulated in float, in increasing i. */
static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
{
    int i;
    float ssim = 0.0;
    for( i = 0; i < width; i++ )
        ssim += ssim_end1( sum0[i][0] + sum0[i+1][0] + sum1[i][0] + sum1[i+1][0],
                           sum0[i][1] + sum0[i+1][1] + sum1[i][1] + sum1[i+1][1],
                           sum0[i][2] + sum0[i+1][2] + sum1[i][2] + sum1[i+1][2],
                           sum0[i][3] + sum0[i+1][3] + sum1[i][3] + sum1[i+1][3] );
    return ssim;
}
float
x264_pixel_ssim_wxh
(
x264_pixel_function_t
*
pf
,
uint8_t
*
pix1
,
int
stride1
,
uint8_t
*
pix2
,
int
stride2
,
int
width
,
int
height
)
{