Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
309ddabb
Commit
309ddabb
authored
Aug 03, 2011
by
Loren Merritt
Committed by
Fiona Glaser
Aug 09, 2011
Browse files
asm cosmetics: merge all the variants of ABS macros
parent
1921c682
Changes
8
Hide whitespace changes
Inline
Side-by-side
common/x86/deblock-a.asm
View file @
309ddabb
...
...
@@ -2128,7 +2128,8 @@ cglobal deblock_strength, 6,6,8
packsswb
m0
,
m1
packsswb
m2
,
m3
%endif
ABSB2
m0
,
m2
,
m1
,
m3
ABSB
m0
,
m1
ABSB
m2
,
m3
psubusb
m0
,
m6
psubusb
m2
,
m6
packsswb
m0
,
m2
...
...
@@ -2144,7 +2145,8 @@ cglobal deblock_strength, 6,6,8
psubw
m3
,
[
mv
+
4
*
8
*
3
]
packsswb
m0
,
m1
packsswb
m2
,
m3
ABSB2
m0
,
m2
,
m1
,
m3
ABSB
m0
,
m1
ABSB
m2
,
m3
psubusb
m0
,
m6
psubusb
m2
,
m6
packsswb
m0
,
m2
...
...
common/x86/pixel-32.asm
View file @
309ddabb
...
...
@@ -48,14 +48,14 @@ INIT_MMX mmx2
%macro SUM4x8_MM 0
movq
[
sp
ill
],
m6
movq
[
sp
ill
+
8
],
m7
ABS2
m0
,
m1
,
m6
,
m7
ABS2
m2
,
m3
,
m6
,
m7
ABS
W
2
m0
,
m1
,
m0
,
m1
,
m6
,
m7
ABS
W
2
m2
,
m3
,
m2
,
m3
,
m6
,
m7
paddw
m0
,
m2
paddw
m1
,
m3
movq
m6
,
[
sp
ill
]
movq
m7
,
[
sp
ill
+
8
]
ABS2
m4
,
m5
,
m2
,
m3
ABS2
m6
,
m7
,
m2
,
m3
ABS
W
2
m4
,
m5
,
m4
,
m5
,
m2
,
m3
ABS
W
2
m6
,
m7
,
m6
,
m7
,
m2
,
m3
paddw
m4
,
m6
paddw
m5
,
m7
paddw
m0
,
m4
...
...
@@ -218,23 +218,23 @@ cglobal intra_sa8d_x3_8x8_core
movq
[
sp
ill
+
0
],
m0
movq
[
sp
ill
+
8
],
m1
ABS2
m2
,
m3
,
m0
,
m1
ABS2
m4
,
m5
,
m0
,
m1
ABS
W
2
m2
,
m3
,
m2
,
m3
,
m0
,
m1
ABS
W
2
m4
,
m5
,
m4
,
m5
,
m0
,
m1
paddw
m2
,
m4
paddw
m3
,
m5
ABS2
m6
,
m7
,
m4
,
m5
ABS
W
2
m6
,
m7
,
m6
,
m7
,
m4
,
m5
movq
m0
,
[
sp
ill
+
0
]
movq
m1
,
[
sp
ill
+
8
]
paddw
m2
,
m6
paddw
m3
,
m7
paddw
m2
,
m3
ABS
1
m1
,
m4
ABS
W
m1
,
m1
,
m4
paddw
m2
,
m1
; 7x4 sum
movq
m7
,
m0
movq
m1
,
[
ecx
+
8
]
; left bottom
psllw
m1
,
3
psubw
m7
,
m1
ABS2
m0
,
m7
,
m5
,
m3
ABS
W
2
m0
,
m7
,
m0
,
m7
,
m5
,
m3
paddw
m0
,
m2
paddw
m7
,
m2
movq
[
sum
+
0
],
m0
; dc
...
...
@@ -262,15 +262,15 @@ cglobal intra_sa8d_x3_8x8_core
movq
[
sp
ill
],
m0
movq
[
sp
ill
+
8
],
m1
ABS2
m2
,
m3
,
m0
,
m1
ABS2
m4
,
m5
,
m0
,
m1
ABS
W
2
m2
,
m3
,
m2
,
m3
,
m0
,
m1
ABS
W
2
m4
,
m5
,
m4
,
m5
,
m0
,
m1
paddw
m2
,
m4
paddw
m3
,
m5
paddw
m2
,
m3
movq
m0
,
[
sp
ill
]
movq
m1
,
[
sp
ill
+
8
]
ABS2
m6
,
m7
,
m4
,
m5
ABS
1
m1
,
m3
ABS
W
2
m6
,
m7
,
m6
,
m7
,
m4
,
m5
ABS
W
m1
,
m1
,
m3
paddw
m2
,
m7
paddw
m1
,
m6
paddw
m2
,
m1
; 7x4 sum
...
...
@@ -287,7 +287,7 @@ cglobal intra_sa8d_x3_8x8_core
psubw
m1
,
m7
psubw
m0
,
m6
ABS2
m0
,
m1
,
m5
,
m6
ABS
W
2
m0
,
m1
,
m0
,
m1
,
m5
,
m6
movq
m3
,
[
sum
+
0
]
; dc
paddw
m0
,
m2
paddw
m1
,
m2
...
...
@@ -303,7 +303,7 @@ cglobal intra_sa8d_x3_8x8_core
psllw
m4
,
3
psubw
m3
,
[
sum
+
16
]
psubw
m4
,
[
sum
+
24
]
ABS2
m3
,
m4
,
m5
,
m6
ABS
W
2
m3
,
m4
,
m3
,
m4
,
m5
,
m6
paddw
m2
,
m3
paddw
m2
,
m4
; v
...
...
common/x86/pixel-a.asm
View file @
309ddabb
...
...
@@ -965,8 +965,8 @@ cglobal pixel_var2_8x8_ssse3, 5,6,8
%else
HADAMARD4_V
%
2
,
%
3
,
%
4
,
%
5
,
%
6
; doing the abs first is a slight advantage
ABS
2
m
%
2
,
m
%
4
,
m
%
6
,
m
%
7
ABS
2
m
%
3
,
m
%
5
,
m
%
6
,
m
%
7
ABS
W2
m
%
2
,
m
%
4
,
m
%
2
,
m
%
4
,
m
%
6
,
m
%
7
ABS
W2
m
%
3
,
m
%
5
,
m
%
3
,
m
%
5
,
m
%
6
,
m
%
7
HADAMARD
1
,
max
,
%
2
,
%
4
,
%
6
,
%
7
%endif
%ifnidn %9, swap
...
...
@@ -1567,21 +1567,12 @@ cglobal intra_sa8d_x3_8x8_core, 3,3,16
movdqa
m9
,
m3
movdqa
m10
,
m4
movdqa
m11
,
m5
ABS2
m8
,
m9
,
m12
,
m13
ABS2
m10
,
m11
,
m12
,
m13
ABS
W
2
m8
,
m9
,
m8
,
m9
,
m12
,
m13
ABS
W
2
m10
,
m11
,
m10
,
m11
,
m12
,
m13
paddusw
m8
,
m10
paddusw
m9
,
m11
%if cpuflag(ssse3)
pabsw
m10
,
m6
pabsw
m11
,
m7
pabsw
m15
,
m1
%else
movdqa
m10
,
m6
movdqa
m11
,
m7
movdqa
m15
,
m1
ABS2
m10
,
m11
,
m13
,
m14
ABS1
m15
,
m13
%endif
ABSW2
m10
,
m11
,
m6
,
m7
,
m6
,
m7
ABSW
m15
,
m1
,
m1
paddusw
m10
,
m11
paddusw
m8
,
m9
paddusw
m15
,
m10
...
...
@@ -1592,8 +1583,7 @@ cglobal intra_sa8d_x3_8x8_core, 3,3,16
psllw
m8
,
3
psubw
m8
,
m0
psubw
m9
,
m0
ABS1
m8
,
m10
ABS1
m9
,
m11
; 1x8 sum
ABSW2
m8
,
m9
,
m8
,
m9
,
m10
,
m11
; 1x8 sum
paddusw
m14
,
m15
,
m8
paddusw
m15
,
m9
punpcklwd
m0
,
m1
...
...
@@ -1607,7 +1597,7 @@ cglobal intra_sa8d_x3_8x8_core, 3,3,16
psllw
m1
,
3
psrldq
m2
,
m15
,
2
; 8x7 sum
psubw
m0
,
m1
; 8x1 sum
ABS
1
m0
,
m1
ABS
W
m0
,
m0
,
m1
paddusw
m2
,
m0
; 3x HADDW
...
...
@@ -1727,19 +1717,9 @@ cglobal hadamard_load
; out: m7
; clobber: m4..m6
%macro SUM3x4 0
%if cpuflag(ssse3)
pabsw
m4
,
m1
pabsw
m5
,
m2
pabsw
m7
,
m3
ABSW2
m4
,
m5
,
m1
,
m2
,
m1
,
m2
ABSW
m7
,
m3
,
m3
paddw
m4
,
m5
%else
movq
m4
,
m1
movq
m5
,
m2
ABS2
m4
,
m5
,
m6
,
m7
movq
m7
,
m3
paddw
m4
,
m5
ABS1
m7
,
m6
%endif
paddw
m7
,
m4
%endmacro
...
...
@@ -1758,8 +1738,8 @@ cglobal hadamard_load
movq
m1
,
%
3
psllw
m1
,
2
psubw
m0
,
m1
ABS2
m4
,
m5
,
m2
,
m3
; 1x4 sum
ABS
1
m0
,
m1
; 4x1 sum
ABS
W
2
m4
,
m5
,
m4
,
m5
,
m2
,
m3
; 1x4 sum
ABS
W
m0
,
m0
,
m1
; 4x1 sum
%endmacro
%macro INTRA_SATDS_MMX 0
...
...
@@ -1982,16 +1962,6 @@ cglobal intra_satd_x3_8x8c, 0,6
%endmacro
; INTRA_SATDS_MMX
%macro ABS_MOV 2
%if cpuflag(ssse3)
pabsw
%
1
,
%
2
%else
pxor
%
1
,
%
1
psubw
%
1
,
%
2
pmaxsw
%
1
,
%
2
%endif
%endmacro
; in: r0=pix, r1=stride, r2=stride*3, r3=tmp, m6=mask_ac4, m7=0
; out: [tmp]=hadamard4, m0=satd
INIT_MMX
mmx2
...
...
@@ -2016,11 +1986,11 @@ cglobal hadamard_ac_4x4
mova
[
r3
+
8
],
m1
mova
[
r3
+
16
],
m2
mova
[
r3
+
24
],
m3
ABS
1
m0
,
m4
ABS
1
m1
,
m4
ABS
W
m0
,
m0
,
m4
ABS
W
m1
,
m1
,
m4
pand
m0
,
m6
ABS
1
m2
,
m4
ABS
1
m3
,
m4
ABS
W
m2
,
m2
,
m4
ABS
W
m3
,
m3
,
m4
paddw
m0
,
m1
paddw
m2
,
m3
paddw
m0
,
m2
...
...
@@ -2034,8 +2004,8 @@ cglobal hadamard_ac_2x2max
mova
m3
,
[
r3
+
0x60
]
sub
r3
,
8
SUMSUB_BADC
w
,
0
,
1
,
2
,
3
,
4
ABS
2
m0
,
m2
,
m4
,
m5
ABS
2
m1
,
m3
,
m4
,
m5
ABS
W2
m0
,
m2
,
m0
,
m2
,
m4
,
m5
ABS
W2
m1
,
m3
,
m1
,
m3
,
m4
,
m5
HADAMARD
0
,
max
,
0
,
2
,
4
,
5
HADAMARD
0
,
max
,
1
,
3
,
4
,
5
%ifdef HIGH_BIT_DEPTH
...
...
@@ -2101,8 +2071,8 @@ cglobal hadamard_ac_8x8
mova
m3
,
[
r3
+
0x60
]
SUMSUB_BADC
w
,
0
,
1
,
2
,
3
,
4
HADAMARD
0
,
sumsub
,
0
,
2
,
4
,
5
ABS
2
m1
,
m3
,
m4
,
m5
ABS
2
m0
,
m2
,
m4
,
m5
ABS
W2
m1
,
m3
,
m1
,
m3
,
m4
,
m5
ABS
W2
m0
,
m2
,
m0
,
m2
,
m4
,
m5
HADAMARD
0
,
max
,
1
,
3
,
4
,
5
%ifdef HIGH_BIT_DEPTH
pand
m0
,
[
mask_ac4
]
...
...
@@ -2299,9 +2269,9 @@ cglobal hadamard_ac_8x8
%endif
mova
sp
ill1
,
m2
mova
sp
ill2
,
m3
ABS
_MOV
m1
,
m0
ABS
_MOV
m2
,
m4
ABS
_MOV
m3
,
m5
ABS
W
m1
,
m0
,
m0
ABS
W
m2
,
m4
,
m4
ABS
W
m3
,
m5
,
m5
paddw
m1
,
m2
SUMSUB_BA
w
,
0
,
4
%if vertical
...
...
@@ -2310,15 +2280,15 @@ cglobal hadamard_ac_8x8
pand
m1
,
[
mask_ac4b
]
%endif
AC_PREP
m1
,
[
pw_1
]
ABS
_MOV
m2
,
sp
ill0
ABS
W
m2
,
sp
ill0
AC_PADD
m1
,
m3
,
[
pw_1
]
ABS
_MOV
m3
,
sp
ill1
ABS
W
m3
,
sp
ill1
AC_PADD
m1
,
m2
,
[
pw_1
]
ABS
_MOV
m2
,
sp
ill2
ABS
W
m2
,
sp
ill2
AC_PADD
m1
,
m3
,
[
pw_1
]
ABS
_MOV
m3
,
m6
ABS
W
m3
,
m6
,
m6
AC_PADD
m1
,
m2
,
[
pw_1
]
ABS
_MOV
m2
,
m7
ABS
W
m2
,
m7
,
m7
AC_PADD
m1
,
m3
,
[
pw_1
]
mova
m3
,
m7
AC_PADD
m1
,
m2
,
[
pw_1
]
...
...
@@ -2349,9 +2319,9 @@ cglobal hadamard_ac_8x8
%else
paddw
m2
,
m2
%endif
; HIGH_BIT_DEPTH
ABS
1
m4
,
m7
ABS
W
m4
,
m4
,
m7
pand
m0
,
[
mask_ac8
]
ABS
1
m0
,
m7
ABS
W
m0
,
m0
,
m7
AC_PADD
m2
,
m4
,
[
pw_1
]
AC_PADD
m2
,
m0
,
[
pw_1
]
mova
[
rsp
+
gprsize
+
16
],
m2
; save sa8d
...
...
@@ -2708,15 +2678,15 @@ cglobal pixel_ads4, 6,7
movq
mm1
,
[
r1
+
16
]
psubw
mm0
,
mm7
psubw
mm1
,
mm6
ABS
1
mm0
,
mm2
ABS
1
mm1
,
mm3
ABS
W
mm0
,
mm0
,
mm2
ABS
W
mm1
,
mm1
,
mm3
movq
mm2
,
[
r1
+
r2
]
movq
mm3
,
[
r1
+
r2
+
16
]
psubw
mm2
,
mm5
psubw
mm3
,
mm4
paddw
mm0
,
mm1
ABS
1
mm2
,
mm1
ABS
1
mm3
,
mm1
ABS
W
mm2
,
mm2
,
mm1
ABS
W
mm3
,
mm3
,
mm1
paddw
mm0
,
mm2
paddw
mm0
,
mm3
pshufw
mm1
,
r6m
,
0
...
...
@@ -2737,8 +2707,8 @@ cglobal pixel_ads2, 6,7
movq
mm1
,
[
r1
+
r2
]
psubw
mm0
,
mm7
psubw
mm1
,
mm6
ABS
1
mm0
,
mm2
ABS
1
mm1
,
mm3
ABS
W
mm0
,
mm0
,
mm2
ABS
W
mm1
,
mm1
,
mm3
paddw
mm0
,
mm1
paddusw
mm0
,
[
r3
]
movq
mm4
,
mm5
...
...
@@ -2756,8 +2726,8 @@ cglobal pixel_ads1, 6,7
movq
mm1
,
[
r1
+
8
]
psubw
mm0
,
mm7
psubw
mm1
,
mm7
ABS
1
mm0
,
mm2
ABS
1
mm1
,
mm3
ABS
W
mm0
,
mm0
,
mm2
ABS
W
mm1
,
mm1
,
mm3
paddusw
mm0
,
[
r3
]
paddusw
mm1
,
[
r3
+
8
]
movq
mm4
,
mm6
...
...
@@ -2789,15 +2759,15 @@ cglobal pixel_ads4, 6,7,12
psubw
xmm0
,
xmm10
,
xmm7
movdqu
xmm10
,
[
r1
+
16
]
psubw
xmm1
,
xmm10
,
xmm6
ABS
1
xmm0
,
xmm2
ABS
1
xmm1
,
xmm3
ABS
W
xmm0
,
xmm0
,
xmm2
ABS
W
xmm1
,
xmm1
,
xmm3
psubw
xmm2
,
xmm11
,
xmm5
movdqu
xmm11
,
[
r1
+
r2
+
16
]
paddw
xmm0
,
xmm1
psubw
xmm3
,
xmm11
,
xmm4
movdqu
xmm9
,
[
r3
]
ABS
1
xmm2
,
xmm1
ABS
1
xmm3
,
xmm1
ABS
W
xmm2
,
xmm2
,
xmm1
ABS
W
xmm3
,
xmm3
,
xmm1
paddw
xmm0
,
xmm2
paddw
xmm0
,
xmm3
paddusw
xmm0
,
xmm9
...
...
@@ -2811,15 +2781,15 @@ cglobal pixel_ads4, 6,7,12
movdqu
xmm1
,
[
r1
+
16
]
psubw
xmm0
,
xmm7
psubw
xmm1
,
xmm6
ABS
1
xmm0
,
xmm2
ABS
1
xmm1
,
xmm3
ABS
W
xmm0
,
xmm0
,
xmm2
ABS
W
xmm1
,
xmm1
,
xmm3
movdqu
xmm2
,
[
r1
+
r2
]
movdqu
xmm3
,
[
r1
+
r2
+
16
]
psubw
xmm2
,
xmm5
psubw
xmm3
,
xmm4
paddw
xmm0
,
xmm1
ABS
1
xmm2
,
xmm1
ABS
1
xmm3
,
xmm1
ABS
W
xmm2
,
xmm2
,
xmm1
ABS
W
xmm3
,
xmm3
,
xmm1
paddw
xmm0
,
xmm2
paddw
xmm0
,
xmm3
movd
xmm1
,
r6m
...
...
@@ -2849,8 +2819,8 @@ cglobal pixel_ads2, 6,7,8
psubw
xmm0
,
xmm7
psubw
xmm1
,
xmm6
movdqu
xmm4
,
[
r3
]
ABS
1
xmm0
,
xmm2
ABS
1
xmm1
,
xmm3
ABS
W
xmm0
,
xmm0
,
xmm2
ABS
W
xmm1
,
xmm1
,
xmm3
paddw
xmm0
,
xmm1
paddusw
xmm0
,
xmm4
psubusw
xmm1
,
xmm5
,
xmm0
...
...
@@ -2873,8 +2843,8 @@ cglobal pixel_ads1, 6,7,8
psubw
xmm1
,
xmm7
movdqu
xmm2
,
[
r3
]
movdqu
xmm3
,
[
r3
+
16
]
ABS
1
xmm0
,
xmm4
ABS
1
xmm1
,
xmm5
ABS
W
xmm0
,
xmm0
,
xmm4
ABS
W
xmm1
,
xmm1
,
xmm5
paddusw
xmm0
,
xmm2
paddusw
xmm1
,
xmm3
psubusw
xmm4
,
xmm6
,
xmm0
...
...
common/x86/predict-a.asm
View file @
309ddabb
...
...
@@ -1538,7 +1538,7 @@ cglobal predict_8x8c_h, 1,1
add
r0
,
FDEC_STRIDE
*
4
%assign n -4
%rep 8
SPLATB
m0
,
r0
+
FDEC_STRIDE
*
n
-
1
,
m1
SPLATB
_LOAD
m0
,
r0
+
FDEC_STRIDE
*
n
-
1
,
m1
mova
[
r0
+
FDEC_STRIDE
*
n
],
m0
%assign n n+1
%endrep
...
...
@@ -1745,7 +1745,7 @@ cglobal predict_16x16_h, 1,2
.vloop:
%assign n 0
%rep 4
SPLATB
m0
,
r0
+
r1
+
FDEC_STRIDE
*
n
-
1
,
m1
SPLATB
_LOAD
m0
,
r0
+
r1
+
FDEC_STRIDE
*
n
-
1
,
m1
mova
[
r0
+
r1
+
FDEC_STRIDE
*
n
],
m0
%if mmsize==8
mova
[
r0
+
r1
+
FDEC_STRIDE
*
n
+
8
],
m0
...
...
common/x86/quant-a.asm
View file @
309ddabb
...
...
@@ -99,59 +99,6 @@ cextern pd_1024
%endif
%endmacro
; PABSW mmx and PSIGNW mmx do not individually perform the same operations as
; pabsw and psignw instructions, but the conjuction works
%macro PABSW 2
%if cpuflag(ssse3)
pabsw
%
1
,
%
2
%else
pxor
%
1
,
%
1
pcmpgtw
%
1
,
%
2
pxor
%
2
,
%
1
psubw
%
2
,
%
1
SWAP
%
1
,
%
2
%endif
%endmacro
%macro PSIGNW 2
%if cpuflag(ssse3)
psignw
%
1
,
%
2
%else
pxor
%
1
,
%
2
psubw
%
1
,
%
2
%endif
%endmacro
%macro PABSD 2
%if cpuflag(ssse3)
pabsd
%
1
,
%
2
%else
pxor
%
1
,
%
1
pcmpgtd
%
1
,
%
2
pxor
%
2
,
%
1
psubd
%
2
,
%
1
SWAP
%
1
,
%
2
%endif
%endmacro
%macro PSIGND_MMX 2-3
%if %0==3
pxor
%
1
,
%
2
,
%
3
psubd
%
1
,
%
3
%else
pxor
%
1
,
%
2
psubd
%
1
,
%
2
%endif
%endmacro
%macro PSIGND 2+
%if cpuflag(ssse3)
psignd
%
1
,
%
2
%else
PSIGND_MMX
%
1
,
%
2
%endif
%endmacro
%macro QUANT_END 0
%if cpuflag(sse4)
xor
eax
,
eax
...
...
@@ -185,7 +132,7 @@ cextern pd_1024
%macro QUANT_ONE_DC 4
%if cpuflag(sse4)
mova
m0
,
[
%
1
]
P
ABSD
m1
,
m0
ABSD
m1
,
m0
paddd
m1
,
%
3
pmulld
m1
,
%
2
psrad
m1
,
16
...
...
@@ -198,7 +145,7 @@ cextern pd_1024
%endif
%else
; !sse4
mova
m0
,
[
%
1
]
P
ABSD
m1
,
m0
ABSD
m1
,
m0
paddd
m1
,
%
3
mova
m2
,
m1
psrlq
m2
,
32
...
...
@@ -221,8 +168,8 @@ cextern pd_1024
%if cpuflag(sse4)
mova
m0
,
[
%
1
]
mova
m1
,
[
%
1
+
mmsize
]
P
ABSD
m2
,
m0
P
ABSD
m3
,
m1
ABSD
m2
,
m0
ABSD
m3
,
m1
paddd
m2
,
%
3
paddd
m3
,
%
3
pmulld
m2
,
%
2
...
...
@@ -248,7 +195,7 @@ cextern pd_1024
%macro QUANT_ONE_AC_MMX 4
mova
m0
,
[
%
1
]
mova
m2
,
[
%
2
]
P
ABSD
m1
,
m0
ABSD
m1
,
m0
mova
m4
,
m2
paddd
m1
,
[
%
3
]
mova
m3
,
m1
...
...
@@ -272,8 +219,8 @@ cextern pd_1024
%if cpuflag(sse4)
mova
m0
,
[
%
1
]
mova
m1
,
[
%
1
+
mmsize
]
P
ABSD
m2
,
m0
P
ABSD
m3
,
m1
ABSD
m2
,
m0
ABSD
m3
,
m1
paddd
m2
,
[
%
3
]
paddd
m3
,
[
%
3
+
mmsize
]
pmulld
m2
,
[
%
2
]
...
...
@@ -355,7 +302,7 @@ QUANT_AC 8, 8
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
mova
m1
,
%
1
; load dct coeffs
P
ABSW
m0
,
m1
ABSW
m0
,
m1
,
si
gn
paddusw
m0
,
%
3
; round
pmulhuw
m0
,
%
2
; divide
PSIGNW
m0
,
m1
; restore sign
...
...
@@ -370,8 +317,8 @@ QUANT_AC 8, 8
%macro QUANT_TWO 7
mova
m1
,
%
1
mova
m3
,
%
2
P
ABSW
m0
,
m1
P
ABSW
m2
,
m3
ABSW
m0
,
m1
,
si
gn
ABSW
m2
,
m3
,
si
gn
paddusw
m0
,
%
5
paddusw
m2
,
%
6
pmulhuw
m0
,
%
3
...
...
@@ -853,8 +800,8 @@ cglobal denoise_dct, 4,4,8
sub
r3
,
mmsize
/
2
mova
m2
,
[
r0
+
r3
*
4
+
0
*
mmsize
]
mova
m3
,
[
r0
+
r3
*
4
+
1
*
mmsize
]
P
ABSD
m0
,
m2
P
ABSD
m1
,
m3
ABSD
m0
,
m2
ABSD
m1
,
m3
mova
m4
,
m0
mova
m5
,
m1
psubd
m0
,
[
r2
+
r3
*
4
+
0
*
mmsize
]
...
...
@@ -898,8 +845,8 @@ cglobal denoise_dct, 4,4,7
sub
r3
,
mmsize
mova
m2
,
[
r0
+
r3
*
2
+
0
*
mmsize
]
mova
m3
,
[
r0
+
r3
*
2
+
1
*
mmsize
]
P
ABSW
m0
,
m2
P
ABSW
m1
,
m3
ABSW
m0
,
m2
,
si
gn
ABSW
m1
,
m3
,
si
gn
psubusw
m4
,
m0
,
[
r2
+
r3
*
2
+
0
*
mmsize
]
psubusw
m5
,
m1
,
[
r2
+
r3
*
2
+
1
*
mmsize
]
PSIGNW
m4
,
m2
...
...
@@ -946,16 +893,10 @@ DENOISE_DCT
movdqa
xmm1
,
[
%
3
+
32
]
packssdw
xmm0
,
[
%
3
+
16
]
packssdw
xmm1
,
[
%
3
+
48
]
ABS2
xmm0
,
xmm1
,
xmm3
,
xmm4
ABS
W
2
xmm0
,
xmm1
,
xmm0
,
xmm1
,
xmm3
,
xmm4
%else
%if cpuflag(ssse3)
pabsw
xmm0
,
[
%
3
+
0
]
pabsw
xmm1
,
[
%
3
+
16
]
%else
movdqa
xmm0
,
[
%
3
+
0
]
movdqa
xmm1
,
[
%
3
+
16
]
ABS2
xmm0
,
xmm1
,
xmm3
,
xmm4
%endif
ABSW
xmm0
,
[
%
3
+
0
],
xmm3
ABSW
xmm1
,
[
%
3
+
16
],
xmm4
%endif
packsswb
xmm0
,
xmm1
pxor
xmm2
,
xmm2
...
...
@@ -980,8 +921,8 @@ DENOISE_DCT
movq
mm2
,
[
%
3
+
16
]
movq
mm3
,
[
%
3
+
24
]
%endif
ABS2
mm0
,
mm1
,
mm6
,
mm7
ABS2
mm2
,
mm3
,
mm6
,
mm7
ABS
W
2
mm0
,
mm1
,
mm0
,
mm1
,
mm6
,
mm7
ABS
W
2
mm2
,
mm3
,
mm2
,
mm3
,
mm6
,
mm7
packsswb
mm0
,
mm1
packsswb
mm2
,
mm3
pxor
mm4
,
mm4
...
...
common/x86/sad-a.asm
View file @
309ddabb
...
...
@@ -733,7 +733,7 @@ cglobal intra_sad_x3_16x16, 3,5,8
pxor
m2
,
m2
mov
r3d
,
15
*
FENC_STRIDE
.vloop:
SPLATB
m6
,
r1
+
r3
*
2
-
1
,
m1
SPLATB
_LOAD
m6
,
r1
+
r3
*
2
-
1
,
m1
mova
m0
,
[
r0
+
r3
]
psadbw
m0
,
m7
paddw
m4
,
m0
...
...
common/x86/sad16-a.asm
View file @
309ddabb
...
...
@@ -43,8 +43,8 @@ cextern pw_1
psubw
m2
,
[
r2
+
8
]
psubw
m3
,
[
r2
+
16
]
psubw
m4
,
[
r2
+
24
]
ABS2
m1
,
m2
,
m5
,
m6
ABS2
m3
,
m4
,
m7
,
m5
ABS
W
2
m1
,
m2
,
m1
,
m2
,
m5
,
m6
ABS
W
2
m3
,
m4
,
m3
,
m4
,
m7
,
m5
lea
r0
,
[
r0
+
2
*
r1
]
lea
r2
,
[
r2
+
2
*
r3
]
paddw
m1
,
m2
...
...
@@ -62,8 +62,8 @@ cextern pw_1
psubw
m2
,
[
r2
+
8
]
psubw
m3
,
[
r2
+
2
*
r3
+
0
]
psubw
m4
,
[
r2
+
2
*
r3
+
8
]