Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
29899d84
Commit
29899d84
authored
Apr 03, 2008
by
Loren Merritt
Browse files
more mmx/xmm macros (mova, movu, movh)
parent
937b7925
Changes
3
Hide whitespace changes
Inline
Side-by-side
common/x86/deblock-a.asm
View file @
29899d84
...
...
@@ -138,8 +138,8 @@ SECTION .text
; out: %4 = |%1-%2|>%3
; clobbers: %5
%macro DIFF_GT 5
mov
q
%
5
,
%
2
mov
q
%
4
,
%
1
mov
a
%
5
,
%
2
mov
a
%
4
,
%
1
psubusb
%
5
,
%
1
psubusb
%
4
,
%
2
por
%
4
,
%
5
...
...
@@ -149,8 +149,8 @@ SECTION .text
; out: %4 = |%1-%2|>%3
; clobbers: %5
%macro DIFF_GT2 5
mov
q
%
5
,
%
2
mov
q
%
4
,
%
1
mov
a
%
5
,
%
2
mov
a
%
4
,
%
1
psubusb
%
5
,
%
1
psubusb
%
4
,
%
2
psubusb
%
5
,
%
3
...
...
@@ -190,7 +190,7 @@ SECTION .text
; out: m1=p0' m2=q0'
; clobbers: m0,3-6
%macro DEBLOCK_P0_Q0 0
mov
q
m5
,
m1
mov
a
m5
,
m1
pxor
m5
,
m2
; p0^q0
pand
m5
,
[
pb_01
GLOBAL
]
; (p0^q0)&1
pcmpeqb
m4
,
m4
...
...
@@ -201,7 +201,7 @@ SECTION .text
pavgb
m4
,
m2
; (q0 - p0 + 256)>>1
pavgb
m3
,
m5
paddusb
m3
,
m4
; d+128+33
mov
q
m6
,
[
pb_a1
GLOBAL
]
mov
a
m6
,
[
pb_a1
GLOBAL
]
psubusb
m6
,
m3
psubusb
m3
,
[
pb_a1
GLOBAL
]
pminub
m6
,
m7
...
...
@@ -217,18 +217,18 @@ SECTION .text
; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
; clobbers: q2, tmp, tc0
%macro LUMA_Q1 6
mov
q
%
6
,
m1
mov
a
%
6
,
m1
pavgb
%
6
,
m2
pavgb
%
2
,
%
6
; avg(p2,avg(p0,q0))
pxor
%
6
,
%
3
pand
%
6
,
[
pb_01
GLOBAL
]
; (p2^avg(p0,q0))&1
psubusb
%
2
,
%
6
; (p2+((p0+q0+1)>>1))>>1
mov
q
%
6
,
%
1
mov
a
%
6
,
%
1
psubusb
%
6
,
%
5
paddusb
%
5
,
%
1
pmaxub
%
2
,
%
6
pminub
%
2
,
%
5
mov
q
%
4
,
%
2
mov
a
%
4
,
%
2
%endmacro
;-----------------------------------------------------------------------------
...
...
@@ -244,10 +244,10 @@ cglobal x264_deblock_v_luma_sse2
dec
r3d
; beta-1
add
r4
,
r0
; pix-3*stride
mov
dqa
m0
,
[
r4
+
r1
]
; p1
mov
dqa
m1
,
[
r4
+
2
*
r1
]
; p0
mov
dqa
m2
,
[
r0
]
; q0
mov
dqa
m3
,
[
r0
+
r1
]
; q1
mov
a
m0
,
[
r4
+
r1
]
; p1
mov
a
m1
,
[
r4
+
2
*
r1
]
; p0
mov
a
m2
,
[
r0
]
; q0
mov
a
m3
,
[
r0
+
r1
]
; q1
LOAD_MASK
r2d
,
r3d
punpcklbw
m8
,
m8
...
...
@@ -260,7 +260,7 @@ cglobal x264_deblock_v_luma_sse2
movdqa
m3
,
[
r4
]
; p2
DIFF_GT2
m1
,
m3
,
m5
,
m6
,
m7
; |p2-p0| > beta-1
pand
m6
,
m9
mov
dqa
m7
,
m8
mov
a
m7
,
m8
psubb
m7
,
m6
pand
m6
,
m8
LUMA_Q1
m0
,
m3
,
[
r4
],
[
r4
+
r1
],
m6
,
m4
...
...
@@ -270,12 +270,12 @@ cglobal x264_deblock_v_luma_sse2
pand
m6
,
m9
pand
m8
,
m6
psubb
m7
,
m6
mov
dqa
m3
,
[
r0
+
r1
]
mov
a
m3
,
[
r0
+
r1
]
LUMA_Q1
m3
,
m4
,
[
r0
+
2
*
r1
],
[
r0
+
r1
],
m8
,
m6
DEBLOCK_P0_Q0
mov
dqa
[
r4
+
2
*
r1
],
m1
mov
dqa
[
r0
],
m2
mov
a
[
r4
+
2
*
r1
],
m1
mov
a
[
r0
],
m2
ret
;-----------------------------------------------------------------------------
...
...
@@ -338,10 +338,10 @@ cglobal x264_deblock_%2_luma_%1, 5,5,1
dec
r3
; beta-1
add
r4
,
r0
; pix-3*stride
mov
q
m0
,
[
r4
+
r1
]
; p1
mov
q
m1
,
[
r4
+
2
*
r1
]
; p0
mov
q
m2
,
[
r0
]
; q0
mov
q
m3
,
[
r0
+
r1
]
; q1
mov
a
m0
,
[
r4
+
r1
]
; p1
mov
a
m1
,
[
r4
+
2
*
r1
]
; p0
mov
a
m2
,
[
r0
]
; q0
mov
a
m3
,
[
r0
+
r1
]
; q1
LOAD_MASK
r2
,
r3
mov
r3
,
r4m
...
...
@@ -356,34 +356,34 @@ cglobal x264_deblock_%2_luma_%1, 5,5,1
movd
m4
,
[
r3
]
; tc0
punpcklbw
m4
,
m4
punpcklbw
m4
,
m4
; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
mov
q
[
esp
+%
3
],
m4
; tc
mov
a
[
esp
+%
3
],
m4
; tc
pcmpeqb
m3
,
m3
pcmpgtb
m4
,
m3
pand
m4
,
m7
mov
q
[
esp
],
m4
; mask
mov
a
[
esp
],
m4
; mask
mov
q
m3
,
[
r4
]
; p2
mov
a
m3
,
[
r4
]
; p2
DIFF_GT2
m1
,
m3
,
m5
,
m6
,
m7
; |p2-p0| > beta-1
pand
m6
,
m4
pand
m4
,
[
esp
+%
3
]
; tc
mov
q
m7
,
m4
mov
a
m7
,
m4
psubb
m7
,
m6
pand
m6
,
m4
LUMA_Q1
m0
,
m3
,
[
r4
],
[
r4
+
r1
],
m6
,
m4
mov
q
m4
,
[
r0
+
2
*
r1
]
; q2
mov
a
m4
,
[
r0
+
2
*
r1
]
; q2
DIFF_GT2
m2
,
m4
,
m5
,
m6
,
m3
; |q2-q0| > beta-1
mov
q
m5
,
[
esp
]
; mask
mov
a
m5
,
[
esp
]
; mask
pand
m6
,
m5
mov
q
m5
,
[
esp
+%
3
]
; tc
mov
a
m5
,
[
esp
+%
3
]
; tc
pand
m5
,
m6
psubb
m7
,
m6
mov
q
m3
,
[
r0
+
r1
]
mov
a
m3
,
[
r0
+
r1
]
LUMA_Q1
m3
,
m4
,
[
r0
+
2
*
r1
],
[
r0
+
r1
],
m5
,
m6
DEBLOCK_P0_Q0
mov
q
[
r4
+
2
*
r1
],
m1
mov
q
[
r0
],
m2
mov
a
[
r4
+
2
*
r1
],
m1
mov
a
[
r0
],
m2
%if %3 == 16
mov
esp
,
r2
...
...
common/x86/quant-a.asm
View file @
29899d84
...
...
@@ -75,7 +75,7 @@ SECTION .text
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
mov
q
m0
,
%
1
; load dct coeffs
mov
a
m0
,
%
1
; load dct coeffs
pxor
m1
,
m1
pcmpgtw
m1
,
m0
; sign(coeff)
pxor
m0
,
m1
...
...
@@ -84,16 +84,16 @@ SECTION .text
pmulhuw
m0
,
%
2
; divide
pxor
m0
,
m1
; restore sign
psubw
m0
,
m1
mov
q
%
1
,
m0
; store
mov
a
%
1
,
m0
; store
%endmacro
%macro QUANT_SSSE3 3
mov
q
m1
,
%
1
; load dct coeffs
mov
a
m1
,
%
1
; load dct coeffs
pabsw
m0
,
m1
paddusw
m0
,
%
3
; round
pmulhuw
m0
,
%
2
; divide
psignw
m0
,
m1
; restore sign
mov
q
%
1
,
m0
; store
mov
a
%
1
,
m0
; store
%endmacro
INIT_MMX
...
...
@@ -162,11 +162,11 @@ QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
;;; %2,%3 dequant_mf[i_mf][y][x]
;;; m5 i_qbits
mov
q
m0
,
%
2
mov
a
m0
,
%
2
packssdw
m0
,
%
3
pmullw
m0
,
%
1
psllw
m0
,
m5
mov
q
%
1
,
m0
mov
a
%
1
,
m0
%endmacro
%macro DEQUANT32_R 3
...
...
@@ -176,8 +176,8 @@ QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
;;; m6 f
;;; m7 0
mov
q
m0
,
%
1
mov
q
m1
,
m0
mov
a
m0
,
%
1
mov
a
m1
,
m0
punpcklwd
m0
,
m7
punpckhwd
m1
,
m7
pmaddwd
m0
,
%
2
...
...
@@ -187,7 +187,7 @@ QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
psrad
m0
,
m5
psrad
m1
,
m5
packssdw
m0
,
m1
mov
q
%
1
,
m0
mov
a
%
1
,
m0
%endmacro
%macro DEQUANT_LOOP 3
...
...
@@ -207,17 +207,17 @@ QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
%endmacro
%macro DEQUANT16_FLAT 2-8
mov
q
m0
,
%
1
mov
a
m0
,
%
1
%assign i %0-2
%rep %0-1
%if i
mov
q
m
%+
i
,
[
r0
+%
2
]
mov
a
m
%+
i
,
[
r0
+%
2
]
pmullw
m
%+
i
,
m0
%else
pmullw
m0
,
[
r0
+%
2
]
%endif
psllw
m
%+
i
,
m7
mov
q
[
r0
+%
2
],
m
%+
i
mov
a
[
r0
+%
2
],
m
%+
i
%assign i i-1
%rotate 1
%endrep
...
...
@@ -268,7 +268,7 @@ cglobal x264_dequant_%2x%2_%1, 0,3
neg
t0d
movd
m5
,
t0d
picgetgot
t0d
mov
q
m6
,
[
pd_1
GLOBAL
]
mov
a
m6
,
[
pd_1
GLOBAL
]
pxor
m7
,
m7
pslld
m6
,
m5
psrld
m6
,
1
...
...
common/x86/x86inc.asm
View file @
29899d84
...
...
@@ -331,7 +331,9 @@ SECTION ".note.GNU-stack" noalloc noexec nowrite progbits
%assign FDEC_STRIDE 32
%macro INIT_MMX 0
%undef movq
%define mova movq
%define movu movq
%define movh movd
%define m0 mm0
%define m1 mm1
%define m2 mm2
...
...
@@ -345,7 +347,9 @@ SECTION ".note.GNU-stack" noalloc noexec nowrite progbits
%endmacro
%macro INIT_XMM 0
%define movq movdqa
%define mova movdqa
%define movu movdqu
%define movh movq
%define m0 xmm0
%define m1 xmm1
%define m2 xmm2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment