Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
VideoLAN
x264
Commits
223eedb0
Commit
223eedb0
authored
Jul 02, 2008
by
Loren Merritt
Browse files
cosmetics in permutation macros
SWAP can now take mmregs directly, rather than just their numbers
parent
5b92682d
Changes
2
Hide whitespace changes
Inline
Side-by-side
common/x86/quant-a.asm
View file @
223eedb0
...
...
@@ -70,57 +70,49 @@ SECTION .text
%endif
%endmacro
%macro QUANT_MMX 3
;;; %1 (m64) dct[y][x]
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
; PABSW_MMX %1=dst, %2=src: per-word absolute value for CPUs without SSSE3.
; The abs is computed into %2; the trailing SWAP renames the two registers
; (a compile-time permutation, no data movement) so callers can refer to the
; result as %1, matching PABSW_SSSE3's calling convention.
%macro PABSW_MMX 2
    pxor    %1, %1          ; %1 = 0
    pcmpgtw %1, %2          ; %1 = per-word sign mask: 0xffff where src < 0
    pxor    %2, %1          ; one's complement of the negative words
    psubw   %2, %1          ; subtract -1 -> +1: two's-complement abs in %2
    SWAP    %1, %2          ; rename regs so the abs value is now called %1
%endmacro
mova
m0
,
%
1
; load dct coeffs
pxor
m1
,
m1
pcmpgtw
m1
,
m0
; sign(coeff)
pxor
m0
,
m1
psubw
m0
,
m1
; abs(coeff)
paddusw
m0
,
%
3
; round
pmulhuw
m0
,
%
2
; divide
pxor
m0
,
m1
; restore sign
psubw
m0
,
m1
mova
%
1
,
m0
; store
; PSIGNW_MMX %1=dst, %2=mask: conditionally negate each word of %1.
; NOTE(review): unlike real psignw, %2 here must be a per-word mask of
; 0 / 0xffff (e.g. the sign mask left behind in the second register by
; PABSW_MMX's SWAP) — it is not an arbitrary signed value, and words are
; never zeroed. Confirm against the SSSE3 path's usage.
%macro PSIGNW_MMX 2
    pxor  %1, %2            ; one's complement where mask = -1
    psubw %1, %2            ; subtract -1 -> +1: two's-complement negate
%endmacro
%macro QUANT_SSSE3 3
; PABSW_SSSE3 %1=dst, %2=src: per-word absolute value using the native
; SSSE3 instruction. Same interface as PABSW_MMX (result in %1).
%macro PABSW_SSSE3 2
    pabsw %1, %2
%endmacro
; PSIGNW_SSSE3 %1=dst, %2=src: native SSSE3 psignw — negates words of %1
; where the corresponding word of %2 is negative (and zeroes where %2 is 0;
; harmless here since abs(0) == 0 anyway in the quant loop).
%macro PSIGNW_SSSE3 2
    psignw %1, %2
%endmacro
; QUANT_ONE: quantize one mmreg-sized group of dct coefficients in place:
; dct[i] = sign(dct[i]) * ((abs(dct[i]) + bias[i]) * mf[i] >> 16).
; NOTE(review): this span is a scraped side-by-side diff — both the old
; direct pabsw/psignw lines and the new PABSW/PSIGNW dispatch-macro calls
; appear below; only one of each pair exists in the real post-commit file.
%macro QUANT_ONE 3
;;; %1 (m64) dct[y][x]
;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
    mova    m1, %1          ; load dct coeffs
    pabsw   m0, m1          ; (pre-commit line: hard-coded SSSE3 abs)
    PABSW   m0, m1          ; m0 = abs(coeff), via cpu-dispatched macro
    paddusw m0, %3          ; round
    pmulhuw m0, %2          ; divide
    psignw  m0, m1          ; restore sign (pre-commit line)
    PSIGNW  m0, m1          ; restore sign
    mova    %1, m0          ; store
%endmacro
INIT_MMX
;-----------------------------------------------------------------------------
; void x264_quant_2x2_dc_mmxext( int16_t dct[4], int mf, int bias )
;-----------------------------------------------------------------------------
; NOTE(review): these two hand-written entry points are the pre-commit
; (deleted) side of this diff; the commit replaces them with 2-arg QUANT_DC
; instantiations later in the file. QUANT_DC_START (defined above this hunk)
; presumably broadcasts mf into mm6 and bias into mm7 — verify in full file.
cglobal x264_quant_2x2_dc_mmxext, 1,1
    QUANT_DC_START
    QUANT_MMX [r0], mm6, mm7    ; one 4-coeff group, single dc mf/bias
    RET

cglobal x264_quant_2x2_dc_ssse3, 1,1
    QUANT_DC_START
    QUANT_SSSE3 [r0], mm6, mm7
    RET
;-----------------------------------------------------------------------------
; void x264_quant_4x4_dc_mmxext( int16_t dct[16], int mf, int bias )
;-----------------------------------------------------------------------------
; QUANT_DC: emit a dc-quant function that loops QUANT_ONE over the block,
; with a single broadcast mf (m6) and bias (m7) set up by QUANT_DC_START.
; NOTE(review): scraped diff residue — both the old 4-arg form
; (%1=name, %2=quant macro, %3=iterations, %4=stride) and the new 2-arg
; form (%1=name, %2=iterations, stride = regsize) appear below; only the
; 2-arg form exists after this commit.
%macro QUANT_DC 4
%macro QUANT_DC 2
cglobal %1, 1,1
    QUANT_DC_START
%assign x 0
%rep %3
    %2 [r0+x], m6, m7           ; (pre-commit loop body)
%assign x x+%4
%rep %2
    QUANT_ONE [r0+x], m6, m7    ; post-commit: stride follows INIT_MMX/XMM
%assign x x+regsize
%endrep
    RET
%endmacro
...
...
@@ -128,31 +120,39 @@ cglobal %1, 1,1
;-----------------------------------------------------------------------------
; void x264_quant_4x4_mmx( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
;-----------------------------------------------------------------------------
; QUANT_AC: emit an ac-quant function — like QUANT_DC but with per-coefficient
; mf (r1) and bias (r2) tables instead of a broadcast dc value.
; NOTE(review): scraped diff residue — the old 4-arg form (%2=quant macro,
; %3=iterations, %4=stride) and the new 2-arg form (%2=iterations,
; stride = regsize) are both shown; only the 2-arg form survives the commit.
%macro QUANT_AC 4
%macro QUANT_AC 2
cglobal %1, 3,3
%assign x 0
%rep %3
    %2 [r0+x], [r1+x], [r2+x]               ; (pre-commit loop body)
%assign x x+%4
%rep %2
    QUANT_ONE [r0+x], [r1+x], [r2+x]        ; post-commit loop body
%assign x x+regsize
%endrep
    RET
%endmacro
; Instantiate the quant functions for each cpu variant. PABSW/PSIGNW are
; %defined to the matching implementation before each group.
; NOTE(review): scraped side-by-side diff — the 4-arg QUANT_DC/QUANT_AC
; calls are the pre-commit (deleted) invocations and the 2-arg calls are
; the post-commit ones; only one set exists in the real file.
INIT_MMX
%define PABSW PABSW_MMX
%define PSIGNW PSIGNW_MMX
QUANT_DC x264_quant_2x2_dc_mmxext, 1
%ifndef ARCH_X86_64
; not needed because sse2 is faster
QUANT_DC x264_quant_4x4_dc_mmxext, QUANT_MMX, 4, 8
QUANT_AC x264_quant_4x4_mmx, QUANT_MMX, 4, 8
QUANT_AC x264_quant_8x8_mmx, QUANT_MMX, 16, 8
QUANT_DC x264_quant_4x4_dc_mmxext, 4
QUANT_AC x264_quant_4x4_mmx, 4
QUANT_AC x264_quant_8x8_mmx, 16
%endif
INIT_XMM
QUANT_DC x264_quant_4x4_dc_sse2, 2
QUANT_AC x264_quant_4x4_sse2, 2
QUANT_AC x264_quant_8x8_sse2, 8
QUANT_DC x264_quant_4x4_dc_sse2, QUANT_MMX, 2, 16
QUANT_AC x264_quant_4x4_sse2, QUANT_MMX, 2, 16
QUANT_AC x264_quant_8x8_sse2, QUANT_MMX, 8, 16
%define PABSW PABSW_SSSE3
%define PSIGNW PSIGNW_SSSE3
QUANT_DC x264_quant_4x4_dc_ssse3, 2
QUANT_AC x264_quant_4x4_ssse3, 2
QUANT_AC x264_quant_8x8_ssse3, 8
QUANT_DC x264_quant_4x4_dc_ssse3, QUANT_SSSE3, 2, 16
QUANT_AC x264_quant_4x4_ssse3, QUANT_SSSE3, 2, 16
QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
INIT_MMX
QUANT_DC x264_quant_2x2_dc_ssse3, 1
...
...
common/x86/x86inc.asm
View file @
223eedb0
...
...
@@ -294,56 +294,56 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
; merge mmx and sse*
; CAT_DEFINE prefix, suffix, value: %define the concatenated name %1%2 as %3
; (needed because %define cannot paste tokens in its own name position).
%macro CAT_DEFINE 3
    %define %1%2 %3
%endmacro
; CAT_XDEFINE prefix, suffix, value: like CAT_DEFINE but %xdefine — the
; value is expanded at definition time rather than at each use.
%macro CAT_XDEFINE 3
    %xdefine %1%2 %3
%endmacro
; CAT_UNDEF prefix, suffix: %undef the concatenated name %1%2.
%macro CAT_UNDEF 2
    %undef %1%2
%endmacro
; INIT_MMX: point the abstract m0..m7 names at the mmx registers and select
; 8-byte memory ops; m8..m15 are undefined since mmx has no such registers.
; NOTE(review): scraped diff residue — the explicit %define m0..m7 /
; %undef m8..m15 lines are the pre-commit side; the %rep loops are the
; post-commit replacement (the second loop continues %%i from 8 to 15).
%macro INIT_MMX 0
    %define RESET_MM_PERMUTATION INIT_MMX
    %define regsize 8
    %define num_mmregs 8
    %define mova movq
    %define movu movq
    %define movh movd
    %define movnt movntq
    %define m0 mm0
    %define m1 mm1
    %define m2 mm2
    %define m3 mm3
    %define m4 mm4
    %define m5 mm5
    %define m6 mm6
    %define m7 mm7
    %undef m8
    %undef m9
    %undef m10
    %undef m11
    %undef m12
    %undef m13
    %undef m14
    %undef m15
    %assign %%i 0
    %rep 8
        CAT_DEFINE m, %%i, mm %+ %%i        ; m<i> = mm<i>
        CAT_DEFINE nmm, %%i, %%i            ; reverse map: nmm<i> = i
        %assign %%i %%i+1
    %endrep
    %rep 8                                  ; %%i continues at 8..15
        CAT_UNDEF m, %%i
        CAT_UNDEF nmm, %%i
        %assign %%i %%i+1
    %endrep
%endmacro
; INIT_XMM: point the abstract m# names at the xmm registers and select
; 16-byte memory ops; 16 registers on x86-64, 8 otherwise.
; NOTE(review): scraped diff residue — the explicit %define m0..m15 lines
; are the pre-commit side; the %rep loop is the post-commit replacement.
%macro INIT_XMM 0
    %define RESET_MM_PERMUTATION INIT_XMM
    %define regsize 16
    %define num_mmregs 8
    %ifdef ARCH_X86_64
    %define num_mmregs 16
    %endif
    %define mova movdqa
    %define movu movdqu
    %define movh movq
    %define movnt movntdq
    %define m0 xmm0
    %define m1 xmm1
    %define m2 xmm2
    %define m3 xmm3
    %define m4 xmm4
    %define m5 xmm5
    %define m6 xmm6
    %define m7 xmm7
    %ifdef ARCH_X86_64
    %define m8 xmm8
    %define m9 xmm9
    %define m10 xmm10
    %define m11 xmm11
    %define m12 xmm12
    %define m13 xmm13
    %define m14 xmm14
    %define m15 xmm15
    %endif
    %assign %%i 0
    %rep num_mmregs
        CAT_DEFINE m, %%i, xmm %+ %%i       ; m<i> = xmm<i>
        CAT_DEFINE nxmm, %%i, %%i           ; reverse map: nxmm<i> = i
        %assign %%i %%i+1
    %endrep
%endmacro
INIT_MMX
...
...
@@ -365,65 +365,57 @@ INIT_MMX
; PERMUTE: apply an arbitrary compile-time register permutation, given as a
; list of (dst, src) pairs. All sources are snapshotted into tmp# first so
; cyclic permutations work, then assigned; the n# reverse maps move along.
%macro PERMUTE 2-*
; takes a list of pairs to swap
%rep %0/2
    %xdefine tmp%2 m%2          ; snapshot source mapping
    %xdefine ntmp%2 nm%2        ; and its reverse map
    %rotate 2
%endrep
%rep %0/2
    %xdefine m%1 tmp%2          ; m<dst> = old m<src>
    %xdefine nm%1 ntmp%2
    %undef tmp%2
    %undef ntmp%2
    %rotate 2
%endrep
%endmacro
; SWAP: rotate a chain of register names at compile time (SWAP a,b,c swaps
; a<->b then b<->c). This commit adds the %else branch so arguments may be
; given as mmregs (SWAP m0,m1) as well as numbers (SWAP 0,1).
%macro SWAP 2-*
; swaps a single chain (sometimes more concise than pairs)
%rep %0-1
%ifdef m%1
    ; Numeric form: for "SWAP 0,1", m0 is a define so m%1 exists. (For the
    ; mmreg form this tests e.g. "mm0", which is never %defined -> %else.)
    %xdefine tmp m%1
    %xdefine m%1 m%2
    %xdefine m%2 tmp
    CAT_XDEFINE n, m%1, %1      ; refresh reverse maps (n<reg> = number)
    CAT_XDEFINE n, m%2, %2
%else
    ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
    ; Be careful using the mode in nested macros though, as in some cases there may be
    ; other copies of m# that have already been dereferenced and don't get updated correctly.
    %xdefine %%n1 n %+ %1       ; %1 expands to a real reg; n<reg> = its number
    %xdefine %%n2 n %+ %2
    %xdefine tmp m %+ %%n1
    CAT_XDEFINE m, %%n1, m %+ %%n2
    CAT_XDEFINE m, %%n2, tmp
    CAT_XDEFINE n, m %+ %%n1, %%n1
    CAT_XDEFINE n, m %+ %%n2, %%n2
%endif
    %undef tmp
    %rotate 1
%endrep
%endmacro
; SAVE_MM_PERMUTATION name: snapshot the current m# -> register mapping into
; %1_m0..%1_m<num_mmregs-1>, so a callee's expected permutation can be
; restored later with LOAD_MM_PERMUTATION.
; NOTE(review): scraped diff residue — the explicit %xdefine list is the
; pre-commit side; the %rep loop is the post-commit replacement.
%macro SAVE_MM_PERMUTATION 1
    %xdefine %1_m0 m0
    %xdefine %1_m1 m1
    %xdefine %1_m2 m2
    %xdefine %1_m3 m3
    %xdefine %1_m4 m4
    %xdefine %1_m5 m5
    %xdefine %1_m6 m6
    %xdefine %1_m7 m7
    %ifdef ARCH_X86_64
    %xdefine %1_m8 m8
    %xdefine %1_m9 m9
    %xdefine %1_m10 m10
    %xdefine %1_m11 m11
    %xdefine %1_m12 m12
    %xdefine %1_m13 m13
    %xdefine %1_m14 m14
    %xdefine %1_m15 m15
    %endif
    %assign %%i 0
    %rep num_mmregs
        CAT_XDEFINE %1_m, %%i, m %+ %%i     ; %1_m<i> = current m<i>
        %assign %%i %%i+1
    %endrep
%endmacro
; LOAD_MM_PERMUTATION name: restore the m# -> register mapping previously
; captured by SAVE_MM_PERMUTATION under the same name.
; NOTE(review): scraped diff residue — the explicit %xdefine list is the
; pre-commit side; the %rep loop is the post-commit replacement.
%macro LOAD_MM_PERMUTATION 1
    %xdefine m0 %1_m0
    %xdefine m1 %1_m1
    %xdefine m2 %1_m2
    %xdefine m3 %1_m3
    %xdefine m4 %1_m4
    %xdefine m5 %1_m5
    %xdefine m6 %1_m6
    %xdefine m7 %1_m7
    %ifdef ARCH_X86_64
    %xdefine m8 %1_m8
    %xdefine m9 %1_m9
    %xdefine m10 %1_m10
    %xdefine m11 %1_m11
    %xdefine m12 %1_m12
    %xdefine m13 %1_m13
    %xdefine m14 %1_m14
    %xdefine m15 %1_m15
    %endif
    %assign %%i 0
    %rep num_mmregs
        CAT_XDEFINE m, %%i, %1_m %+ %%i     ; m<i> = saved %1_m<i>
        %assign %%i %%i+1
    %endrep
%endmacro
%macro call 1
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment