Commit 223eedb0 authored by Loren Merritt

cosmetics in permutation macros

SWAP can now take mmregs directly, rather than just their numbers
parent 5b92682d
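For review context, a usage sketch of the new SWAP behavior (hypothetical snippet, assuming x86inc.asm is %include'd and INIT_MMX is in effect; not part of this diff):

    SWAP 0, 1     ; old style: exchange by number what the names m0 and m1 refer to
    SWAP m0, m1   ; now also accepted: register names are mapped back to slot
                  ; numbers through the new n# reverse defines (e.g. nmm0 -> 0)

This is what lets nested macros such as PABSW_MMX below call SWAP on the m# tokens they receive as arguments.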
--- a/common/x86/quant-a.asm
+++ b/common/x86/quant-a.asm
@@ -70,57 +70,49 @@ SECTION .text
 %endif
 %endmacro
-%macro QUANT_MMX 3
-;;; %1 (m64) dct[y][x]
-;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
-;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
-    mova    m0, %1 ; load dct coeffs
-    pxor    m1, m1
-    pcmpgtw m1, m0 ; sign(coeff)
-    pxor    m0, m1
-    psubw   m0, m1 ; abs(coeff)
-    paddusw m0, %3 ; round
-    pmulhuw m0, %2 ; divide
-    pxor    m0, m1 ; restore sign
-    psubw   m0, m1
-    mova    %1, m0 ; store
-%endmacro
-%macro QUANT_SSSE3 3
+%macro PABSW_MMX 2
+    pxor    %1, %1
+    pcmpgtw %1, %2
+    pxor    %2, %1
+    psubw   %2, %1
+    SWAP    %1, %2
+%endmacro
+%macro PSIGNW_MMX 2
+    pxor    %1, %2
+    psubw   %1, %2
+%endmacro
+%macro PABSW_SSSE3 2
+    pabsw   %1, %2
+%endmacro
+%macro PSIGNW_SSSE3 2
+    psignw  %1, %2
+%endmacro
+%macro QUANT_ONE 3
+;;; %1 (m64) dct[y][x]
+;;; %2 (m64/mmx) mf[y][x] or mf[0][0] (as uint16_t)
+;;; %3 (m64/mmx) bias[y][x] or bias[0][0] (as uint16_t)
     mova    m1, %1 ; load dct coeffs
-    pabsw   m0, m1
+    PABSW   m0, m1
     paddusw m0, %3 ; round
     pmulhuw m0, %2 ; divide
-    psignw  m0, m1 ; restore sign
+    PSIGNW  m0, m1 ; restore sign
     mova    %1, m0 ; store
 %endmacro
-INIT_MMX
-;-----------------------------------------------------------------------------
-; void x264_quant_2x2_dc_mmxext( int16_t dct[4], int mf, int bias )
-;-----------------------------------------------------------------------------
-cglobal x264_quant_2x2_dc_mmxext, 1,1
-    QUANT_DC_START
-    QUANT_MMX [r0], mm6, mm7
-    RET
-cglobal x264_quant_2x2_dc_ssse3, 1,1
-    QUANT_DC_START
-    QUANT_SSSE3 [r0], mm6, mm7
-    RET
 ;-----------------------------------------------------------------------------
 ; void x264_quant_4x4_dc_mmxext( int16_t dct[16], int mf, int bias )
 ;-----------------------------------------------------------------------------
-%macro QUANT_DC 4
+%macro QUANT_DC 2
 cglobal %1, 1,1
     QUANT_DC_START
 %assign x 0
-%rep %3
-    %2 [r0+x], m6, m7
-%assign x x+%4
+%rep %2
+    QUANT_ONE [r0+x], m6, m7
+%assign x x+regsize
 %endrep
     RET
 %endmacro
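A note on the MMX fallbacks above: pcmpgtw builds a per-word mask that is 0xffff for negative coefficients and 0 otherwise, and both emulations then rely on the two's-complement identity (x ^ mask) - mask, which negates x exactly when the mask is all-ones. PABSW_MMX leaves the mask in its second register and uses the new register-taking SWAP so the caller finds the absolute value in its first; QUANT_ONE's closing PSIGNW then applies the same identity to restore the sign (in the SSSE3 path, psignw instead takes the sign from the original coefficients). A minimal compile-time check of the identity on one 16-bit lane (hypothetical snippet, not part of this diff; NASM preprocessor only):

    %assign coeff 0xfffd                      ; -3 as a 16-bit word
    %assign mask ((coeff >> 15) * 0xffff)     ; 0xffff if the sign bit is set, else 0
    %assign absval (((coeff ^ mask) - mask) & 0xffff)
    %if absval != 3
        %error "(x ^ mask) - mask is not abs(x)"
    %endif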
@@ -128,31 +120,39 @@ cglobal %1, 1,1
 ;-----------------------------------------------------------------------------
 ; void x264_quant_4x4_mmx( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
 ;-----------------------------------------------------------------------------
-%macro QUANT_AC 4
+%macro QUANT_AC 2
 cglobal %1, 3,3
 %assign x 0
-%rep %3
-    %2 [r0+x], [r1+x], [r2+x]
-%assign x x+%4
+%rep %2
+    QUANT_ONE [r0+x], [r1+x], [r2+x]
+%assign x x+regsize
 %endrep
     RET
 %endmacro
+INIT_MMX
+%define PABSW PABSW_MMX
+%define PSIGNW PSIGNW_MMX
+QUANT_DC x264_quant_2x2_dc_mmxext, 1
 %ifndef ARCH_X86_64 ; not needed because sse2 is faster
-QUANT_DC x264_quant_4x4_dc_mmxext, QUANT_MMX, 4, 8
-QUANT_AC x264_quant_4x4_mmx, QUANT_MMX, 4, 8
-QUANT_AC x264_quant_8x8_mmx, QUANT_MMX, 16, 8
+QUANT_DC x264_quant_4x4_dc_mmxext, 4
+QUANT_AC x264_quant_4x4_mmx, 4
+QUANT_AC x264_quant_8x8_mmx, 16
 %endif
 INIT_XMM
-QUANT_DC x264_quant_4x4_dc_sse2, QUANT_MMX, 2, 16
-QUANT_AC x264_quant_4x4_sse2, QUANT_MMX, 2, 16
-QUANT_AC x264_quant_8x8_sse2, QUANT_MMX, 8, 16
-QUANT_DC x264_quant_4x4_dc_ssse3, QUANT_SSSE3, 2, 16
-QUANT_AC x264_quant_4x4_ssse3, QUANT_SSSE3, 2, 16
-QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
+QUANT_DC x264_quant_4x4_dc_sse2, 2
+QUANT_AC x264_quant_4x4_sse2, 2
+QUANT_AC x264_quant_8x8_sse2, 8
+%define PABSW PABSW_SSSE3
+%define PSIGNW PSIGNW_SSSE3
+QUANT_DC x264_quant_4x4_dc_ssse3, 2
+QUANT_AC x264_quant_4x4_ssse3, 2
+QUANT_AC x264_quant_8x8_ssse3, 8
+INIT_MMX
+QUANT_DC x264_quant_2x2_dc_ssse3, 1
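With the per-register stride now taken from regsize, each instantiation is just a function name plus an iteration count. For example, "QUANT_AC x264_quant_4x4_sse2, 2" under INIT_XMM (regsize = 16) expands to roughly the following (hypothetical expansion sketch):

    cglobal x264_quant_4x4_sse2, 3,3
        QUANT_ONE [r0+0],  [r1+0],  [r2+0]    ; quantize coefficients 0..7
        QUANT_ONE [r0+16], [r1+16], [r2+16]   ; quantize coefficients 8..15
        RET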
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -294,56 +294,56 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 ; merge mmx and sse*
 %macro CAT_DEFINE 3
     %define %1%2 %3
 %endmacro
 %macro CAT_XDEFINE 3
     %xdefine %1%2 %3
 %endmacro
 %macro CAT_UNDEF 2
     %undef %1%2
 %endmacro
 %macro INIT_MMX 0
     %define RESET_MM_PERMUTATION INIT_MMX
     %define regsize 8
     %define num_mmregs 8
     %define mova movq
     %define movu movq
     %define movh movd
     %define movnt movntq
-    %define m0 mm0
-    %define m1 mm1
-    %define m2 mm2
-    %define m3 mm3
-    %define m4 mm4
-    %define m5 mm5
-    %define m6 mm6
-    %define m7 mm7
-    %undef m8
-    %undef m9
-    %undef m10
-    %undef m11
-    %undef m12
-    %undef m13
-    %undef m14
-    %undef m15
+    %assign %%i 0
+    %rep 8
+    CAT_DEFINE m, %%i, mm %+ %%i
+    CAT_DEFINE nmm, %%i, %%i
+    %assign %%i %%i+1
+    %endrep
+    %rep 8
+    CAT_UNDEF m, %%i
+    CAT_UNDEF nmm, %%i
+    %assign %%i %%i+1
+    %endrep
 %endmacro
 %macro INIT_XMM 0
     %define RESET_MM_PERMUTATION INIT_XMM
     %define regsize 16
     %define num_mmregs 8
     %ifdef ARCH_X86_64
     %define num_mmregs 16
     %endif
     %define mova movdqa
     %define movu movdqu
     %define movh movq
     %define movnt movntdq
-    %define m0 xmm0
-    %define m1 xmm1
-    %define m2 xmm2
-    %define m3 xmm3
-    %define m4 xmm4
-    %define m5 xmm5
-    %define m6 xmm6
-    %define m7 xmm7
-    %ifdef ARCH_X86_64
-    %define m8 xmm8
-    %define m9 xmm9
-    %define m10 xmm10
-    %define m11 xmm11
-    %define m12 xmm12
-    %define m13 xmm13
-    %define m14 xmm14
-    %define m15 xmm15
-    %endif
+    %assign %%i 0
+    %rep num_mmregs
+    CAT_DEFINE m, %%i, xmm %+ %%i
+    CAT_DEFINE nxmm, %%i, %%i
+    %assign %%i %%i+1
+    %endrep
 %endmacro
 INIT_MMX
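The %rep loops are equivalent to the deleted explicit lists, but additionally build the reverse map from register name back to slot number that SWAP's new register mode depends on. A sketch of what INIT_MMX now leaves defined (hypothetical expansion, first iterations only):

    %define m0 mm0     ; from CAT_DEFINE m, 0, mm0
    %define nmm0 0     ; from CAT_DEFINE nmm, 0, 0: mm0 currently sits in slot 0
    %define m1 mm1
    %define nmm1 1
    ; ... through m7/nmm7; the second loop then %undef's m8..m15 and nmm8..nmm15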
@@ -365,65 +365,57 @@ INIT_MMX
 %macro PERMUTE 2-* ; takes a list of pairs to swap
 %rep %0/2
     %xdefine tmp%2 m%2
     %xdefine ntmp%2 nm%2
     %rotate 2
 %endrep
 %rep %0/2
     %xdefine m%1 tmp%2
     %xdefine nm%1 ntmp%2
     %undef tmp%2
     %undef ntmp%2
     %rotate 2
 %endrep
 %endmacro
 %macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
 %rep %0-1
+%ifdef m%1
     %xdefine tmp m%1
     %xdefine m%1 m%2
     %xdefine m%2 tmp
     CAT_XDEFINE n, m%1, %1
     CAT_XDEFINE n, m%2, %2
+%else
+    ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
+    ; Be careful using this mode in nested macros though, as in some cases there may be
+    ; other copies of m# that have already been dereferenced and don't get updated correctly.
+    %xdefine %%n1 n %+ %1
+    %xdefine %%n2 n %+ %2
+    %xdefine tmp m %+ %%n1
+    CAT_XDEFINE m, %%n1, m %+ %%n2
+    CAT_XDEFINE m, %%n2, tmp
+    CAT_XDEFINE n, m %+ %%n1, %%n1
+    CAT_XDEFINE n, m %+ %%n2, %%n2
+%endif
     %undef tmp
 %rotate 1
 %endrep
 %endmacro
 %macro SAVE_MM_PERMUTATION 1
-    %xdefine %1_m0 m0
-    %xdefine %1_m1 m1
-    %xdefine %1_m2 m2
-    %xdefine %1_m3 m3
-    %xdefine %1_m4 m4
-    %xdefine %1_m5 m5
-    %xdefine %1_m6 m6
-    %xdefine %1_m7 m7
-    %ifdef ARCH_X86_64
-    %xdefine %1_m8 m8
-    %xdefine %1_m9 m9
-    %xdefine %1_m10 m10
-    %xdefine %1_m11 m11
-    %xdefine %1_m12 m12
-    %xdefine %1_m13 m13
-    %xdefine %1_m14 m14
-    %xdefine %1_m15 m15
-    %endif
+    %assign %%i 0
+    %rep num_mmregs
+    CAT_XDEFINE %1_m, %%i, m %+ %%i
+    %assign %%i %%i+1
+    %endrep
 %endmacro
 %macro LOAD_MM_PERMUTATION 1
-    %xdefine m0 %1_m0
-    %xdefine m1 %1_m1
-    %xdefine m2 %1_m2
-    %xdefine m3 %1_m3
-    %xdefine m4 %1_m4
-    %xdefine m5 %1_m5
-    %xdefine m6 %1_m6
-    %xdefine m7 %1_m7
-    %ifdef ARCH_X86_64
-    %xdefine m8 %1_m8
-    %xdefine m9 %1_m9
-    %xdefine m10 %1_m10
-    %xdefine m11 %1_m11
-    %xdefine m12 %1_m12
-    %xdefine m13 %1_m13
-    %xdefine m14 %1_m14
-    %xdefine m15 %1_m15
-    %endif
+    %assign %%i 0
+    %rep num_mmregs
+    CAT_XDEFINE m, %%i, %1_m %+ %%i
+    %assign %%i %%i+1
+    %endrep
 %endmacro
 %macro call 1
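Finally, a trace of the new SWAP branch against the commit message (hypothetical state: suppose an earlier permutation left m0 -> mm2 and m1 -> mm5):

    SWAP m0, m1
    ; m%1 pastes to "mm0", which is not a %define, so the %else branch runs:
    ;   %%n1 = n %+ m0 -> n %+ mm2 -> nmm2 -> 0   (m0 expands to mm2 first,
    ;   %%n2 = n %+ m1 -> n %+ mm5 -> nmm5 -> 1    then nmm# gives its slot)
    ;   slots 0 and 1 are exchanged (m0 -> mm5, m1 -> mm2), and nmm2/nmm5
    ;   are redefined so the register -> slot map stays consistent

The same mechanism accepts raw registers ("SWAP mm2, mm5" swaps whichever m# currently hold them), which plain numbers could not express; the caution in the comment applies when a copy of an m# token was already expanded before the swap. The SAVE/LOAD_MM_PERMUTATION rewrite is behavior-preserving: the loops record and restore m0..m(num_mmregs-1) under %1-prefixed names exactly as the deleted explicit lists did.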