Commit acabceb6, authored by Ronald S. Bultje, committed by Fiona Glaser

Change %ifdef directives to %if directives in *.asm files

This allows combining multiple conditionals in a single statement.
parent 82d8cdde
......@@ -87,12 +87,13 @@ X86SRC = $(X86SRC0:%=common/x86/%)
ifeq ($(ARCH),X86)
ARCH_X86 = yes
ASMSRC = $(X86SRC) common/x86/pixel-32.asm
ASFLAGS += -DARCH_X86_64=0
endif
ifeq ($(ARCH),X86_64)
ARCH_X86 = yes
ASMSRC = $(X86SRC:-32.asm=-64.asm) common/x86/trellis-64.asm
ASFLAGS += -DARCH_X86_64
ASFLAGS += -DARCH_X86_64=1
endif
ifdef ARCH_X86
......
......@@ -34,10 +34,10 @@ cextern cabac_transition
cextern cabac_renorm_shift
; t3 must be ecx, since it's used for shift.
%ifdef WIN64
%if WIN64
DECLARE_REG_TMP 3,1,2,0,6,5,4,2
%define pointer resq
%elifdef ARCH_X86_64
%elif ARCH_X86_64
DECLARE_REG_TMP 0,1,2,3,4,5,6,6
%define pointer resq
%else
......@@ -81,7 +81,7 @@ cglobal cabac_encode_decision_asm, 0,7
and t4d, t6d
shr t5d, 6
movifnidn t2d, r2m
%ifdef WIN64
%if WIN64
PUSH r7
%endif
LOAD_GLOBAL t5d, cabac_range_lps-4, t5, t4*2
......@@ -98,7 +98,7 @@ cglobal cabac_encode_decision_asm, 0,7
mov t4d, t3d
shr t3d, 3
LOAD_GLOBAL t3d, cabac_renorm_shift, 0, t3
%ifdef WIN64
%if WIN64
POP r7
%endif
shl t4d, t3b
......@@ -119,7 +119,7 @@ cglobal cabac_encode_bypass_asm, 0,3
lea t7d, [t7*2+t3]
mov t3d, [t0+cb.queue]
inc t3d
%ifdef UNIX64 ; .putbyte compiles to nothing but a jmp
%if UNIX64 ; .putbyte compiles to nothing but a jmp
jge cabac_putbyte
%else
jge .putbyte
......@@ -153,7 +153,7 @@ cglobal cabac_encode_terminal_asm, 0,3
cabac_putbyte:
; alive: t0=cb t3=queue t6=low
%ifdef WIN64
%if WIN64
DECLARE_REG_TMP 3,6,1,0,2,5,4
%endif
mov t1d, -1
......
......@@ -65,7 +65,7 @@ cglobal cpu_xgetbv, 3,7
mov [rsi], edx
RET
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
......
......@@ -157,7 +157,7 @@ cextern hsub_mul
SWAP %4, %9, %8
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT8 0
cglobal sub8x8_dct8, 3,3,8
......
......@@ -137,11 +137,11 @@ cextern hsub_mul
SWAP %4, %9, %8
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT8 0
cglobal sub8x8_dct8, 3,3,14
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......@@ -194,7 +194,7 @@ SUB8x8_DCT8
%macro ADD8x8_IDCT8 0
cglobal add8x8_idct8, 2,2,16
add r1, 128
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......@@ -260,7 +260,7 @@ cglobal sub8x8_dct, 3,3,10
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......@@ -287,7 +287,7 @@ cglobal sub8x8_dct8, 3,3,11
%if cpuflag(ssse3)
mova m7, [hsub_mul]
%endif
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......@@ -330,7 +330,7 @@ DCT_SUB8
cglobal add8x8_idct8, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......@@ -369,7 +369,7 @@ ADD8x8_IDCT8
cglobal add8x8_idct, 2,2,11
add r0, 4*FDEC_STRIDE
pxor m7, m7
%ifdef WIN64
%if WIN64
call .skip_prologue
RET
%endif
......
......@@ -82,7 +82,7 @@ cextern pw_pmpmpmpm
SWAP %1, %3
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void dct4x4dc( dctcoef d[4][4] )
;-----------------------------------------------------------------------------
......@@ -134,7 +134,7 @@ cglobal dct4x4dc, 1,1
RET
%endif ; HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void idct4x4dc( int32_t d[4][4] )
;-----------------------------------------------------------------------------
......@@ -179,7 +179,7 @@ cglobal idct4x4dc, 1,1
RET
%endif ; HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void sub4x4_dct( dctcoef dct[4][4], pixel *pix1, pixel *pix2 )
;-----------------------------------------------------------------------------
......@@ -236,7 +236,7 @@ INIT_MMX ssse3
SUB_DCT4
%endif ; HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add4x4_idct( pixel *p_dst, dctcoef dct[4][4] )
;-----------------------------------------------------------------------------
......@@ -357,7 +357,7 @@ INIT_MMX
;-----------------------------------------------------------------------------
%macro SUB_NxN_DCT 7
cglobal %1, 3,3,%7
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
%if mmsize == 8
pxor m7, m7
%else
......@@ -378,7 +378,7 @@ cglobal %1, 3,3,%7
add r0, %3
add r1, %4-%5-%6*FENC_STRIDE
add r2, %4-%5-%6*FDEC_STRIDE
%ifdef WIN64
%if WIN64
call %2.skip_prologue
RET
%else
......@@ -390,7 +390,7 @@ cglobal %1, 3,3,%7
; void add8x8_idct( uint8_t *pix, int16_t dct[4][4][4] )
;-----------------------------------------------------------------------------
%macro ADD_NxN_IDCT 6-7
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
cglobal %1, 2,2,%7
%if %3==256
add r1, 128
......@@ -412,7 +412,7 @@ cglobal %1, 2,2,11
call %2.skip_prologue
add r0, %4-%5-%6*FDEC_STRIDE
add r1, %3
%ifdef WIN64
%if WIN64
call %2.skip_prologue
RET
%else
......@@ -420,7 +420,7 @@ cglobal %1, 2,2,11
%endif
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_MMX
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx, 64, 8, 0, 0, 0
SUB_NxN_DCT sub16x16_dct_mmx, sub8x8_dct_mmx, 64, 16, 8, 8, 0
......@@ -440,7 +440,7 @@ SUB_NxN_DCT sub16x16_dct8_sse2, sub8x8_dct8_sse2, 256, 16, 0, 0, 14
SUB_NxN_DCT sub16x16_dct8_sse4, sub8x8_dct8_sse4, 256, 16, 0, 0, 14
SUB_NxN_DCT sub16x16_dct8_avx, sub8x8_dct8_avx, 256, 16, 0, 0, 14
%else ; !HIGH_BIT_DEPTH
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX
SUB_NxN_DCT sub8x8_dct_mmx, sub4x4_dct_mmx, 32, 4, 0, 0, 0
ADD_NxN_IDCT add8x8_idct_mmx, add4x4_idct_mmx, 32, 4, 0, 0
......@@ -481,7 +481,7 @@ SUB_NxN_DCT sub16x16_dct8_ssse3, sub8x8_dct8_ssse3, 128, 8, 0, 0, 11
SUB_NxN_DCT sub16x16_dct8_avx, sub8x8_dct8_avx, 128, 8, 0, 0, 11
%endif ; HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void add8x8_idct_dc( pixel *p_dst, dctcoef *dct2x2 )
;-----------------------------------------------------------------------------
......@@ -669,7 +669,7 @@ INIT_XMM
cglobal add16x16_idct_dc_sse2, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
%ifdef WIN64
%if WIN64
call .loop
RET
%endif
......@@ -701,7 +701,7 @@ cglobal add16x16_idct_dc_sse2, 2,2,8
cglobal add16x16_idct_dc, 2,2,8
call .loop
add r0, FDEC_STRIDE*4
%ifdef WIN64
%if WIN64
call .loop
RET
%endif
......@@ -769,7 +769,7 @@ ADD16x16
psubw m0, m1 ; d02-d13 s02-s13 d02+d13 s02+s13
%endmacro
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
INIT_MMX
cglobal sub8x8_dct_dc_mmx2, 3,3
DCTDC_2ROW_MMX m0, m4, 0, 0
......@@ -874,7 +874,7 @@ SUB8x16_DCT_DC
paddw %1, m0
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro SUB8x8_DCT_DC_10 0
cglobal sub8x8_dct_dc, 3,3,3
DCTDC_4ROW_SSE2 m1, 0
......@@ -1042,7 +1042,7 @@ cglobal zigzag_scan_8x8_frame, 2,2,8
RET
%endmacro
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
INIT_XMM sse2
SCAN_8x8
INIT_XMM ssse3
......@@ -1137,7 +1137,7 @@ cglobal zigzag_scan_8x8_frame, 2,2,8
RET
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse2
SCAN_8x8_FRAME 4 , dq, qdq, dq, d
INIT_XMM avx
......@@ -1178,7 +1178,7 @@ cglobal zigzag_scan_4x4_frame, 2,2,8*(mmsize)/16
RET
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse2
SCAN_4x4 4 , dq, qdq, dq
INIT_XMM avx
......@@ -1221,7 +1221,7 @@ cglobal zigzag_scan_4x4_frame, 2,2
RET
%endif ; !HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void zigzag_scan_4x4_field( int32_t level[16], int32_t dct[4][4] )
;-----------------------------------------------------------------------------
......@@ -1348,7 +1348,7 @@ cglobal zigzag_scan_8x8_field, 2,3,8
mova [r0+60*SIZEOF_DCTCOEF], m7
RET
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse4
SCAN_8x8 d, dq, qdq, dq, 4
INIT_XMM avx
......@@ -1417,7 +1417,7 @@ cglobal zigzag_sub_4x4%1_%2, 3,3,8
RET
%endmacro
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
INIT_XMM ssse3
ZIGZAG_SUB_4x4 , frame
ZIGZAG_SUB_4x4 ac, frame
......@@ -1459,7 +1459,7 @@ cglobal zigzag_interleave_8x8_cavlc, 3,3,8
packsswb m5, m6
packsswb m5, m5
pxor m0, m0
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
packsswb m5, m5
%endif
pcmpeqb m5, m0
......@@ -1471,7 +1471,7 @@ cglobal zigzag_interleave_8x8_cavlc, 3,3,8
RET
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse2
ZIGZAG_8x8_CAVLC D
INIT_XMM avx
......@@ -1511,7 +1511,7 @@ ZIGZAG_8x8_CAVLC W
%endif
%endmacro
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
%macro ZIGZAG_8x8_CAVLC 0
cglobal zigzag_interleave_8x8_cavlc, 3,3,8
INTERLEAVE_XMM 0
......
......@@ -43,7 +43,7 @@ cextern pw_4
cextern pw_00ff
cextern pw_pixel_max
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
; out: %4 = |%1-%2|-%3
; clobbers: %5
%macro ABS_SUB 5
......@@ -306,7 +306,7 @@ cglobal deblock_h_luma, 5,6,8
RET
%endmacro
%ifdef ARCH_X86_64
%if ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
; out: m0=p1', m3=q1', m1=p0', m2=q0'
......@@ -437,7 +437,7 @@ DEBLOCK_LUMA_64
; %1=p0 %2=p1 %3=p2 %4=p3 %5=q0 %6=q1 %7=mask0
; %8=mask1p %9=2 %10=p0' %11=p1' %12=p2'
%macro LUMA_INTRA_P012 12 ; p0..p3 in memory
%ifdef ARCH_X86_64
%if ARCH_X86_64
paddw t0, %3, %2
mova t2, %4
paddw t2, %3
......@@ -504,7 +504,7 @@ DEBLOCK_LUMA_64
LOAD_AB t0, t1, r2d, r3d
mova %1, t0
LOAD_MASK m0, m1, m2, m3, %1, t1, t0, t2, t3
%ifdef ARCH_X86_64
%if ARCH_X86_64
mova %2, t0 ; mask0
psrlw t3, %1, 2
%else
......@@ -601,7 +601,7 @@ DEBLOCK_LUMA_64
%endif
%endmacro
%ifdef ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
......@@ -789,7 +789,7 @@ cglobal deblock_h_luma_intra, 4,7,8
RET
%endmacro
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
......@@ -802,7 +802,7 @@ DEBLOCK_LUMA_INTRA
%endif
%endif ; HIGH_BIT_DEPTH
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
; expands to [base],...,[base+7*stride]
%define PASS8ROWS(base, base3, stride, stride3) \
[base], [base+stride], [base+stride*2], [base3], \
......@@ -1010,7 +1010,7 @@ DEBLOCK_LUMA_INTRA
; out: %4 = |%1-%2|>%3
; clobbers: %5
%macro DIFF_GT2 5
%ifdef ARCH_X86_64
%if ARCH_X86_64
psubusb %5, %2, %1
psubusb %4, %1, %2
%else
......@@ -1088,7 +1088,7 @@ DEBLOCK_LUMA_INTRA
mova %4, %2
%endmacro
%ifdef ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
......@@ -1143,7 +1143,7 @@ cglobal deblock_h_luma, 5,9
lea r8, [r7*3]
lea r6, [r0-4]
lea r5, [r0-4+r8]
%ifdef WIN64
%if WIN64
sub rsp, 0x98
%define pix_tmp rsp+0x30
%else
......@@ -1162,7 +1162,7 @@ cglobal deblock_h_luma, 5,9
; don't backup r6, r5, r7, r8 because deblock_v_luma_sse2 doesn't use them
lea r0, [pix_tmp+0x30]
mov r1d, 0x10
%ifdef WIN64
%if WIN64
mov [rsp+0x20], r4
%endif
call deblock_v_luma
......@@ -1186,7 +1186,7 @@ cglobal deblock_h_luma, 5,9
movq m3, [pix_tmp+0x40]
TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
%ifdef WIN64
%if WIN64
add rsp, 0x98
%else
add rsp, 0x68
......@@ -1324,7 +1324,7 @@ DEBLOCK_LUMA v, 16
%macro LUMA_INTRA_P012 4 ; p0..p3 in memory
%ifdef ARCH_X86_64
%if ARCH_X86_64
pavgb t0, p2, p1
pavgb t1, p0, q0
%else
......@@ -1335,7 +1335,7 @@ DEBLOCK_LUMA v, 16
%endif
pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
mova t5, t1
%ifdef ARCH_X86_64
%if ARCH_X86_64
paddb t2, p2, p1
paddb t3, p0, q0
%else
......@@ -1353,7 +1353,7 @@ DEBLOCK_LUMA v, 16
pand t2, mpb_1
psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
%ifdef ARCH_X86_64
%if ARCH_X86_64
pavgb t1, p2, q1
psubb t2, p2, q1
%else
......@@ -1428,7 +1428,7 @@ DEBLOCK_LUMA v, 16
%define t1 m5
%define t2 m6
%define t3 m7
%ifdef ARCH_X86_64
%if ARCH_X86_64
%define p2 m8
%define q2 m9
%define t4 m10
......@@ -1455,7 +1455,7 @@ DEBLOCK_LUMA v, 16
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_%1_luma_intra, 4,6,16
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
sub esp, 0x60
%endif
lea r4, [r1*4]
......@@ -1470,7 +1470,7 @@ cglobal deblock_%1_luma_intra, 4,6,16
mova p0, [r4+r5]
mova q0, [r0]
mova q1, [r0+r1]
%ifdef ARCH_X86_64
%if ARCH_X86_64
pxor mpb_0, mpb_0
mova mpb_1, [pb_1]
LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
......@@ -1506,13 +1506,13 @@ cglobal deblock_%1_luma_intra, 4,6,16
LUMA_INTRA_SWAP_PQ
LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
.end:
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
add esp, 0x60
%endif
RET
INIT_MMX cpuname
%ifdef ARCH_X86_64
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
......@@ -1590,13 +1590,13 @@ INIT_XMM sse2
DEBLOCK_LUMA_INTRA v
INIT_XMM avx
DEBLOCK_LUMA_INTRA v
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_LUMA_INTRA v8
%endif
%endif ; !HIGH_BIT_DEPTH
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro CHROMA_DEBLOCK_P0_Q0_INTRA 7
......@@ -1870,7 +1870,7 @@ cglobal deblock_h_chroma_422, 5,7,8
REP_RET
%endmacro ; DEBLOCK_CHROMA
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_CHROMA
%endif
......@@ -1880,7 +1880,7 @@ INIT_XMM avx
DEBLOCK_CHROMA
%endif ; HIGH_BIT_DEPTH
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
%macro CHROMA_V_START 0
dec r2d ; alpha-1
dec r3d ; beta-1
......@@ -1974,7 +1974,7 @@ INIT_XMM sse2
DEBLOCK_CHROMA
INIT_XMM avx
DEBLOCK_CHROMA
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_CHROMA
%endif
......@@ -2002,14 +2002,14 @@ cglobal deblock_h_chroma_mbaff, 5,7,8
INIT_XMM sse2
DEBLOCK_H_CHROMA_420_MBAFF
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
INIT_MMX mmx2
DEBLOCK_H_CHROMA_420_MBAFF
%endif
%macro DEBLOCK_H_CHROMA_422 0
cglobal deblock_h_chroma_422, 5,8,8
%ifdef ARCH_X86_64
%if ARCH_X86_64
%define cntr r7
%else
%define cntr dword r0m
......@@ -2127,7 +2127,7 @@ DEBLOCK_CHROMA_INTRA_BODY
DEBLOCK_CHROMA_INTRA
INIT_MMX mmx2
DEBLOCK_CHROMA_INTRA_BODY
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
DEBLOCK_CHROMA_INTRA
%endif
......
......@@ -58,13 +58,13 @@ cextern pd_32
; implicit weighted biprediction
;=============================================================================
; assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
%ifdef WIN64
%if WIN64
DECLARE_REG_TMP 0,1,2,3,4,5,4,5
%macro AVG_START 0-1 0
PROLOGUE 5,7,%1
movsxd r5, dword r5m
%endmacro
%elifdef UNIX64
%elif UNIX64
DECLARE_REG_TMP 0,1,2,3,4,5,7,8
%macro AVG_START 0-1 0
PROLOGUE 6,9,%1
......@@ -91,7 +91,7 @@ cextern pd_32
REP_RET
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro BIWEIGHT_MMX 2
movh m0, %1
......@@ -157,7 +157,7 @@ cextern pd_32
SPLATW m3, m3 ; weight_dst,src
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro BIWEIGHT_ROW 4
BIWEIGHT [%2], [%3]
%if %4==mmsize/4
......@@ -196,7 +196,7 @@ cextern pd_32
cglobal pixel_avg_weight_w%1
BIWEIGHT_START
AVG_START %2
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
mova m7, [pw_pixel_max]
%endif
.height_loop:
......@@ -204,7 +204,7 @@ cglobal pixel_avg_weight_w%1
BIWEIGHT [t2], [t4]
SWAP 0, 6
BIWEIGHT [t2+SIZEOF_PIXEL*t3], [t4+SIZEOF_PIXEL*t5]
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
packssdw m6, m0
CLIPW m6, m5, m7
%else ;!HIGH_BIT_DEPTH
......@@ -229,7 +229,7 @@ INIT_MMX mmx2
AVG_WEIGHT 4
AVG_WEIGHT 8
AVG_WEIGHT 16
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse2
AVG_WEIGHT 4, 8
AVG_WEIGHT 8, 8
......@@ -251,7 +251,7 @@ AVG_WEIGHT 16, 7
; P frame explicit weighted prediction
;=============================================================================
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%macro WEIGHT_START 1 ; (width)
mova m0, [r4+ 0] ; 1<<denom
mova m3, [r4+16]
......@@ -414,7 +414,7 @@ AVG_WEIGHT 16, 7
;void mc_weight_wX( pixel *dst, int i_dst_stride, pixel *src, int i_src_stride, weight_t *weight, int h )
;-----------------------------------------------------------------------------
%ifdef ARCH_X86_64
%if ARCH_X86_64
%define NUMREGS 6
%define LOAD_HEIGHT
%define HEIGHT_REG r5d
......@@ -427,7 +427,7 @@ AVG_WEIGHT 16, 7
%endif
%assign XMMREGS 7
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%assign NUMREGS NUMREGS+1
%assign XMMREGS 8
%endif
......@@ -456,7 +456,7 @@ INIT_XMM sse2
WEIGHTER 8
WEIGHTER 16
WEIGHTER 20
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
WEIGHTER 12
INIT_XMM avx
WEIGHTER 8
......@@ -481,7 +481,7 @@ WEIGHTER 20
%macro OFFSET_OP 7
mov%6 m0, [%1]
mov%6 m1, [%2]
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
p%5usw m0, m2
p%5usw m1, m2
%ifidn %5,add
......@@ -503,7 +503,7 @@ WEIGHTER 20
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, u, a
%assign x (x+mmsize)
%else
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, h, h
%else
OFFSET_OP (%1+x), (%1+x+r3), (%2+x), (%2+x+r1), %4, d, d
......@@ -523,7 +523,7 @@ WEIGHTER 20
cglobal mc_offset%2_w%1, NUMREGS, NUMREGS
FIX_STRIDES r1, r3
mova m2, [r4]
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
%ifidn %2,add
mova m3, [pw_pixel_max]
%endif
......@@ -556,7 +556,7 @@ INIT_XMM avx
OFFSETPN 12
OFFSETPN 16
OFFSETPN 20
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_XMM sse2
OFFSETPN 8
INIT_XMM avx
......@@ -602,7 +602,7 @@ cglobal pixel_avg_w%1
%rep (%1*SIZEOF_PIXEL+mmsize-1)/mmsize
%2 m0, [t2+x]
%2 m1, [t2+x+SIZEOF_PIXEL*t3]
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
pavgw m0, [t4+x]
pavgw m1, [t4+x+SIZEOF_PIXEL*t5]
%else ;!HIGH_BIT_DEPTH
......@@ -616,7 +616,7 @@ cglobal pixel_avg_w%1
AVG_END
%endmacro
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
INIT_MMX mmx2
AVG_FUNC 4, movq, movq
......@@ -695,7 +695,7 @@ AVGH 4, 2
; pixel avg2
;=============================================================================
%ifdef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void pixel_avg2_wN( uint16_t *dst, int dst_stride,
; uint16_t *src1, int src_stride,
......@@ -879,7 +879,7 @@ cglobal pixel_avg2_w18_sse2, 6,7,6
REP_RET
%endif ; HIGH_BIT_DEPTH
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
;-----------------------------------------------------------------------------
; void pixel_avg2_w4( uint8_t *dst, int dst_stride,
; uint8_t *src1, int src_stride,
......@@ -1094,7 +1094,7 @@ cglobal pixel_avg2_w%1_cache%2_%3
%endif
%if 0 ; or %1==8 - but the extra branch seems too expensive
ja cachesplit
%ifdef ARCH_X86_64
%if ARCH_X86_64
test r4b, 1
%else
test byte r4m, 1
......@@ -1116,7 +1116,7 @@ cglobal pixel_avg2_w%1_cache%2_%3
INIT_MMX
AVG_CACHELINE_CHECK 8, 64, mmx2
AVG_CACHELINE_CHECK 12, 64, mmx2
%ifndef ARCH_X86_64
%if ARCH_X86_64 == 0
AVG_CACHELINE_CHECK 16, 64, mmx2
AVG_CACHELINE_CHECK 20, 64, mmx2
AVG_CACHELINE_CHECK 8, 32, mmx2
......@@ -1191,7 +1191,7 @@ cglobal pixel_avg2_w16_cache64_ssse3
%else
lea r6, [avg_w16_addr + r6]
%endif
%ifdef UNIX64
%if UNIX64
jmp r6
%else
call r6
......@@ -1258,7 +1258,7 @@ cglobal mc_copy_w4_mmx, 4,6
lea r5, [r3*3]
lea r4, [r1*3]
je .end
%ifndef HIGH_BIT_DEPTH
%if HIGH_BIT_DEPTH == 0
%define mova movd