Commit 699b38e0 authored by Loren Merritt, committed by Fiona Glaser

Remove unnecessary PIC support macros

yasm has a directive to enable PIC globally
parent 6953f9ee
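
For context: before this change, every reference to a static data symbol in the x86 asm carried a GLOBAL suffix, a macro that expanded to `wrt rip` in position-independent builds and to nothing otherwise. yasm's `default rel` directive instead makes every `[symbol]` memory operand RIP-relative by default, so the per-operand macro can simply be dropped. A minimal sketch of the two schemes (illustrative only, not the verbatim x86inc.asm contents):

    ; Old scheme: each data reference opts in to RIP-relative addressing.
    %ifdef PIC
        %define GLOBAL wrt rip   ; [pw_32 GLOBAL] assembles as [rel pw_32]
    %else
        %define GLOBAL           ; expands to nothing in non-PIC builds
    %endif
        mova m7, [pw_32 GLOBAL]

    ; New scheme: one directive makes all [symbol] operands RIP-relative,
    ; so a plain [pw_32] is already position-independent.
    %ifdef PIC
        default rel
    %endif
        mova m7, [pw_32]

Note that the `%ifdef PIC` branches around some `lea` instructions below survive the cleanup: RIP-relative addressing cannot take an index register, so an operand like `[avg_w16_addr + r6]` only assembles with an absolute address, and PIC builds must still materialize the base with `lea` and add the index in a separate instruction. Only the `GLOBAL` suffix goes away.
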
@@ -59,7 +59,7 @@ endstruc
  %macro LOAD_GLOBAL 4
  %ifdef PIC
  ; this would be faster if the arrays were declared in asm, so that I didn't have to duplicate the lea
- lea r11, [%2 GLOBAL]
+ lea r11, [%2]
  %ifnidn %3, 0
  add r11, %3
  %endif
......
@@ -349,7 +349,7 @@ cglobal x264_sub8x8_dct_%1, 3,3
  global x264_sub8x8_dct_%1.skip_prologue
  .skip_prologue:
  %ifnidn %1, sse2
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  %endif
  LOAD_DIFF8x4 0, 1, 2, 3, 6, 7, r1, r2-4*FDEC_STRIDE
  SPILL r0, 1,2
@@ -393,7 +393,7 @@ global x264_sub8x8_dct8_%1.skip_prologue
  LOAD_DIFF m7, m0, none, [r1+7*FENC_STRIDE], [r2+3*FDEC_STRIDE]
  UNSPILL r0, 0
  %else
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  LOAD_DIFF8x4 0, 1, 2, 3, 4, 7, r1, r2-4*FDEC_STRIDE
  SPILL r0, 0,1
  SWAP 1, 7
@@ -441,9 +441,9 @@ global x264_add8x8_idct_sse2.skip_prologue
  SPILL r1, 0
  TRANSPOSE2x4x4W 4,5,6,7,0
  UNSPILL r1, 0
- paddw m0, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
  IDCT4_1D 0,1,2,3,r1
- paddw m4, [pw_32 GLOBAL]
+ paddw m4, [pw_32]
  IDCT4_1D 4,5,6,7,r1
  SPILL r1, 6,7
  pxor m7, m7
@@ -466,7 +466,7 @@ global x264_add8x8_idct8_sse2.skip_prologue
  IDCT8_1D 0,1,2,3,4,5,6,7,r1
  SPILL r1, 6
  TRANSPOSE8x8W 0,1,2,3,4,5,6,7,[r1+0x60],[r1+0x40],1
- paddw m0, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
  SPILL r1, 0
  IDCT8_1D 0,1,2,3,4,5,6,7,r1
  SPILL r1, 6,7
......
@@ -143,7 +143,7 @@ INIT_XMM
  cglobal x264_sub8x8_dct_%1, 3,3,11
  add r2, 4*FDEC_STRIDE
  %ifnidn %1, sse2
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  %endif
  %ifdef WIN64
  call .skip_prologue
@@ -170,7 +170,7 @@ global x264_sub8x8_dct_%1.skip_prologue
  cglobal x264_sub8x8_dct8_%1, 3,3,11
  add r2, 4*FDEC_STRIDE
  %ifnidn %1, sse2
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  %endif
  %ifdef WIN64
  call .skip_prologue
@@ -227,7 +227,7 @@ global x264_add8x8_idct8_sse2.skip_prologue
  movdqa m7, [r1+0x70]
  IDCT8_1D 0,1,2,3,4,5,6,7,8,10
  TRANSPOSE8x8W 0,1,2,3,4,5,6,7,8
- paddw m0, [pw_32 GLOBAL] ; rounding for the >>6 at the end
+ paddw m0, [pw_32] ; rounding for the >>6 at the end
  IDCT8_1D 0,1,2,3,4,5,6,7,8,10
  DIFFx2 m0, m1, m8, m9, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]
  DIFFx2 m2, m3, m8, m9, [r0-2*FDEC_STRIDE], [r0-1*FDEC_STRIDE]
@@ -265,9 +265,9 @@ global x264_add8x8_idct_sse2.skip_prologue
  TRANSPOSE2x4x4W 0,1,2,3,8
  IDCT4_1D 4,5,6,7,8,10
  TRANSPOSE2x4x4W 4,5,6,7,8
- paddw m0, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
  IDCT4_1D 0,1,2,3,8,10
- paddw m4, [pw_32 GLOBAL]
+ paddw m4, [pw_32]
  IDCT4_1D 4,5,6,7,8,10
  DIFFx2 m0, m1, m8, m9, [r0-4*FDEC_STRIDE], [r0-3*FDEC_STRIDE]
  DIFFx2 m2, m3, m8, m9, [r0-2*FDEC_STRIDE], [r0-1*FDEC_STRIDE]
......
@@ -80,7 +80,7 @@ cglobal x264_dct4x4dc_mmx, 1,1
  movq m2, [r0+16]
  movq m1, [r0+ 8]
  movq m0, [r0+ 0]
- movq m7, [pw_8000 GLOBAL] ; convert to unsigned and back, so that pavgw works
+ movq m7, [pw_8000] ; convert to unsigned and back, so that pavgw works
  WALSH4_1D 0,1,2,3,4
  TRANSPOSE4x4W 0,1,2,3,4
  SUMSUB_BADC m1, m0, m3, m2, m4
@@ -123,7 +123,7 @@ cglobal x264_sub4x4_dct_%1, 3,3
  LOAD_DIFF m1, m4, m5, [r1+1*FENC_STRIDE], [r2+1*FDEC_STRIDE]
  LOAD_DIFF m2, m4, m5, [r1+2*FENC_STRIDE], [r2+2*FDEC_STRIDE]
  %else
- mova m5, [hsub_mul GLOBAL]
+ mova m5, [hsub_mul]
  LOAD_DIFF8x4_SSSE3 0, 3, 1, 2, 4, 5, r1, r2
  %endif
  DCT4_1D 0,1,2,3,4
@@ -151,7 +151,7 @@ cglobal x264_add4x4_idct_mmx, 2,2
  movq m0, [r1+ 0]
  IDCT4_1D 0,1,2,3,4,5
  TRANSPOSE4x4W 0,1,2,3,4
- paddw m0, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
  IDCT4_1D 0,1,2,3,4,5
  STORE_DIFF m0, m4, m7, [r0+0*FDEC_STRIDE]
  STORE_DIFF m1, m4, m7, [r0+1*FDEC_STRIDE]
@@ -179,7 +179,7 @@ cglobal x264_add4x4_idct_sse4, 2,2,6
  punpckhdq m2, m0
  SWAP 0, 1
- mova m1, [pw_32_0 GLOBAL]
+ mova m1, [pw_32_0]
  paddw m1, m0 ; row1/row0 corrected
  psraw m0, 1 ; row1>>1/...
  mova m3, m2 ; row3/row2
@@ -221,7 +221,7 @@ cglobal %1, 3,3,11
  pxor m7, m7
  %else
  add r2, 4*FDEC_STRIDE
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  %endif
  .skip_prologue:
  %ifdef WIN64
@@ -335,7 +335,7 @@ cglobal x264_add8x8_idct_dc_mmx, 2,2
  movq mm0, [r1]
  pxor mm1, mm1
  add r0, FDEC_STRIDE*4
- paddw mm0, [pw_32 GLOBAL]
+ paddw mm0, [pw_32]
  psraw mm0, 6
  psubw mm1, mm0
  packuswb mm0, mm0
@@ -354,10 +354,10 @@ cglobal x264_add8x8_idct_dc_ssse3, 2,2
  movq xmm0, [r1]
  pxor xmm1, xmm1
  add r0, FDEC_STRIDE*4
- paddw xmm0, [pw_32 GLOBAL]
+ paddw xmm0, [pw_32]
  psraw xmm0, 6
  psubw xmm1, xmm0
- movdqa xmm5, [pb_idctdc_unpack GLOBAL]
+ movdqa xmm5, [pb_idctdc_unpack]
  packuswb xmm0, xmm0
  packuswb xmm1, xmm1
  pshufb xmm0, xmm5
@@ -393,7 +393,7 @@ cglobal x264_add16x16_idct_dc_mmx, 2,3
  .loop:
  movq mm0, [r1]
  pxor mm1, mm1
- paddw mm0, [pw_32 GLOBAL]
+ paddw mm0, [pw_32]
  psraw mm0, 6
  psubw mm1, mm0
  packuswb mm0, mm0
@@ -447,8 +447,8 @@ cglobal x264_add16x16_idct_dc_sse2, 2,2,8
  punpcklwd xmm2, xmm2
  pxor xmm1, xmm1
  pxor xmm3, xmm3
- paddw xmm0, [pw_32 GLOBAL]
- paddw xmm2, [pw_32 GLOBAL]
+ paddw xmm0, [pw_32]
+ paddw xmm2, [pw_32]
  psraw xmm0, 6
  psraw xmm2, 6
  psubw xmm1, xmm0
@@ -477,11 +477,11 @@ cglobal x264_add16x16_idct_dc_ssse3, 2,2,8
  movdqa xmm0, [r1]
  add r1, 16
  pxor xmm1, xmm1
- paddw xmm0, [pw_32 GLOBAL]
+ paddw xmm0, [pw_32]
  psraw xmm0, 6
  psubw xmm1, xmm0
- movdqa xmm5, [ pb_idctdc_unpack GLOBAL]
- movdqa xmm6, [pb_idctdc_unpack2 GLOBAL]
+ movdqa xmm5, [ pb_idctdc_unpack]
+ movdqa xmm6, [pb_idctdc_unpack2]
  packuswb xmm0, xmm0
  packuswb xmm1, xmm1
  movdqa xmm2, xmm0
@@ -815,8 +815,8 @@ cglobal x264_zigzag_scan_4x4_frame_mmx, 2,2
  cglobal x264_zigzag_scan_4x4_frame_ssse3, 2,2
  movdqa xmm1, [r1+16]
  movdqa xmm0, [r1]
- pshufb xmm1, [pb_scan4frameb GLOBAL]
- pshufb xmm0, [pb_scan4framea GLOBAL]
+ pshufb xmm1, [pb_scan4frameb]
+ pshufb xmm0, [pb_scan4framea]
  movdqa xmm2, xmm1
  psrldq xmm1, 6
  palignr xmm2, xmm0, 6
@@ -963,9 +963,9 @@ cglobal x264_zigzag_sub_4x4%1_%2_ssse3, 3,3,8
  punpcklqdq xmm0, xmm2
  punpcklqdq xmm4, xmm6
  %ifidn %2, frame
- movdqa xmm7, [pb_sub4frame GLOBAL]
+ movdqa xmm7, [pb_sub4frame]
  %else
- movdqa xmm7, [pb_sub4field GLOBAL]
+ movdqa xmm7, [pb_sub4field]
  %endif
  pshufb xmm0, xmm7
  pshufb xmm4, xmm7
@@ -980,7 +980,7 @@ cglobal x264_zigzag_sub_4x4%1_%2_ssse3, 3,3,8
  psubw xmm1, xmm5
  %ifidn %1, ac
  movd r2d, xmm0
- pand xmm0, [pb_subacmask GLOBAL]
+ pand xmm0, [pb_subacmask]
  %endif
  movdqa [r0], xmm0
  pxor xmm2, xmm2
@@ -1039,7 +1039,7 @@ cglobal x264_zigzag_interleave_8x8_cavlc_mmx, 3,3
  packsswb m5, m5
  pxor m0, m0
  pcmpeqb m5, m0
- paddb m5, [pb_1 GLOBAL]
+ paddb m5, [pb_1]
  movd r0d, m5
  mov [r2+0], r0w
  shr r0d, 16
@@ -1085,7 +1085,7 @@ cglobal x264_zigzag_interleave_8x8_cavlc_sse2, 3,3,8
  packsswb m2, m2
  packsswb m2, m2
  pcmpeqb m5, m2
- paddb m5, [pb_1 GLOBAL]
+ paddb m5, [pb_1]
  movd r0d, m5
  mov [r2+0], r0w
  shr r0d, 16
......
@@ -233,19 +233,19 @@ SECTION .text
  ; clobbers: m0,3-6
  %macro DEBLOCK_P0_Q0 0
  mova m5, m1
- pxor m5, m2 ; p0^q0
- pand m5, [pb_01 GLOBAL] ; (p0^q0)&1
+ pxor m5, m2 ; p0^q0
+ pand m5, [pb_01] ; (p0^q0)&1
  pcmpeqb m4, m4
  pxor m3, m4
- pavgb m3, m0 ; (p1 - q1 + 256)>>1
- pavgb m3, [pb_03 GLOBAL] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
+ pavgb m3, m0 ; (p1 - q1 + 256)>>1
+ pavgb m3, [pb_03] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
  pxor m4, m1
- pavgb m4, m2 ; (q0 - p0 + 256)>>1
+ pavgb m4, m2 ; (q0 - p0 + 256)>>1
  pavgb m3, m5
- paddusb m3, m4 ; d+128+33
- mova m6, [pb_a1 GLOBAL]
+ paddusb m3, m4 ; d+128+33
+ mova m6, [pb_a1]
  psubusb m6, m3
- psubusb m3, [pb_a1 GLOBAL]
+ psubusb m3, [pb_a1]
  pminub m6, m7
  pminub m3, m7
  psubusb m1, m6
@@ -261,10 +261,10 @@ SECTION .text
  %macro LUMA_Q1 6
  mova %6, m1
  pavgb %6, m2
- pavgb %2, %6 ; avg(p2,avg(p0,q0))
+ pavgb %2, %6 ; avg(p2,avg(p0,q0))
  pxor %6, %3
- pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
- psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
+ pand %6, [pb_01] ; (p2^avg(p0,q0))&1
+ psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
  mova %6, %1
  psubusb %6, %5
  paddusb %5, %1
@@ -614,8 +614,8 @@ DEBLOCK_LUMA sse2, v, 16
  %define mask0 spill(2)
  %define mask1p spill(3)
  %define mask1q spill(4)
- %define mpb_00 [pb_00 GLOBAL]
- %define mpb_01 [pb_01 GLOBAL]
+ %define mpb_00 [pb_00]
+ %define mpb_01 [pb_01]
  %endif
  ;-----------------------------------------------------------------------------
@@ -639,7 +639,7 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16
  mova q1, [r0+r1]
  %ifdef ARCH_X86_64
  pxor mpb_00, mpb_00
- mova mpb_01, [pb_01 GLOBAL]
+ mova mpb_01, [pb_01]
  LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
  SWAP 7, 12 ; m12=mask0
  pavgb t5, mpb_00
@@ -658,8 +658,8 @@ cglobal x264_deblock_%2_luma_intra_%1, 4,6,16
  LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
  mova m4, t5
  mova mask0, m7
- pavgb m4, [pb_00 GLOBAL]
- pavgb m4, [pb_01 GLOBAL] ; alpha/4+1
+ pavgb m4, [pb_00]
+ pavgb m4, [pb_01] ; alpha/4+1
  DIFF_GT2 p0, q0, m4, m6, m7 ; m6 = |p0-q0| > alpha/4+1
  pand m6, mask0
  DIFF_GT2 p0, p2, m5, m4, m7 ; m4 = |p2-p0| > beta-1
@@ -835,7 +835,7 @@ chroma_inter_body_mmxext:
  %macro CHROMA_INTRA_P0 3
  movq m4, %1
  pxor m4, %3
- pand m4, [pb_01 GLOBAL] ; m4 = (p0^q1)&1
+ pand m4, [pb_01] ; m4 = (p0^q1)&1
  pavgb %1, %3
  psubusb %1, m4
  pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
......
@@ -89,9 +89,9 @@ SECTION .text
  %macro BIWEIGHT_START_MMX 0
  movd m2, r6m
  SPLATW m2, m2 ; weight_dst
- mova m3, [pw_64 GLOBAL]
+ mova m3, [pw_64]
  psubw m3, m2 ; weight_src
- mova m4, [pw_32 GLOBAL] ; rounding
+ mova m4, [pw_32] ; rounding
  pxor m5, m5
  %endmacro
@@ -111,7 +111,7 @@ SECTION .text
  shl t7d, 8
  add t6d, t7d
  movd m3, t6d
- mova m4, [pw_32 GLOBAL]
+ mova m4, [pw_32]
  SPLATW m3, m3 ; weight_dst,src
  %endmacro
@@ -641,7 +641,7 @@ AVG2_W20 sse2_misalign
  %macro INIT_SHIFT 2
  and eax, 7
  shl eax, 3
- movd %1, [sw_64 GLOBAL]
+ movd %1, [sw_64]
  movd %2, eax
  psubw %1, %2
  %endmacro
@@ -778,10 +778,10 @@ cglobal x264_pixel_avg2_w16_cache64_ssse3
  shl r6, 4 ;jump = (offset + align*2)*48
  %define avg_w16_addr avg_w16_align1_1_ssse3-(avg_w16_align2_2_ssse3-avg_w16_align1_1_ssse3)
  %ifdef PIC
- lea r11, [avg_w16_addr GLOBAL]
+ lea r11, [avg_w16_addr]
  add r6, r11
  %else
- lea r6, [avg_w16_addr + r6 GLOBAL]
+ lea r6, [avg_w16_addr + r6]
  %endif
  %ifdef UNIX64
  jmp r6
@@ -1007,7 +1007,7 @@ cglobal x264_mc_chroma_%1
  SPLATW m5, m5 ; m5 = dx
  SPLATW m6, m6 ; m6 = dy
- mova m4, [pw_8 GLOBAL]
+ mova m4, [pw_8]
  mova m0, m4
  psubw m4, m5 ; m4 = 8-dx
  psubw m0, m6 ; m0 = 8-dy
@@ -1042,7 +1042,7 @@ cglobal x264_mc_chroma_%1
  punpcklbw m2, m3
  punpcklbw m1, m3
- paddw m0, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
  pmullw m2, m5 ; line * cB
  pmullw m1, m7 ; line * cD
@@ -1084,9 +1084,9 @@ cglobal x264_mc_chroma_%1
  movd m6, r4d
  mov r5d, 1
  .mc1d:
- mova m5, [pw_8 GLOBAL]
+ mova m5, [pw_8]
  SPLATW m6, m6
- mova m7, [pw_4 GLOBAL]
+ mova m7, [pw_4]
  psubw m5, m6
  movifnidn r0, r0mp
  movifnidn r1d, r1m
@@ -1166,7 +1166,7 @@ cglobal x264_mc_chroma_ssse3%1, 0,6,%2
  imul r4d, t0d ; (x*255+8)*(8-y)
  cmp dword r6m, 4
  jg .width8
- mova m5, [pw_32 GLOBAL]
+ mova m5, [pw_32]
  movd m6, r5d
  movd m7, r4d
  movifnidn r0, r0mp
@@ -1178,10 +1178,10 @@ cglobal x264_mc_chroma_ssse3%1, 0,6,%2
  and r2, ~3
  and r5, 3
  %ifdef PIC
- lea r11, [ch_shuffle GLOBAL]
+ lea r11, [ch_shuffle]
  movu m5, [r11 + r5*2]
  %else
- movu m5, [ch_shuffle + r5*2 GLOBAL]
+ movu m5, [ch_shuffle + r5*2]
  %endif
  movu m0, [r2]
  pshufb m0, m5
@@ -1197,8 +1197,8 @@ cglobal x264_mc_chroma_ssse3%1, 0,6,%2
  pmaddubsw m1, m6
  pmaddubsw m2, m7
  pmaddubsw m3, m6
- paddw m0, [pw_32 GLOBAL]
- paddw m2, [pw_32 GLOBAL]
+ paddw m0, [pw_32]
+ paddw m2, [pw_32]
  paddw m1, m0
  paddw m3, m2
  mova m0, m4
@@ -1228,7 +1228,7 @@ INIT_XMM
  cmp r5, 0x38
  jge .split
  %endif
- mova m5, [pw_32 GLOBAL]
+ mova m5, [pw_32]
  movh m0, [r2]
  movh m1, [r2+1]
  punpcklbw m0, m1
@@ -1265,18 +1265,18 @@ INIT_XMM
  and r2, ~7
  and r5, 7
  %ifdef PIC
- lea r11, [ch_shuffle GLOBAL]
+ lea r11, [ch_shuffle]
  movu m5, [r11 + r5*2]
  %else
- movu m5, [ch_shuffle + r5*2 GLOBAL]
+ movu m5, [ch_shuffle + r5*2]
  %endif
  movu m0, [r2]
  pshufb m0, m5
  %ifdef ARCH_X86_64
- mova m8, [pw_32 GLOBAL]
+ mova m8, [pw_32]
  %define round m8
  %else
- %define round [pw_32 GLOBAL]
+ %define round [pw_32]
  %endif
  .splitloop8:
  movu m1, [r2+r3]
......
@@ -125,7 +125,7 @@ cglobal x264_hpel_filter_v_%1, 5,6,%2
  %ifnidn %1, ssse3
  pxor m0, m0
  %else
- mova m0, [filt_mul51 GLOBAL]
+ mova m0, [filt_mul51]
  %endif
  .loop:
  %ifidn %1, ssse3
@@ -142,8 +142,8 @@ cglobal x264_hpel_filter_v_%1, 5,6,%2
  pmaddubsw m4, m0
  pmaddubsw m2, m0
  pmaddubsw m5, m0
- pmaddubsw m3, [filt_mul20 GLOBAL]
- pmaddubsw m6, [filt_mul20 GLOBAL]
+ pmaddubsw m3, [filt_mul20]
+ pmaddubsw m6, [filt_mul20]
  paddw m1, m2
  paddw m4, m5
  paddw m1, m3
@@ -155,7 +155,7 @@ cglobal x264_hpel_filter_v_%1, 5,6,%2
  LOAD_ADD m6, [r1+r3*2+mmsize/2], [r5+mmsize/2], m7 ; c1
  FILT_V2
  %endif
- mova m7, [pw_16 GLOBAL]
+ mova m7, [pw_16]
  mova [r2+r4*2], m1
  mova [r2+r4*2+mmsize], m4
  paddw m1, m7
@@ -180,7 +180,7 @@ cglobal x264_hpel_filter_c_mmxext, 3,3
  lea r1, [r1+r2*2]
  neg r2
  %define src r1+r2*2
- movq m7, [pw_32 GLOBAL]
+ movq m7, [pw_32]
  .loop:
  movq m1, [src-4]
  movq m2, [src-2]
@@ -237,7 +237,7 @@ cglobal x264_hpel_filter_h_mmxext, 3,3
  punpcklbw m7, m0
  punpcklbw m6, m0
  paddw m6, m7 ; a1
- movq m7, [pw_1 GLOBAL]
+ movq m7, [pw_1]
  FILT_H2 m1, m2, m3, m4, m5, m6
  FILT_PACK m1, m4, 1
  movntq [r0+r2], m1
@@ -257,13 +257,13 @@ cglobal x264_hpel_filter_c_%1, 3,3,9
  neg r2
  %define src r1+r2*2
  %ifidn %1, ssse3
- mova m7, [pw_32 GLOBAL]
+ mova m7, [pw_32]
  %define tpw_32 m7
  %elifdef ARCH_X86_64
- mova m8, [pw_32 GLOBAL]
+ mova m8, [pw_32]
  %define tpw_32 m8
  %else
- %define tpw_32 [pw_32 GLOBAL]
+ %define tpw_32 [pw_32]
  %endif
  .loop:
  %ifidn %1,sse2_misalign
@@ -340,7 +340,7 @@ cglobal x264_hpel_filter_h_sse2, 3,3,8
  punpcklbw m6, m0
  punpcklbw m7, m0
  paddw m6, m7 ; c1
- mova m7, [pw_1 GLOBAL] ; FIXME xmm8
+ mova m7, [pw_1] ; FIXME xmm8
  FILT_H2 m1, m2, m3, m4, m5, m6
  FILT_PACK m1, m4, 1
  movntdq [r0+r2], m1
@@ -362,7 +362,7 @@ cglobal x264_hpel_filter_h_ssse3, 3,3
  punpcklbw m1, m0 ; 00 -1 00 -2 00 -3 00 -4 00 -5 00 -6 00 -7 00 -8
  movh m2, [src]
  punpcklbw m2, m0
- mova m7, [pw_1 GLOBAL]
+ mova m7, [pw_1]
  .loop:
  movh m3, [src+8]
  punpcklbw m3, m0
@@ -436,7 +436,7 @@ HPEL_V ssse3
  mova m3, [r1]
  mova %4, [r1+r2]
  mova m0, [r1+r2*2]
- mova %2, [filt_mul51 GLOBAL]
+ mova %2, [filt_mul51]
  mova m4, m1
  punpcklbw m1, m2
  punpckhbw m4, m2
@@ -452,8 +452,8 @@ HPEL_V ssse3
  pmaddubsw m4, %2
  pmaddubsw m0, %2
  pmaddubsw m2, %2
- pmaddubsw m3, [filt_mul20 GLOBAL]
- pmaddubsw %1, [filt_mul20 GLOBAL]
+ pmaddubsw m3, [filt_mul20]
+ pmaddubsw %1, [filt_mul20]
  psrlw %3, 8
  psrlw %4, 8
  paddw m1, m0
@@ -1096,7 +1096,7 @@ cglobal x264_mbtree_propagate_cost_sse2, 6,6
  add r4, r5
  neg r5
  pxor xmm5, xmm5
- movdqa xmm4, [pd_128 GLOBAL]
+ movdqa xmm4, [pd_128]
  .loop:
  movq xmm2, [r2+r5] ; intra
  movq xmm0, [r4+r5] ; invq
......
@@ -59,7 +59,7 @@ SECTION .text
  %endmacro
  %macro HADDW 2
- pmaddwd %1, [pw_1 GLOBAL]
+ pmaddwd %1, [pw_1]
  HADDD %1, %2
  %endmacro
@@ -244,9 +244,9 @@ cglobal x264_pixel_ssd_%1x%2_%3, 0,0,0
  %endif
  %ifidn %3, ssse3
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  %elifidn %3, sse2
- mova m7, [pw_00ff GLOBAL]
+ mova m7, [pw_00ff]
  %elif %1 >= mmsize
  pxor m7, m7
  %endif
@@ -310,7 +310,7 @@ SSD 4, 8, ssse3
  pxor m5, m5 ; sum
  pxor m6, m6 ; sum squared
  %if %1
- mova m7, [pw_00ff GLOBAL]
+ mova m7, [pw_00ff]
  %else
  pxor m7, m7 ; zero
  %endif
@@ -482,7 +482,7 @@ cglobal x264_pixel_var2_8x8_sse2, 5,6,8
  cglobal x264_pixel_var2_8x8_ssse3, 5,6,8
  pxor m5, m5 ; sum
  pxor m6, m6 ; sum squared
- mova m7, [hsub_mul GLOBAL]
+ mova m7, [hsub_mul]
  mov r5d, 2
  .loop:
  movq m0, [r0]
@@ -775,7 +775,7 @@ cglobal x264_pixel_satd_4x4_mmxext, 4,6
  %macro SATD_START_SSE2 3
  %ifnidn %1, sse2
- mova %3, [hmul_8p GLOBAL]
+ mova %3, [hmul_8p]
  %endif
  lea r4, [3*r1]
  lea r5, [3*r3]
@@ -815,7 +815,7 @@ INIT_XMM
  %ifnidn %1, sse2
  cglobal x264_pixel_satd_4x4_%1, 4, 6, 6
  SATD_START_MMX
- mova m4, [hmul_4p GLOBAL]
+ mova m4, [hmul_4p]
  LOAD_DUP_2x4P m2, m5, [r2], [r2+r3]
  LOAD_DUP_2x4P m3, m5, [r2+2*r3], [r2+r5]
  LOAD_DUP_2x4P m0, m5, [r0], [r0+r1]
@@ -832,7 +832,7 @@ cglobal x264_pixel_satd_4x4_%1, 4, 6, 6
  cglobal x264_pixel_satd_4x8_%1, 4, 6, 8
  SATD_START_MMX
  %ifnidn %1, sse2
- mova m7, [hmul_4p GLOBAL]
+ mova m7, [hmul_4p]
  %endif
  movd m4, [r2]
  movd m5, [r2+r3]
@@ -889,14 +889,14 @@ cglobal x264_pixel_satd_16x4_internal_%1
  cglobal x264_pixel_satd_16x8_%1, 4,6,12
  SATD_START_SSE2 %1, m10, m7
  %ifidn %1, sse2
- mova m7, [pw_00ff GLOBAL]
+ mova m7, [pw_00ff]