Commit 4139febf authored by Sam Hocevar's avatar Sam Hocevar
Browse files

* common/i386/*.asm: don't use the "GLOBAL" reserved word; some versions of

    NASM complain about it. Replaced it with "GOT_ebx".


git-svn-id: svn://svn.videolan.org/x264/trunk@451 df754926-b1dd-0310-bc7b-ec298dee348c
parent 059410ed
......@@ -169,7 +169,7 @@ x264_dct4x4dc_mmxext:
MMX_TRANSPOSE mm0, mm2, mm3, mm4, mm1 ; in: mm0, mm2, mm3, mm4 out: mm0, mm4, mm1, mm3
movq mm6, [x264_mmx_1 GLOBAL]
movq mm6, [x264_mmx_1 GOT_ebx]
paddw mm0, mm6
paddw mm4, mm6
psraw mm0, 1
......@@ -300,7 +300,7 @@ x264_add4x4_idct_mmxext:
MMX_SUMSUB_BADC mm2, mm3, mm4, mm1 ; mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13
MMX_ZERO mm7
movq mm6, [x264_mmx_32 GLOBAL]
movq mm6, [x264_mmx_32 GOT_ebx]
MMX_STORE_DIFF_4P mm2, mm0, mm6, mm7, [eax]
MMX_STORE_DIFF_4P mm4, mm0, mm6, mm7, [eax+ecx]
......@@ -397,10 +397,10 @@ x264_xdct8_mmxext:
picpush ebx
picgetgot ebx
movq mm5, [x264_mmx_PPNN GLOBAL]
movq mm6, [x264_mmx_PNNP GLOBAL]
movq mm4, [x264_mmx_PPPN GLOBAL]
movq mm7, [x264_mmx_PPNP GLOBAL]
movq mm5, [x264_mmx_PPNN GOT_ebx]
movq mm6, [x264_mmx_PNNP GOT_ebx]
movq mm4, [x264_mmx_PPPN GOT_ebx]
movq mm7, [x264_mmx_PPNP GOT_ebx]
;-------------------------------------------------------------------------
; horizontal dct ( compute 1 row at a time -> 8 loops )
......@@ -434,7 +434,7 @@ x264_xdct8_mmxext:
pshufw mm2, mm0, 11001001b ; (low)a1/a3/a0/a2(high)
pshufw mm0, mm0, 10011100b ; (low)a0/a2/a1/a3(high)
pmullw mm2, [x264_mmx_2121 GLOBAL]
pmullw mm2, [x264_mmx_2121 GOT_ebx]
pmullw mm0, mm5 ; (low)a0/a2/-a1/-a3(high)
psraw mm2, 1 ; (low)a1/a3>>1/a0/a2>>1(high)
paddw mm0, mm2 ; (low)dst0/dst2/dst4/dst6(high)
......@@ -554,10 +554,10 @@ x264_xidct8_mmxext:
picpush ebx
picgetgot ebx
movq mm4, [x264_mmx_PPNN GLOBAL]
movq mm5, [x264_mmx_PNPN GLOBAL]
movq mm6, [x264_mmx_PPNP GLOBAL]
movq mm7, [x264_mmx_PPPN GLOBAL]
movq mm4, [x264_mmx_PPNN GOT_ebx]
movq mm5, [x264_mmx_PNPN GOT_ebx]
movq mm6, [x264_mmx_PPNP GOT_ebx]
movq mm7, [x264_mmx_PPPN GOT_ebx]
;-------------------------------------------------------------------------
; horizontal idct ( compute 1 row at a time -> 8 loops )
......@@ -573,7 +573,7 @@ x264_xidct8_mmxext:
punpckhwd mm1, mm2 ; (low)d1,d5,d3,d7(high)
pshufw mm2, mm0, 10110001b ; (low)d4,d0,d6,d2(high)
pmullw mm0, [x264_mmx_p2n2p1p1 GLOBAL]; (low)2*d0,-2*d4,d2,d6(high)
pmullw mm0, [x264_mmx_p2n2p1p1 GOT_ebx]; (low)2*d0,-2*d4,d2,d6(high)
pmullw mm2, mm6 ; (low)d4,d0,-d6,d2(high)
psraw mm0, 1 ; (low)d0,-d4,d2>>1,d6>>1(high)
paddw mm0, mm2 ; (low)e0,e2,e4,e6(high)
......
......@@ -185,19 +185,19 @@ cglobal x264_deblock_h_chroma_intra_mmxext
pxor mm4, mm2
; b = p0^(q1>>2)
psrlw mm3, 2
pand mm3, [pb_3f GLOBAL]
pand mm3, [pb_3f GOT_ebx]
movq mm5, mm1
pxor mm5, mm3
; c = q0^(p1>>2)
psrlw mm0, 2
pand mm0, [pb_3f GLOBAL]
pand mm0, [pb_3f GOT_ebx]
movq mm6, mm2
pxor mm6, mm0
; d = (c^b) & ~(b^a) & 1
pxor mm6, mm5
pxor mm5, mm4
pandn mm5, mm6
pand mm5, [pb_01 GLOBAL]
pand mm5, [pb_01 GOT_ebx]
; delta = (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3
; = (avg(q0, p1>>2) + (d&a))
; - (avg(p0, q1>>2) + (d^(d&a)))
......@@ -227,10 +227,10 @@ cglobal x264_deblock_h_chroma_intra_mmxext
%macro LUMA_Q1_MMX 6
movq %6, mm1
pavgb %6, mm2
pavgb %2, %6 ; avg(p2,avg(p0,q0))
pavgb %2, %6 ; avg(p2,avg(p0,q0))
pxor %6, %3
pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
pand %6, [pb_01 GOT_ebx] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
movq %6, %1
psubusb %6, %5
paddusb %5, %1
......@@ -274,7 +274,7 @@ x264_deblock_v8_luma_mmxext:
punpcklbw mm4, mm4
punpcklbw mm4, mm4 ; tc = 4x tc0[1], 4x tc0[0]
movq [esp+8], mm4 ; tc
pcmpgtb mm4, [pb_ff GLOBAL]
pcmpgtb mm4, [pb_ff GOT_ebx]
pand mm4, mm7
movq [esp+0], mm4 ; mask
......@@ -284,7 +284,7 @@ x264_deblock_v8_luma_mmxext:
pcmpeqb mm6, mm4
pand mm6, mm4
pand mm4, [esp+8] ; tc
movq mm7, [pb_01 GLOBAL]
movq mm7, [pb_01 GOT_ebx]
pand mm7, mm6
pand mm6, mm4
paddb mm7, mm4
......@@ -298,7 +298,7 @@ x264_deblock_v8_luma_mmxext:
pand mm6, mm5
movq mm5, [esp+8] ; tc
pand mm5, mm6
pand mm6, [pb_01 GLOBAL]
pand mm6, [pb_01 GOT_ebx]
paddb mm7, mm6
movq mm3, [edi+esi]
LUMA_Q1_MMX mm3, mm4, [edi+2*esi], [edi+esi], mm5, mm6
......@@ -476,7 +476,7 @@ x264_deblock_h_chroma_mmxext:
%macro CHROMA_INTRA_P0 3
movq mm4, %1
pxor mm4, %3
pand mm4, [pb_01 GLOBAL] ; mm4 = (p0^q1)&1
pand mm4, [pb_01 GOT_ebx] ; mm4 = (p0^q1)&1
pavgb %1, %3
psubusb %1, mm4
pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
......
......@@ -54,13 +54,14 @@ BITS 32
; and let you load non-shared .so objects (Linux, Win32...). However, OS X
; requires PIC code in its .dylib objects.
;
; - GLOBAL should be used as a suffix for global addressing, eg.
; mov eax, [foo GLOBAL]
; - GOT_* should be used as a suffix for global addressing, eg.
; picgetgot ebx
; mov eax, [foo GOT_ebx]
; instead of
; mov eax, [foo]
;
; - picgetgot computes the GOT address into the given register in PIC
; mode, otherwise does nothing. You need to do this before using GLOBAL.
; mode, otherwise does nothing. You need to do this before using GOT_*.
;
; - picpush and picpop respectively push and pop the given register
; in PIC mode, otherwise do nothing. You should always use them around
......@@ -81,7 +82,10 @@ BITS 32
%ifidn __OUTPUT_FORMAT__,macho
; There is no real global offset table on OS X, but we still
; need to reference our variables by offset.
%define GLOBAL + ebx
%define GOT_eax + eax
%define GOT_ebx + ebx
%define GOT_ecx + ecx
%define GOT_edx + edx
%macro picgetgot 1
call %%getgot
%%getgot:
......@@ -95,8 +99,10 @@ BITS 32
%define GOT __GLOBAL_OFFSET_TABLE_
%endif
extern GOT
; FIXME: find an elegant way to use registers other than ebx
%define GLOBAL + ebx wrt ..gotoff
%define GOT_eax + eax wrt ..gotoff
%define GOT_ebx + ebx wrt ..gotoff
%define GOT_ecx + ecx wrt ..gotoff
%define GOT_edx + edx wrt ..gotoff
%macro picgetgot 1
call %%getgot
%%getgot:
......@@ -112,7 +118,10 @@ BITS 32
%endmacro
%define picesp esp+4
%else
%define GLOBAL
%define GOT_eax
%define GOT_ebx
%define GOT_ecx
%define GOT_edx
%macro picgetgot 1
%endmacro
%macro picpush 1
......
......@@ -268,10 +268,10 @@ ALIGN 4
mov edx, [picesp+20] ; src
mov ecx, [picesp+24] ; i_src
pshufw mm4, [picesp+28], 0 ; weight_dst
movq mm5, [pw_64 GLOBAL]
psubw mm5, mm4 ; weight_src
movq mm6, [pw_32 GLOBAL] ; rounding
pshufw mm4, [picesp+28], 0 ; weight_dst
movq mm5, [pw_64 GOT_ebx]
psubw mm5, mm4 ; weight_src
movq mm6, [pw_32 GOT_ebx] ; rounding
pxor mm7, mm7
%endmacro
%macro BIWEIGHT_END_MMX 0
......@@ -516,7 +516,7 @@ x264_mc_chroma_mmxext:
pshufw mm5, [picesp+20], 0 ; mm5 = dx
pshufw mm6, [picesp+24], 0 ; mm6 = dy
movq mm4, [pw_8 GLOBAL]
movq mm4, [pw_8 GOT_ebx]
movq mm0, mm4
psubw mm4, mm5 ; mm4 = 8-dx
......@@ -552,7 +552,7 @@ ALIGN 4
punpcklbw mm2, mm3
punpcklbw mm1, mm3
paddw mm0, [pw_32 GLOBAL]
paddw mm0, [pw_32 GOT_ebx]
pmullw mm2, mm5 ; line * cB
pmullw mm1, mm7 ; line * cD
......
......@@ -180,7 +180,7 @@ loopcy:
mov edi, [picesp + tdst1]
lea ebp, [picesp + tbuffer]
mov esi, [picesp + tsrc]
movq mm7, [mmx_dw_one GLOBAL]
movq mm7, [mmx_dw_one GOT_ebx]
picpop ebx
......@@ -249,15 +249,15 @@ loopcx2:
paddw mm3, mm4
paddw mm1, mm6
movq mm5, [mmx_dw_20 GLOBAL]
movq mm4, [mmx_dw_5 GLOBAL]
movq mm5, [mmx_dw_20 GOT_ebx]
movq mm4, [mmx_dw_5 GOT_ebx]
movq mm6, mm1
pxor mm7, mm7
punpckhwd mm5, mm2
punpcklwd mm4, mm3
punpcklwd mm2, [mmx_dw_20 GLOBAL]
punpckhwd mm3, [mmx_dw_5 GLOBAL]
punpcklwd mm2, [mmx_dw_20 GOT_ebx]
punpckhwd mm3, [mmx_dw_5 GOT_ebx]
pcmpgtw mm7, mm1
......@@ -270,8 +270,8 @@ loopcx2:
paddd mm2, mm1
paddd mm3, mm6
paddd mm2, [mmx_dd_one GLOBAL]
paddd mm3, [mmx_dd_one GLOBAL]
paddd mm2, [mmx_dd_one GOT_ebx]
paddd mm3, [mmx_dd_one GOT_ebx]
psrad mm2, 10
psrad mm3, 10
......@@ -323,7 +323,7 @@ x264_horizontal_filter_mmxext :
pxor mm0, mm0
picpush ebx
picgetgot ebx
movq mm7, [mmx_dw_one GLOBAL]
movq mm7, [mmx_dw_one GOT_ebx]
picpop ebx
mov ecx, [esp + 32] ; height
......
......@@ -404,7 +404,7 @@ x264_pixel_ssd_16x8_sse2:
movdqa %2, %1
psrldq %1, 2
paddusw %1, %2
pand %1, [pd_0000ffff GLOBAL]
pand %1, [pd_0000ffff GOT_ebx]
movdqa %2, %1
psrldq %1, 4
paddd %1, %2
......
......@@ -85,7 +85,7 @@ cglobal predict_16x16_dc_top_mmxext
pavgb mm1, mm2
pxor mm2, mm3
movq %1 , %2
pand mm2, [pb_1 GLOBAL]
pand mm2, [pb_1 GOT_ebx]
psubusb mm1, mm2
pavgb %1 , mm1 ; %1 = (t[n-1] + t[n]*2 + t[n+1] + 2) >> 2
%endmacro
......@@ -157,7 +157,7 @@ predict_8x8_dc_core_mmxext:
pxor mm1, mm1
psadbw mm0, mm1
psadbw mm4, mm1
paddw mm0, [pw_8 GLOBAL]
paddw mm0, [pw_8 GOT_ebx]
paddw mm0, mm4
psrlw mm0, 4
pshufw mm0, mm0, 0
......@@ -212,7 +212,7 @@ predict_8x8c_dc_core_mmxext:
paddw mm0, [picesp + 8]
pshufw mm2, [picesp + 12], 0
psrlw mm0, 3
paddw mm1, [pw_2 GLOBAL]
paddw mm1, [pw_2 GOT_ebx]
movq mm3, mm2
pshufw mm1, mm1, 0
pshufw mm0, mm0, 0 ; dc0 (w)
......@@ -246,7 +246,7 @@ predict_8x8c_p_core_mmx:
pshufw mm2, [picesp +12], 0
pshufw mm4, [picesp +16], 0
movq mm1, mm2
pmullw mm2, [pw_3210 GLOBAL]
pmullw mm2, [pw_3210 GOT_ebx]
psllw mm1, 2
paddsw mm0, mm2 ; mm0 = {i+0*b, i+1*b, i+2*b, i+3*b}
paddsw mm1, mm0 ; mm1 = {i+4*b, i+5*b, i+6*b, i+7*b}
......@@ -293,7 +293,7 @@ predict_16x16_p_core_mmx:
pshufw mm4, [picesp +16], 0
movq mm5, mm2
movq mm1, mm2
pmullw mm5, [pw_3210 GLOBAL]
pmullw mm5, [pw_3210 GOT_ebx]
psllw mm2, 3
psllw mm1, 2
movq mm3, mm2
......@@ -421,7 +421,7 @@ ALIGN 16
predict_16x16_dc_top_mmxext:
picpush ebx
picgetgot ebx
PRED16x16_DC [pw_8 GLOBAL], 4, picesp
PRED16x16_DC [pw_8 GOT_ebx], 4, picesp
picpop ebx
ret
......@@ -480,7 +480,7 @@ ALIGN 16
neg eax
picpush ebx
picgetgot ebx
movq mm6, [pd_1 GLOBAL]
movq mm6, [pd_1 GOT_ebx]
picpop ebx
movd mm5, eax
pxor mm7, mm7
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment