Commit eea89389 authored by Sam Hocevar's avatar Sam Hocevar
Browse files

* Support for x86 position-independent code (PIC), needed for dynamic libs
    on Mac OS X Intel. I tried to make this as unintrusive as possible.


git-svn-id: svn://svn.videolan.org/x264/trunk@418 df754926-b1dd-0310-bc7b-ec298dee348c
parent 97f05071
......@@ -24,6 +24,7 @@ ASMSRC = common/i386/dct-a.asm common/i386/cpu-a.asm \
common/i386/pixel-sse2.asm common/i386/quant-a.asm \
common/i386/deblock-a.asm
OBJASM = $(ASMSRC:%.asm=%.o)
ASFLAGS += -Icommon/i386/
endif
# MMX/SSE optims
......@@ -73,6 +74,7 @@ checkasm: tools/checkasm.o libx264.a
$(CC) -o $@ $< libx264.a $(LDFLAGS)
common/amd64/*.o: common/amd64/amd64inc.asm
common/i386/*.o: common/i386/i386inc.asm
%.o: %.asm
$(AS) $(ASFLAGS) -o $@ $<
......
......@@ -958,6 +958,9 @@
<Filter
Name="I386"
Filter="*.h,*.c,*-a.asm">
<File
RelativePath="..\..\common\i386\i386inc.asm">
</File>
<File
RelativePath="..\..\common\i386\cpu-a.asm">
<FileConfiguration
......
......@@ -27,14 +27,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
;=============================================================================
; Code
......
......@@ -38,14 +38,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
%macro MMX_ZERO 1
pxor %1, %1
......@@ -161,6 +154,8 @@ ALIGN 16
; void __cdecl dct4x4dc( int16_t d[4][4] )
;-----------------------------------------------------------------------------
x264_dct4x4dc_mmxext:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
mov eax, [esp+ 4]
movq mm0, [eax+ 0]
movq mm1, [eax+ 8]
......@@ -177,7 +172,7 @@ x264_dct4x4dc_mmxext:
MMX_TRANSPOSE mm0, mm2, mm3, mm4, mm1 ; in: mm0, mm2, mm3, mm4 out: mm0, mm4, mm1, mm3
movq mm6, [x264_mmx_1]
movq mm6, [x264_mmx_1 GLOBAL]
paddw mm0, mm6
paddw mm4, mm6
psraw mm0, 1
......@@ -190,6 +185,7 @@ x264_dct4x4dc_mmxext:
movq [eax+16], mm1
psraw mm3, 1
movq [eax+24], mm3
POP_EBX_IF_PIC
ret
cglobal x264_idct4x4dc_mmxext
......@@ -276,6 +272,8 @@ ALIGN 16
; void __cdecl x264_add4x4_idct_mmxext( uint8_t *p_dst, int i_dst, int16_t dct[4][4] )
;-----------------------------------------------------------------------------
x264_add4x4_idct_mmxext:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
; Load dct coeffs
mov eax, [esp+12] ; dct
......@@ -305,13 +303,14 @@ x264_add4x4_idct_mmxext:
MMX_SUMSUB_BADC mm2, mm3, mm4, mm1 ; mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13
MMX_ZERO mm7
movq mm6, [x264_mmx_32]
movq mm6, [x264_mmx_32 GLOBAL]
MMX_STORE_DIFF_4P mm2, mm0, mm6, mm7, [eax]
MMX_STORE_DIFF_4P mm4, mm0, mm6, mm7, [eax+ecx]
MMX_STORE_DIFF_4P mm1, mm0, mm6, mm7, [eax+ecx*2]
MMX_STORE_DIFF_4P mm3, mm0, mm6, mm7, [eax+edx]
POP_EBX_IF_PIC
ret
......@@ -396,12 +395,14 @@ ALIGN 16
; void __cdecl x264_xdct8_mmxext( int16_t dest[8][8] );
;-----------------------------------------------------------------------------
x264_xdct8_mmxext:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
mov eax, [esp+04] ; dest
movq mm5, [x264_mmx_PPNN]
movq mm6, [x264_mmx_PNNP]
movq mm4, [x264_mmx_PPPN]
movq mm7, [x264_mmx_PPNP]
movq mm5, [x264_mmx_PPNN GLOBAL]
movq mm6, [x264_mmx_PNNP GLOBAL]
movq mm4, [x264_mmx_PPPN GLOBAL]
movq mm7, [x264_mmx_PPNP GLOBAL]
;-------------------------------------------------------------------------
; horizontal dct ( compute 1 row at a time -> 8 loops )
......@@ -435,7 +436,7 @@ x264_xdct8_mmxext:
pshufw mm2, mm0, 11001001b ; (low)a1/a3/a0/a2(high)
pshufw mm0, mm0, 10011100b ; (low)a0/a2/a1/a3(high)
pmullw mm2, [x264_mmx_2121]
pmullw mm2, [x264_mmx_2121 GLOBAL]
pmullw mm0, mm5 ; (low)a0/a2/-a1/-a3(high)
psraw mm2, 1 ; (low)a1/a3>>1/a0/a2>>1(high)
paddw mm0, mm2 ; (low)dst0/dst2/dst4/dst6(high)
......@@ -457,6 +458,7 @@ x264_xdct8_mmxext:
%assign disp disp+16
%endrep
POP_EBX_IF_PIC
ret
ALIGN 16
......@@ -549,12 +551,14 @@ ALIGN 16
; void __cdecl x264_xidct8_mmxext( int16_t dest[8][8] );
;-----------------------------------------------------------------------------
x264_xidct8_mmxext:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
mov eax, [esp+04] ; dest
movq mm4, [x264_mmx_PPNN]
movq mm5, [x264_mmx_PNPN]
movq mm6, [x264_mmx_PPNP]
movq mm7, [x264_mmx_PPPN]
movq mm4, [x264_mmx_PPNN GLOBAL]
movq mm5, [x264_mmx_PNPN GLOBAL]
movq mm6, [x264_mmx_PPNP GLOBAL]
movq mm7, [x264_mmx_PPPN GLOBAL]
;-------------------------------------------------------------------------
; horizontal idct ( compute 1 row at a time -> 8 loops )
......@@ -570,7 +574,7 @@ x264_xidct8_mmxext:
punpckhwd mm1, mm2 ; (low)d1,d5,d3,d7(high)
pshufw mm2, mm0, 10110001b ; (low)d4,d0,d6,d2(high)
pmullw mm0, [x264_mmx_p2n2p1p1]; (low)2*d0,-2*d4,d2,d6(high)
pmullw mm0, [x264_mmx_p2n2p1p1 GLOBAL]; (low)2*d0,-2*d4,d2,d6(high)
pmullw mm2, mm6 ; (low)d4,d0,-d6,d2(high)
psraw mm0, 1 ; (low)d0,-d4,d2>>1,d6>>1(high)
paddw mm0, mm2 ; (low)e0,e2,e4,e6(high)
......@@ -605,6 +609,7 @@ x264_xidct8_mmxext:
%assign disp disp+16
%endrep
POP_EBX_IF_PIC
ret
ALIGN 16
......
......@@ -22,14 +22,7 @@
BITS 32
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
SECTION .rodata align=16
pb_01: times 16 db 0x01
......@@ -192,19 +185,19 @@ cglobal x264_deblock_h_chroma_intra_mmxext
pxor mm4, mm2
; b = p0^(q1>>2)
psrlw mm3, 2
pand mm3, [pb_3f]
pand mm3, [pb_3f GLOBAL]
movq mm5, mm1
pxor mm5, mm3
; c = q0^(p1>>2)
psrlw mm0, 2
pand mm0, [pb_3f]
pand mm0, [pb_3f GLOBAL]
movq mm6, mm2
pxor mm6, mm0
; d = (c^b) & ~(b^a) & 1
pxor mm6, mm5
pxor mm5, mm4
pandn mm5, mm6
pand mm5, [pb_01]
pand mm5, [pb_01 GLOBAL]
; delta = (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3
; = (avg(q0, p1>>2) + (d&a))
; - (avg(p0, q1>>2) + (d^(d&a)))
......@@ -234,10 +227,10 @@ cglobal x264_deblock_h_chroma_intra_mmxext
%macro LUMA_Q1_MMX 6
movq %6, mm1
pavgb %6, mm2
pavgb %2, %6 ; avg(p2,avg(p0,q0))
pavgb %2, %6 ; avg(p2,avg(p0,q0))
pxor %6, %3
pand %6, [pb_01] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
movq %6, %1
psubusb %6, %5
paddusb %5, %1
......@@ -254,6 +247,8 @@ ALIGN 16
; void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
x264_deblock_v8_luma_mmxext:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
push edi
push esi
mov edi, [esp+12] ; pix
......@@ -279,7 +274,7 @@ x264_deblock_v8_luma_mmxext:
punpcklbw mm4, mm4
punpcklbw mm4, mm4 ; tc = 4x tc0[1], 4x tc0[0]
movq [esp+8], mm4 ; tc
pcmpgtb mm4, [pb_ff]
pcmpgtb mm4, [pb_ff GLOBAL]
pand mm4, mm7
movq [esp+0], mm4 ; mask
......@@ -289,7 +284,7 @@ x264_deblock_v8_luma_mmxext:
pcmpeqb mm6, mm4
pand mm6, mm4
pand mm4, [esp+8] ; tc
movq mm7, [pb_01]
movq mm7, [pb_01 GLOBAL]
pand mm7, mm6
pand mm6, mm4
paddb mm7, mm4
......@@ -303,18 +298,19 @@ x264_deblock_v8_luma_mmxext:
pand mm6, mm5
movq mm5, [esp+8] ; tc
pand mm5, mm6
pand mm6, [pb_01]
pand mm6, [pb_01 GLOBAL]
paddb mm7, mm6
movq mm3, [edi+esi]
LUMA_Q1_MMX mm3, mm4, [edi+2*esi], [edi+esi], mm5, mm6
DEBLOCK_P0_Q0_MMX
DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
movq [eax+2*esi], mm1
movq [edi], mm2
add esp, 16
pop esi
pop edi
POP_EBX_IF_PIC
ret
......@@ -434,7 +430,8 @@ x264_deblock_v_chroma_mmxext:
movd mm6, [ebx]
punpcklbw mm6, mm6
pand mm7, mm6
DEBLOCK_P0_Q0_MMX
GET_GOT_IN_EBX_IF_PIC ; no need to push ebx, it's already been done
DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
movq [eax+esi], mm1
movq [edi], mm2
......@@ -461,7 +458,7 @@ x264_deblock_h_chroma_mmxext:
movd mm6, [ebx]
punpcklbw mm6, mm6
pand mm7, mm6
DEBLOCK_P0_Q0_MMX
DEBLOCK_P0_Q0_MMX ; XXX: make sure ebx has the GOT in PIC mode
movq mm0, [esp+8]
movq mm3, [esp+0]
......@@ -478,7 +475,7 @@ x264_deblock_h_chroma_mmxext:
%macro CHROMA_INTRA_P0 3
movq mm4, %1
pxor mm4, %3
pand mm4, [pb_01] ; mm4 = (p0^q1)&1
pand mm4, [pb_01 GLOBAL] ; mm4 = (p0^q1)&1
pavgb %1, %3
psubusb %1, mm4
pavgb %1, %2 ; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
......@@ -504,13 +501,16 @@ ALIGN 16
;-----------------------------------------------------------------------------
x264_deblock_v_chroma_intra_mmxext:
CHROMA_V_START
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
movq mm0, [eax]
movq mm1, [eax+esi]
movq mm2, [edi]
movq mm3, [edi+esi]
CHROMA_INTRA_BODY
CHROMA_INTRA_BODY ; XXX: make sure ebx has the GOT in PIC mode
movq [eax+esi], mm1
movq [edi], mm2
POP_EBX_IF_PIC
CHROMA_END
ALIGN 16
......@@ -519,9 +519,13 @@ ALIGN 16
;-----------------------------------------------------------------------------
x264_deblock_h_chroma_intra_mmxext:
CHROMA_H_START
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
TRANSPOSE4x8_LOAD PASS8ROWS(eax, edi, esi, ebp)
CHROMA_INTRA_BODY
CHROMA_INTRA_BODY ; XXX: make sure ebx has the GOT in PIC mode
TRANSPOSE8x4_STORE PASS8ROWS(eax, edi, esi, ebp)
pop ebp
POP_EBX_IF_PIC
pop ebp ; needed because of CHROMA_H_START
POP_EBX_IF_PIC
CHROMA_END
;*****************************************************************************
;* i386inc.asm: h264 encoder library
;*****************************************************************************
;* Copyright (C) 2006 x264 project
;*
;* Author: Sam Hocevar <sam@zoy.org>
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
;*****************************************************************************
BITS 32
;=============================================================================
; Macros and other preprocessor constants
;=============================================================================
;-----------------------------------------------------------------------------
; cglobal name
;   Export `name` as a global symbol.
;   Without PREFIX the symbol is exported as-is. With PREFIX defined, the
;   symbol is exported as `_name` and `name` is %define'd to `_name`, so the
;   rest of the file can keep using the unprefixed spelling.
;-----------------------------------------------------------------------------
%macro cglobal 1
    %ifndef PREFIX
        global %1
    %else
        global _%1
        %define %1 _%1
    %endif
%endmacro
;-----------------------------------------------------------------------------
; PIC (position-independent code) helpers
;
; All four names below exist in both build modes, so call sites never need
; their own %ifdef __PIC__:
;
;   GLOBAL                 suffix for a data reference: mov eax, [foo GLOBAL]
;   GET_GOT_IN_EBX_IF_PIC  load the GOT address into ebx
;   PUSH_EBX_IF_PIC        save ebx before it is overwritten by the above
;   POP_EBX_IF_PIC         restore ebx
;
; Without __PIC__ every one of them expands to nothing, so [foo GLOBAL] is
; plain absolute addressing and ebx is left untouched.
;
; With __PIC__:
;   * ebx must hold the GOT address (GET_GOT_IN_EBX_IF_PIC) at every point
;     where GLOBAL is used, because GLOBAL addresses data relative to ebx.
;   * PUSH_EBX_IF_PIC moves esp down by 4, so every [esp+N] operand between
;     the push and the matching pop refers to a slot 4 bytes further away
;     than in the non-PIC build; stack-relative argument loads must take
;     that into account.
;
; NOTE(review): `wrt ..gotpc` and `wrt ..gotoff` are special symbols of
; NASM's ELF output format -- confirm what the other output formats used
; by the build accept.
;-----------------------------------------------------------------------------
%ifdef __PIC__
    extern _GLOBAL_OFFSET_TABLE_

    ; [sym GLOBAL] -> [sym + ebx wrt ..gotoff]
    ; `..gotoff` yields the offset of sym from the start of the GOT; adding
    ; the GOT address held in ebx gives the run-time address of sym.
    ; (`..gotpc` is only meaningful on _GLOBAL_OFFSET_TABLE_ itself, as used
    ; in GET_GOT_IN_EBX_IF_PIC below, and must not be applied to data.)
    %define GLOBAL + ebx wrt ..gotoff

    ; Clobbers: ebx, flags (from the add).
    %macro GET_GOT_IN_EBX_IF_PIC 0
        call    %%getgot            ; pushes the run-time address of %%getgot
    %%getgot:
        pop     ebx                 ; ebx = run-time address of %%getgot
        ; ($$ - %%getgot) rewinds ebx to the start of the current section;
        ; the ..gotpc term is the distance from there to the GOT.
        add     ebx, _GLOBAL_OFFSET_TABLE_ + $$ - %%getgot wrt ..gotpc
    %endmacro

    %macro PUSH_EBX_IF_PIC 0
        push    ebx
    %endmacro

    %macro POP_EBX_IF_PIC 0
        pop     ebx
    %endmacro
%else
    ; Non-PIC build: everything collapses to nothing.
    %define GLOBAL

    %macro GET_GOT_IN_EBX_IF_PIC 0
    %endmacro

    %macro PUSH_EBX_IF_PIC 0
    %endmacro

    %macro POP_EBX_IF_PIC 0
    %endmacro
%endif
......@@ -36,14 +36,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
;=============================================================================
; Constants
......@@ -272,18 +265,21 @@ ALIGN 4
%macro BIWEIGHT_START_MMX 0
push edi
push esi
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
mov edi, [esp+12] ; dst
mov esi, [esp+16] ; i_dst
mov edx, [esp+20] ; src
mov ecx, [esp+24] ; i_src
pshufw mm4, [esp+28], 0 ; weight_dst
movq mm5, [pw_64]
psubw mm5, mm4 ; weight_src
movq mm6, [pw_32] ; rounding
pshufw mm4, [esp+28], 0 ; weight_dst
movq mm5, [pw_64 GLOBAL]
psubw mm5, mm4 ; weight_src
movq mm6, [pw_32 GLOBAL] ; rounding
pxor mm7, mm7
%endmacro
%macro BIWEIGHT_END_MMX 0
POP_EBX_IF_PIC
pop esi
pop edi
ret
......@@ -516,12 +512,15 @@ ALIGN 16
x264_mc_chroma_sse:
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
pxor mm3, mm3
pshufw mm5, [esp+20], 0 ; mm5 - dx
pshufw mm6, [esp+24], 0 ; mm6 - dy
movq mm4, [pw_8]
movq mm4, [pw_8 GLOBAL]
movq mm0, mm4
psubw mm4, mm5 ; mm4 - 8-dx
......@@ -557,7 +556,7 @@ ALIGN 4
punpcklbw mm2, mm3
punpcklbw mm1, mm3
paddw mm0, [pw_32]
paddw mm0, [pw_32 GLOBAL]
pmullw mm2, mm5 ; line * cB
pmullw mm1, mm7 ; line * cD
......@@ -588,4 +587,5 @@ ALIGN 4
.finish
pop edi
POP_EBX_IF_PIC
ret
......@@ -24,14 +24,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
;=============================================================================
; Read only data
......@@ -139,9 +132,13 @@ x264_center_filter_mmxext :
push edi
push esi
push ebx
; commented out because it seems useless --sam
;push ebx
PUSH_EBX_IF_PIC
push ebp
GET_GOT_IN_EBX_IF_PIC
mov edx, [esp + 40] ; src_stride
lea edx, [edx + edx + 18 + tbuffer]
sub esp, edx
......@@ -173,11 +170,12 @@ x264_center_filter_mmxext :
sub eax, ecx
mov [esp + tsrc] ,eax ; src - 2 * src_stride
lea ebx, [ecx + ecx * 2] ; 3 * src_stride
; commented out because it seems useless --sam
;lea ebx, [ecx + ecx * 2] ; 3 * src_stride
lea edx, [ecx + ecx * 4] ; 5 * src_stride
pxor mm0, mm0 ; 0 ---> mm0
movq mm7, [mmx_dd_one] ; for rounding
movq mm7, [mmx_dd_one GLOBAL] ; for rounding
loopcy:
......@@ -193,7 +191,7 @@ loopcy:
pshufw mm2, mm1, 0
movq [ebp + 8], mm1
movq [ebp], mm2
paddw mm1, [mmx_dw_one]
paddw mm1, [mmx_dw_one GLOBAL]
psraw mm1, 5
packuswb mm1, mm1
......@@ -207,7 +205,7 @@ loopcx1:
FILT_ALL esi
movq [ebp + 2 * eax], mm1
paddw mm1, [mmx_dw_one]
paddw mm1, [mmx_dw_one GLOBAL]
psraw mm1, 5
packuswb mm1, mm1
movd [edi + eax - 4], mm1
......@@ -222,7 +220,7 @@ loopcx1:
pshufw mm2, mm1, 7
movq [ebp + 2 * eax], mm1
movq [ebp + 2 * eax + 8], mm2
paddw mm1, [mmx_dw_one]
paddw mm1, [mmx_dw_one GLOBAL]
psraw mm1, 5
packuswb mm1, mm1
movd [edi + eax - 4], mm1
......@@ -249,15 +247,15 @@ loopcx2:
paddw mm3, mm4
paddw mm1, mm6
movq mm5, [mmx_dw_20]
movq mm4, [mmx_dw_5]
movq mm5, [mmx_dw_20 GLOBAL]
movq mm4, [mmx_dw_5 GLOBAL]
movq mm6, mm1
pxor mm7, mm7
punpckhwd mm5, mm2
punpcklwd mm4, mm3
punpcklwd mm2, [mmx_dw_20]
punpckhwd mm3, [mmx_dw_5]
punpcklwd mm2, [mmx_dw_20 GLOBAL]
punpckhwd mm3, [mmx_dw_5 GLOBAL]
pcmpgtw mm7, mm1
......@@ -270,8 +268,8 @@ loopcx2:
paddd mm2, mm1
paddd mm3, mm6
paddd mm2, [mmx_dd_one]
paddd mm3, [mmx_dd_one]
paddd mm2, [mmx_dd_one GLOBAL]
paddd mm3, [mmx_dd_one GLOBAL]
psrad mm2, 10
psrad mm3, 10
......@@ -297,7 +295,9 @@ loopcx2:
add esp, [esp + toffset]
pop ebp
pop ebx
; commented out because it seems useless --sam
;pop ebx
POP_EBX_IF_PIC
pop esi
pop edi
......@@ -320,7 +320,10 @@ x264_horizontal_filter_mmxext :
mov esi, [esp + 20] ; src
pxor mm0, mm0
movq mm7, [mmx_dw_one]
PUSH_EBX_IF_PIC
GET_GOT_IN_EBX_IF_PIC
movq mm7, [mmx_dw_one GLOBAL]
POP_EBX_IF_PIC
mov ecx, [esp + 32] ; height
......
......@@ -27,14 +27,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
%macro SAD_INC_2x16P 0
movq mm1, [eax]
......
......@@ -26,15 +26,7 @@ BITS 32
; Macros and other preprocessor constants
;=============================================================================
%macro cglobal 1
%ifdef PREFIX
global _%1
%define %1 _%1
%else
global %1
%endif
%endmacro
%include "i386inc.asm"
%ifdef FORMAT_COFF
SECTION .rodata data
......@@ -409,12 +401,14 @@ x264_pixel_ssd_16x8_sse2:
%endmacro