Commit 7cbb27f0 authored by Fiona Glaser

x86: 32-byte align the stack if possible

Avoids the need for manual 32 byte array alignment on compilers that support
-mpreferred-stack-boundary.
parent 30c91f62
......@@ -48,15 +48,17 @@ void x264_cpu_sfence( void );
void x264_cpu_mask_misalign_sse( void );
void x264_safe_intel_cpu_indicator_init( void );
/* kluge:
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
* We need 16 byte alignment for SSE2, so here we make sure that the stack is
* aligned to 16 bytes.
* We need 32 byte alignment for AVX2, so here we make sure that the stack is
* aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* This applies only to x86_32, since other architectures that need alignment
* either have ABIs that ensure aligned stack, or don't support it at all. */
#if ARCH_X86 && HAVE_MMX
* aligning to 32 bytes only works if the compiler supports keeping that
* alignment between functions (osdep.h handles manual alignment of arrays
* if it doesn't).
*/
#if (ARCH_X86 || HAVE_32B_STACK_ALIGNMENT) && HAVE_MMX
int x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
......
......@@ -111,7 +111,13 @@
#define EXPAND(x) x
/* ALIGNED_ARRAY_32: declare an array that must be 32-byte aligned.
 *
 * HAVE_32B_STACK_ALIGNMENT set:   the declaration is emitted directly through
 *   ALIGNED_32, i.e. `type name sub1 ...` with the remaining subscripts passed
 *   via __VA_ARGS__. No manual alignment is done here.
 * HAVE_32B_STACK_ALIGNMENT unset: the arguments are forwarded to
 *   ALIGNED_ARRAY_EMU with 31 as its first argument (presumably the alignment
 *   mask, 32 - 1 -- confirm against ALIGNED_ARRAY_EMU's definition).
 *   EXPAND() forces __VA_ARGS__ to be re-scanned before that macro is applied.
 */
#if HAVE_32B_STACK_ALIGNMENT
#define ALIGNED_ARRAY_32( type, name, sub1, ... )\
ALIGNED_32( type name sub1 __VA_ARGS__ )
#else
#define ALIGNED_ARRAY_32( ... ) EXPAND( ALIGNED_ARRAY_EMU( 31, __VA_ARGS__ ) )
#endif
#define ALIGNED_ARRAY_64( ... ) EXPAND( ALIGNED_ARRAY_EMU( 63, __VA_ARGS__ ) )
/* For AVX2 */
......
......@@ -66,7 +66,27 @@ cglobal cpu_xgetbv, 3,7
mov [r4], edx
RET
%if ARCH_X86_64 == 0
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; int stack_align( void (*func)(), ... );
;
; Calls func on a stack that has been realigned to 32 bytes and returns
; whatever func leaves in rax. (The C-side prototype is declared as
; `int x264_stack_align( void (*func)(), ... )`.)
;
; In:  r0 = func, r1..r3 = the first three arguments to forward.
;      NOTE(review): r0-r3 are assumed to be the x86inc aliases for the first
;      four integer argument registers of the target ABI -- confirm.
; Out: rax = value left there by func (not touched after the call).
; Only the three register arguments are forwarded; nothing is copied from the
; caller's stack.
;-----------------------------------------------------------------------------
cglobal stack_align
push rbp ; save caller's frame pointer
mov rbp, rsp ; remember the unaligned rsp so `leave` can restore it
%if WIN64
sub rsp, 32 ; shadow space the Win64 ABI requires the caller to reserve
%endif
and rsp, ~31 ; round rsp down to a 32-byte boundary
mov rax, r0 ; stash func before r0 is overwritten
; Shift the arguments down one slot. Order matters: each source register is
; read before it is overwritten by the next move.
mov r0, r1
mov r1, r2
mov r2, r3
call rax
leave ; rsp = rbp, pop rbp: undoes both the shadow space and the alignment
ret
%else
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
......@@ -94,14 +114,11 @@ cglobal cpu_cpuid_test
popfd
ret
;-----------------------------------------------------------------------------
; void stack_align( void (*func)(void*), void *arg );
;-----------------------------------------------------------------------------
cglobal stack_align
push ebp
mov ebp, esp
sub esp, 12
and esp, ~15
and esp, ~31
mov ecx, [ebp+8]
mov edx, [ebp+12]
mov [esp], edx
......@@ -168,7 +185,7 @@ cglobal safe_intel_cpu_indicator_init
%if WIN64
sub rsp, 32 ; shadow space
%endif
and rsp, ~15
and rsp, ~31
call intel_cpu_indicator_init
leave
%if ARCH_X86_64
......
......@@ -717,6 +717,10 @@ if [ $asm = auto -a \( $ARCH = X86 -o $ARCH = X86_64 \) ] ; then
exit 1
fi
define HAVE_MMX
# Probe whether the compiler accepts -mpreferred-stack-boundary=5
# (a 2^5 = 32-byte stack boundary). If it does, add the flag to CFLAGS for
# the whole build and define HAVE_32B_STACK_ALIGNMENT so the sources can rely
# on a 32-byte aligned stack. If it does not, neither CFLAGS nor the define
# is touched.
if cc_check '' -mpreferred-stack-boundary=5 ; then
CFLAGS="$CFLAGS -mpreferred-stack-boundary=5"
define HAVE_32B_STACK_ALIGNMENT
fi
fi
if [ $asm = auto -a $ARCH = ARM ] ; then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment