Commit b5bc5d69 authored by Henrik Gramner's avatar Henrik Gramner Committed by Anton Mitrofanov

x86: Perform stack realignment in C instead of assembly

Simplifies a lot of code and avoids having to export public asm functions.

Note that the force_align_arg_pointer function attribute is broken in clang
versions prior to 6.0.1 which may result in crashes, so make sure to either
use a newer clang version or a different compiler.
parent 34c06d1c
......@@ -196,7 +196,7 @@ error:
/****************************************************************************
* x264_picture_init:
****************************************************************************/
static void picture_init( x264_picture_t *pic )
REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
{
memset( pic, 0, sizeof( x264_picture_t ) );
pic->i_type = X264_TYPE_AUTO;
......@@ -204,15 +204,10 @@ static void picture_init( x264_picture_t *pic )
pic->i_pic_struct = PIC_STRUCT_AUTO;
}
void x264_picture_init( x264_picture_t *pic )
{
x264_stack_align( picture_init, pic );
}
/****************************************************************************
* x264_picture_alloc:
****************************************************************************/
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
typedef struct
{
......@@ -243,7 +238,7 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
int csp = i_csp & X264_CSP_MASK;
if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
return -1;
picture_init( pic );
x264_picture_init( pic );
pic->img.i_csp = i_csp;
pic->img.i_plane = csp_tab[csp].planes;
int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
......@@ -265,15 +260,10 @@ static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_hei
return 0;
}
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}
/****************************************************************************
* x264_picture_clean:
****************************************************************************/
static void picture_clean( x264_picture_t *pic )
REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
{
x264_free( pic->img.plane[0] );
......@@ -281,15 +271,10 @@ static void picture_clean( x264_picture_t *pic )
memset( pic, 0, sizeof( x264_picture_t ) );
}
void x264_picture_clean( x264_picture_t *pic )
{
x264_stack_align( picture_clean, pic );
}
/****************************************************************************
* x264_param_default:
****************************************************************************/
static void param_default( x264_param_t *param )
REALIGN_STACK void x264_param_default( x264_param_t *param )
{
/* */
memset( param, 0, sizeof( x264_param_t ) );
......@@ -434,11 +419,6 @@ static void param_default( x264_param_t *param )
param->i_avcintra_flavor = X264_AVCINTRA_FLAVOR_PANASONIC;
}
void x264_param_default( x264_param_t *param )
{
x264_stack_align( param_default, param );
}
static int param_apply_preset( x264_param_t *param, const char *preset )
{
char *end;
......@@ -656,9 +636,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
return 0;
}
static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
REALIGN_STACK int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
param_default( param );
x264_param_default( param );
if( preset && param_apply_preset( param, preset ) < 0 )
return -1;
......@@ -667,12 +647,7 @@ static int param_default_preset( x264_param_t *param, const char *preset, const
return 0;
}
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
return x264_stack_align( param_default_preset, param, preset, tune );
}
static void param_apply_fastfirstpass( x264_param_t *param )
REALIGN_STACK void x264_param_apply_fastfirstpass( x264_param_t *param )
{
/* Set faster options in case of turbo firstpass. */
if( param->rc.b_stat_write && !param->rc.b_stat_read )
......@@ -687,11 +662,6 @@ static void param_apply_fastfirstpass( x264_param_t *param )
}
}
void x264_param_apply_fastfirstpass( x264_param_t *param )
{
x264_stack_align( param_apply_fastfirstpass, param );
}
static int profile_string_to_int( const char *str )
{
if( !strcasecmp( str, "baseline" ) )
......@@ -709,7 +679,7 @@ static int profile_string_to_int( const char *str )
return -1;
}
static int param_apply_profile( x264_param_t *param, const char *profile )
REALIGN_STACK int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
......@@ -776,11 +746,6 @@ static int param_apply_profile( x264_param_t *param, const char *profile )
return 0;
}
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
return x264_stack_align( param_apply_profile, param, profile );
}
static int parse_enum( const char *arg, const char * const *names, int *dst )
{
for( int i = 0; names[i]; i++ )
......@@ -842,7 +807,7 @@ static double atof_internal( const char *str, int *b_error )
#define atoi(str) atoi_internal( str, &b_error )
#define atof(str) atof_internal( str, &b_error )
static int param_parse( x264_param_t *p, const char *name, const char *value )
REALIGN_STACK int x264_param_parse( x264_param_t *p, const char *name, const char *value )
{
char *name_buf = NULL;
int b_error = 0;
......@@ -1343,11 +1308,6 @@ static int param_parse( x264_param_t *p, const char *name, const char *value )
return b_error ? errortype : 0;
}
int x264_param_parse( x264_param_t *param, const char *name, const char *value )
{
return x264_stack_align( param_parse, param, name, value );
}
/****************************************************************************
* x264_param2string:
****************************************************************************/
......
......@@ -46,23 +46,6 @@ void x264_cpu_sfence( void );
#endif
#define x264_sfence x264_cpu_sfence
/* kludge:
* gcc can't give variables any greater alignment than the stack frame has.
* We need 32 byte alignment for AVX2, so here we make sure that the stack is
* aligned to 32 bytes.
* gcc 4.2 introduced __attribute__((force_align_arg_pointer)) to fix this
* problem, but I don't want to require such a new version.
* aligning to 32 bytes only works if the compiler supports keeping that
* alignment between functions (osdep.h handles manual alignment of arrays
* if it doesn't).
*/
#if HAVE_MMX && (STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4))
intptr_t x264_stack_align( void (*func)(), ... );
#define x264_stack_align(func,...) x264_stack_align((void (*)())func, __VA_ARGS__)
#else
#define x264_stack_align(func,...) func(__VA_ARGS__)
#endif
typedef struct
{
const char *name;
......
......@@ -163,6 +163,12 @@ int x264_is_pipe( const char *path );
#define ALIGNED_ARRAY_64 ALIGNED_ARRAY_16
#endif
#if STACK_ALIGNMENT > 16 || (ARCH_X86 && STACK_ALIGNMENT > 4)
#define REALIGN_STACK __attribute__((force_align_arg_pointer))
#else
#define REALIGN_STACK
#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
#define ALWAYS_INLINE __attribute__((always_inline)) inline
......
......@@ -47,7 +47,7 @@ struct x264_threadpool_t
x264_sync_frame_list_t done; /* list of jobs that have finished processing */
};
static void *threadpool_thread_internal( x264_threadpool_t *pool )
REALIGN_STACK static void *threadpool_thread( x264_threadpool_t *pool )
{
if( pool->init_func )
pool->init_func( pool->init_arg );
......@@ -72,11 +72,6 @@ static void *threadpool_thread_internal( x264_threadpool_t *pool )
return NULL;
}
static void *threadpool_thread( x264_threadpool_t *pool )
{
return (void*)x264_stack_align( threadpool_thread_internal, pool );
}
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg )
{
......
......@@ -78,33 +78,7 @@ cglobal cpu_sfence
sfence
ret
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args)
;-----------------------------------------------------------------------------
cvisible stack_align
mov rax, r0mp
mov r0, r1mp
mov r1, r2mp
mov r2, r3mp
mov r3, r4mp
mov r4, r5mp
push rbp
mov rbp, rsp
%if WIN64
sub rsp, 40 ; shadow space + r4
%endif
and rsp, ~(STACK_ALIGNMENT-1)
%if WIN64
mov [rsp+32], r4
%endif
call rax
leave
ret
%else
%if ARCH_X86_64 == 0
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
; return 0 if unsupported
......@@ -130,24 +104,4 @@ cglobal cpu_cpuid_test
pop ebx
popfd
ret
cvisible stack_align
push ebp
mov ebp, esp
sub esp, 20
and esp, ~(STACK_ALIGNMENT-1)
mov r0, [ebp+12]
mov r1, [ebp+16]
mov r2, [ebp+20]
mov [esp+ 0], r0
mov [esp+ 4], r1
mov [esp+ 8], r2
mov r0, [ebp+24]
mov r1, [ebp+28]
mov [esp+12], r0
mov [esp+16], r1
call [ebp+ 8]
leave
ret
%endif
......@@ -73,7 +73,7 @@ typedef struct x264_api_t
int (*encoder_invalidate_reference)( x264_t *, int64_t pts );
} x264_api_t;
static x264_api_t *encoder_open( x264_param_t *param )
REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param )
{
x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
if( !api )
......@@ -118,82 +118,77 @@ static x264_api_t *encoder_open( x264_param_t *param )
return NULL;
}
return api;
}
x264_t *x264_encoder_open( x264_param_t *param )
{
/* x264_t is opaque */
return (x264_t *)x264_stack_align( encoder_open, param );
return (x264_t *)api;
}
void x264_encoder_close( x264_t *h )
REALIGN_STACK void x264_encoder_close( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->encoder_close, api->x264 );
api->encoder_close( api->x264 );
free( api );
}
void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->nal_encode, api->x264, dst, nal );
api->nal_encode( api->x264, dst, nal );
}
int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_reconfig, api->x264, param );
return api->encoder_reconfig( api->x264, param );
}
void x264_encoder_parameters( x264_t *h, x264_param_t *param )
REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->encoder_parameters, api->x264, param );
api->encoder_parameters( api->x264, param );
}
int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_headers, api->x264, pp_nal, pi_nal );
return api->encoder_headers( api->x264, pp_nal, pi_nal );
}
int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_encode, api->x264, pp_nal, pi_nal, pic_in, pic_out );
return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
}
int x264_encoder_delayed_frames( x264_t *h )
REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_delayed_frames, api->x264 );
return api->encoder_delayed_frames( api->x264 );
}
int x264_encoder_maximum_delayed_frames( x264_t *h )
REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_maximum_delayed_frames, api->x264 );
return api->encoder_maximum_delayed_frames( api->x264 );
}
void x264_encoder_intra_refresh( x264_t *h )
REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
x264_stack_align( api->encoder_intra_refresh, api->x264 );
api->encoder_intra_refresh( api->x264 );
}
int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
{
x264_api_t *api = (x264_api_t *)h;
return x264_stack_align( api->encoder_invalidate_reference, api->x264, pts );
return api->encoder_invalidate_reference( api->x264, pts );
}
......@@ -87,7 +87,7 @@ static void lookahead_slicetype_decide( x264_t *h )
x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
}
static void *lookahead_thread_internal( x264_t *h )
REALIGN_STACK static void *lookahead_thread( x264_t *h )
{
while( !h->lookahead->b_exit_thread )
{
......@@ -122,10 +122,6 @@ static void *lookahead_thread_internal( x264_t *h )
return NULL;
}
static void *lookahead_thread( x264_t *h )
{
return (void*)x264_stack_align( lookahead_thread_internal, h );
}
#endif
int x264_lookahead_init( x264_t *h, int i_slicetype_length )
......
......@@ -2914,7 +2914,7 @@ static int check_all_flags( void )
return ret;
}
static int main_internal( int argc, char **argv )
REALIGN_STACK int main( int argc, char **argv )
{
#ifdef _WIN32
/* Disable the Windows Error Reporting dialog */
......@@ -2973,8 +2973,3 @@ static int main_internal( int argc, char **argv )
print_bench();
return 0;
}
int main( int argc, char **argv )
{
return x264_stack_align( main_internal, argc, argv );
}
......@@ -373,7 +373,7 @@ static void print_version_info( void )
#endif
}
static int main_internal( int argc, char **argv )
REALIGN_STACK int main( int argc, char **argv )
{
if( argc == 4 && !strcmp( argv[1], "--autocomplete" ) )
return x264_cli_autocomplete( argv[2], argv[3] );
......@@ -428,11 +428,6 @@ static int main_internal( int argc, char **argv )
return ret;
}
int main( int argc, char **argv )
{
return x264_stack_align( main_internal, argc, argv );
}
static char const *strtable_lookup( const char * const table[], int idx )
{
int i = 0; while( table[i] ) i++;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment