Commit 7737e6ad authored by Henrik Gramner
Browse files

Fix clang stack alignment issues

Clang emits aligned AVX stores for things like zeroing stack-allocated
variables when using -mavx, even with -fno-tree-vectorize set, which can
result in crashes if this occurs before we've realigned the stack.

Previously we only ensured that the stack was realigned before calling
assembly functions that access stack-allocated buffers, but this is
not sufficient. Fix the issue by changing the stack realignment to
instead occur immediately in all CLI, API and thread entry points.
parent 26b99cce
......@@ -196,7 +196,7 @@ error:
/****************************************************************************
* x264_picture_init:
****************************************************************************/
void x264_picture_init( x264_picture_t *pic )
static void picture_init( x264_picture_t *pic )
{
memset( pic, 0, sizeof( x264_picture_t ) );
pic->i_type = X264_TYPE_AUTO;
......@@ -204,10 +204,15 @@ void x264_picture_init( x264_picture_t *pic )
pic->i_pic_struct = PIC_STRUCT_AUTO;
}
/* Public entry point: invokes picture_init() through x264_stack_align() so the
 * implementation runs only after the stack has been realigned. */
void x264_picture_init( x264_picture_t *pic )
{
x264_stack_align( picture_init, pic );
}
/****************************************************************************
* x264_picture_alloc:
****************************************************************************/
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
static int picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
typedef struct
{
......@@ -237,7 +242,7 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
int csp = i_csp & X264_CSP_MASK;
if( csp <= X264_CSP_NONE || csp >= X264_CSP_MAX || csp == X264_CSP_V210 )
return -1;
x264_picture_init( pic );
picture_init( pic );
pic->img.i_csp = i_csp;
pic->img.i_plane = csp_tab[csp].planes;
int depth_factor = i_csp & X264_CSP_HIGH_DEPTH ? 2 : 1;
......@@ -259,10 +264,15 @@ int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_heigh
return 0;
}
/* Public entry point: forwards all four arguments to picture_alloc() through
 * x264_stack_align() and returns that function's result (the visible paths of
 * picture_alloc() return -1 for a rejected colorspace and 0 at the end). */
int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
return x264_stack_align( picture_alloc, pic, i_csp, i_width, i_height );
}
/****************************************************************************
* x264_picture_clean:
****************************************************************************/
void x264_picture_clean( x264_picture_t *pic )
static void picture_clean( x264_picture_t *pic )
{
x264_free( pic->img.plane[0] );
......@@ -270,10 +280,15 @@ void x264_picture_clean( x264_picture_t *pic )
memset( pic, 0, sizeof( x264_picture_t ) );
}
/* Public entry point: invokes picture_clean() through x264_stack_align() so the
 * implementation runs only after the stack has been realigned. */
void x264_picture_clean( x264_picture_t *pic )
{
x264_stack_align( picture_clean, pic );
}
/****************************************************************************
* x264_param_default:
****************************************************************************/
void x264_param_default( x264_param_t *param )
static void param_default( x264_param_t *param )
{
/* */
memset( param, 0, sizeof( x264_param_t ) );
......@@ -416,6 +431,11 @@ void x264_param_default( x264_param_t *param )
param->psz_clbin_file = NULL;
}
/* Public entry point: invokes param_default() through x264_stack_align() so the
 * implementation (which begins by zeroing *param) runs on a realigned stack. */
void x264_param_default( x264_param_t *param )
{
x264_stack_align( param_default, param );
}
static int param_apply_preset( x264_param_t *param, const char *preset )
{
char *end;
......@@ -643,9 +663,9 @@ static int param_apply_tune( x264_param_t *param, const char *tune )
return 0;
}
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
static int param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
x264_param_default( param );
param_default( param );
if( preset && param_apply_preset( param, preset ) < 0 )
return -1;
......@@ -654,7 +674,12 @@ int x264_param_default_preset( x264_param_t *param, const char *preset, const ch
return 0;
}
void x264_param_apply_fastfirstpass( x264_param_t *param )
/* Public entry point: forwards param, preset and tune to param_default_preset()
 * through x264_stack_align() and returns its result (-1 on a failed preset in
 * the visible code, 0 at the end). */
int x264_param_default_preset( x264_param_t *param, const char *preset, const char *tune )
{
return x264_stack_align( param_default_preset, param, preset, tune );
}
static void param_apply_fastfirstpass( x264_param_t *param )
{
/* Set faster options in case of turbo firstpass. */
if( param->rc.b_stat_write && !param->rc.b_stat_read )
......@@ -669,6 +694,11 @@ void x264_param_apply_fastfirstpass( x264_param_t *param )
}
}
/* Public entry point: invokes param_apply_fastfirstpass() through
 * x264_stack_align() so the implementation runs on a realigned stack. */
void x264_param_apply_fastfirstpass( x264_param_t *param )
{
x264_stack_align( param_apply_fastfirstpass, param );
}
static int profile_string_to_int( const char *str )
{
if( !strcasecmp( str, "baseline" ) )
......@@ -686,7 +716,7 @@ static int profile_string_to_int( const char *str )
return -1;
}
int x264_param_apply_profile( x264_param_t *param, const char *profile )
static int param_apply_profile( x264_param_t *param, const char *profile )
{
if( !profile )
return 0;
......@@ -748,6 +778,11 @@ int x264_param_apply_profile( x264_param_t *param, const char *profile )
return 0;
}
/* Public entry point: forwards param and profile to param_apply_profile()
 * through x264_stack_align() and returns that function's result. */
int x264_param_apply_profile( x264_param_t *param, const char *profile )
{
return x264_stack_align( param_apply_profile, param, profile );
}
static int parse_enum( const char *arg, const char * const *names, int *dst )
{
for( int i = 0; names[i]; i++ )
......@@ -809,7 +844,7 @@ static double atof_internal( const char *str, int *b_error )
#define atoi(str) atoi_internal( str, &b_error )
#define atof(str) atof_internal( str, &b_error )
int x264_param_parse( x264_param_t *p, const char *name, const char *value )
static int param_parse( x264_param_t *p, const char *name, const char *value )
{
char *name_buf = NULL;
int b_error = 0;
......@@ -1308,6 +1343,11 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
return b_error ? errortype : 0;
}
/* Public entry point: forwards param, name and value to param_parse() through
 * x264_stack_align() and returns that function's result (0 when no error was
 * flagged, otherwise its error type). */
int x264_param_parse( x264_param_t *param, const char *name, const char *value )
{
return x264_stack_align( param_parse, param, name, value );
}
/****************************************************************************
* x264_param2string:
****************************************************************************/
......
......@@ -47,7 +47,7 @@ struct x264_threadpool_t
x264_sync_frame_list_t done; /* list of jobs that have finished processing */
};
static void *threadpool_thread( x264_threadpool_t *pool )
static void *threadpool_thread_internal( x264_threadpool_t *pool )
{
if( pool->init_func )
pool->init_func( pool->init_arg );
......@@ -66,12 +66,17 @@ static void *threadpool_thread( x264_threadpool_t *pool )
x264_pthread_mutex_unlock( &pool->run.mutex );
if( !job )
continue;
job->ret = (void*)x264_stack_align( job->func, job->arg ); /* execute the function */
job->ret = job->func( job->arg );
x264_sync_frame_list_push( &pool->done, (void*)job );
}
return NULL;
}
/* Runs threadpool_thread_internal() through x264_stack_align(), so the whole
 * worker body -- including the job functions it calls directly -- starts from a
 * realigned stack. The trampoline's integer result is cast back to void*. */
static void *threadpool_thread( x264_threadpool_t *pool )
{
return (void*)x264_stack_align( threadpool_thread_internal, pool );
}
int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
void (*init_func)(void *), void *init_arg )
{
......
......@@ -64,23 +64,42 @@ cglobal cpu_xgetbv
%endif
ret
;-----------------------------------------------------------------------------
; void cpu_emms( void )
;-----------------------------------------------------------------------------
cglobal cpu_emms
emms
ret
;-----------------------------------------------------------------------------
; void cpu_sfence( void )
;-----------------------------------------------------------------------------
cglobal cpu_sfence
sfence
ret
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void stack_align( void (*func)(void*), void *arg );
; intptr_t stack_align( void (*func)(void*), ... ); (up to 5 args)
;-----------------------------------------------------------------------------
cglobal stack_align
push rbp
mov rbp, rsp
mov rax, r0mp
mov r0, r1mp
mov r1, r2mp
mov r2, r3mp
mov r3, r4mp
mov r4, r5mp
push rbp
mov rbp, rsp
%if WIN64
sub rsp, 40 ; shadow space + r4
%endif
and rsp, ~(STACK_ALIGNMENT-1)
%if WIN64
sub rsp, 32 ; shadow space
mov [rsp+32], r4
%endif
and rsp, ~(STACK_ALIGNMENT-1)
mov rax, r0
mov r0, r1
mov r1, r2
mov r2, r3
call rax
call rax
leave
ret
......@@ -113,33 +132,22 @@ cglobal cpu_cpuid_test
ret
cglobal stack_align
push ebp
mov ebp, esp
sub esp, 12
and esp, ~(STACK_ALIGNMENT-1)
mov ecx, [ebp+8]
mov edx, [ebp+12]
mov [esp], edx
mov edx, [ebp+16]
mov [esp+4], edx
mov edx, [ebp+20]
mov [esp+8], edx
call ecx
push ebp
mov ebp, esp
sub esp, 20
and esp, ~(STACK_ALIGNMENT-1)
mov r0, [ebp+12]
mov r1, [ebp+16]
mov r2, [ebp+20]
mov [esp+ 0], r0
mov [esp+ 4], r1
mov [esp+ 8], r2
mov r0, [ebp+24]
mov r1, [ebp+28]
mov [esp+12], r0
mov [esp+16], r1
call [ebp+ 8]
leave
ret
%endif
;-----------------------------------------------------------------------------
; void cpu_emms( void )
;-----------------------------------------------------------------------------
cglobal cpu_emms
emms
ret
;-----------------------------------------------------------------------------
; void cpu_sfence( void )
;-----------------------------------------------------------------------------
cglobal cpu_sfence
sfence
ret
......@@ -73,7 +73,7 @@ typedef struct x264_api_t
int (*encoder_invalidate_reference)( x264_t *, int64_t pts );
} x264_api_t;
x264_t *x264_encoder_open( x264_param_t *param )
static x264_api_t *encoder_open( x264_param_t *param )
{
x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
if( !api )
......@@ -118,15 +118,20 @@ x264_t *x264_encoder_open( x264_param_t *param )
return NULL;
}
return api;
}
x264_t *x264_encoder_open( x264_param_t *param )
{
/* x264_t is opaque */
return (x264_t *)api;
return (x264_t *)x264_stack_align( encoder_open, param );
}
void x264_encoder_close( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
api->encoder_close( api->x264 );
x264_stack_align( api->encoder_close, api->x264 );
free( api );
}
......@@ -134,61 +139,61 @@ void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
x264_api_t *api = (x264_api_t *)h;
api->nal_encode( api->x264, dst, nal );
x264_stack_align( api->nal_encode, api->x264, dst, nal );
}
int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_reconfig( api->x264, param );
return x264_stack_align( api->encoder_reconfig, api->x264, param );
}
void x264_encoder_parameters( x264_t *h, x264_param_t *param )
{
x264_api_t *api = (x264_api_t *)h;
api->encoder_parameters( api->x264, param );
x264_stack_align( api->encoder_parameters, api->x264, param );
}
int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_headers( api->x264, pp_nal, pi_nal );
return x264_stack_align( api->encoder_headers, api->x264, pp_nal, pi_nal );
}
int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
return x264_stack_align( api->encoder_encode, api->x264, pp_nal, pi_nal, pic_in, pic_out );
}
int x264_encoder_delayed_frames( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_delayed_frames( api->x264 );
return x264_stack_align( api->encoder_delayed_frames, api->x264 );
}
int x264_encoder_maximum_delayed_frames( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_maximum_delayed_frames( api->x264 );
return x264_stack_align( api->encoder_maximum_delayed_frames, api->x264 );
}
void x264_encoder_intra_refresh( x264_t *h )
{
x264_api_t *api = (x264_api_t *)h;
api->encoder_intra_refresh( api->x264 );
x264_stack_align( api->encoder_intra_refresh, api->x264 );
}
int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
{
x264_api_t *api = (x264_api_t *)h;
return api->encoder_invalidate_reference( api->x264, pts );
return x264_stack_align( api->encoder_invalidate_reference, api->x264, pts );
}
......@@ -1564,7 +1564,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( h->param.b_cabac )
x264_cabac_init( h );
else
x264_stack_align( x264_cavlc_init, h );
x264_cavlc_init( h );
mbcmp_init( h );
chroma_dsp_init( h );
......@@ -3087,7 +3087,7 @@ static void *slices_write( x264_t *h )
}
}
h->sh.i_last_mb = X264_MIN( h->sh.i_last_mb, last_thread_mb );
if( x264_stack_align( slice_write, h ) )
if( slice_write( h ) )
goto fail;
h->sh.i_first_mb = h->sh.i_last_mb + 1;
// if i_first_mb is not the last mb in a row then go to the next mb in MBAFF order
......@@ -3122,7 +3122,7 @@ static int threaded_slices_write( x264_t *h )
t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1;
}
x264_stack_align( x264_analyse_weight_frame, h, h->mb.i_mb_height*16 + 16 );
x264_analyse_weight_frame( h, h->mb.i_mb_height*16 + 16 );
x264_threads_distribute_ratecontrol( h );
......@@ -3300,7 +3300,7 @@ int x264_encoder_encode( x264_t *h,
return -1;
}
else
x264_stack_align( x264_adaptive_quant_frame, h, fenc, pic_in->prop.quant_offsets );
x264_adaptive_quant_frame( h, fenc, pic_in->prop.quant_offsets );
if( pic_in->prop.quant_offsets_free )
pic_in->prop.quant_offsets_free( pic_in->prop.quant_offsets );
......
......@@ -67,7 +67,7 @@ static void lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
#if HAVE_THREAD
static void lookahead_slicetype_decide( x264_t *h )
{
x264_stack_align( x264_slicetype_decide, h );
x264_slicetype_decide( h );
lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
......@@ -82,12 +82,12 @@ static void lookahead_slicetype_decide( x264_t *h )
/* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
x264_stack_align( x264_slicetype_analyse, h, shift_frames );
x264_slicetype_analyse( h, shift_frames );
x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
}
static void *lookahead_thread( x264_t *h )
static void *lookahead_thread_internal( x264_t *h )
{
while( !h->lookahead->b_exit_thread )
{
......@@ -121,6 +121,11 @@ static void *lookahead_thread( x264_t *h )
x264_pthread_mutex_unlock( &h->lookahead->ofbuf.mutex );
return NULL;
}
/* Runs lookahead_thread_internal() through x264_stack_align(), so the lookahead
 * loop and everything it calls start from a realigned stack. The trampoline's
 * integer result is cast back to void*. */
static void *lookahead_thread( x264_t *h )
{
return (void*)x264_stack_align( lookahead_thread_internal, h );
}
#endif
int x264_lookahead_init( x264_t *h, int i_slicetype_length )
......@@ -230,14 +235,14 @@ void x264_lookahead_get_frames( x264_t *h )
if( h->frames.current[0] || !h->lookahead->next.i_size )
return;
x264_stack_align( x264_slicetype_decide, h );
x264_slicetype_decide( h );
lookahead_update_last_nonb( h, h->lookahead->next.list[0] );
int shift_frames = h->lookahead->next.list[0]->i_bframes + 1;
lookahead_shift( &h->lookahead->ofbuf, &h->lookahead->next, shift_frames );
/* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
if( h->lookahead->b_analyse_keyframe && IS_X264_TYPE_I( h->lookahead->last_nonb->i_type ) )
x264_stack_align( x264_slicetype_analyse, h, shift_frames );
x264_slicetype_analyse( h, shift_frames );
lookahead_encoder_shift( h );
}
......
......@@ -574,7 +574,7 @@ int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offs
rc->mbtree.qpbuf_pos--;
}
else
x264_stack_align( x264_adaptive_quant_frame, h, frame, quant_offsets );
x264_adaptive_quant_frame( h, frame, quant_offsets );
return 0;
fail:
x264_log( h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n" );
......
......@@ -2913,7 +2913,7 @@ static int check_all_flags( void )
return ret;
}
int main(int argc, char *argv[])
static int main_internal( int argc, char **argv )
{
#ifdef _WIN32
/* Disable the Windows Error Reporting dialog */
......@@ -2973,3 +2973,7 @@ int main(int argc, char *argv[])
return 0;
}
/* Process entry point: forwards argc/argv to main_internal() through
 * x264_stack_align() and returns its result, so all real work happens after
 * the stack has been realigned. */
int main( int argc, char **argv )
{
return x264_stack_align( main_internal, argc, argv );
}
......@@ -351,7 +351,7 @@ static void print_version_info( void )
#endif
}
int main( int argc, char **argv )
static int main_internal( int argc, char **argv )
{
x264_param_t param;
cli_opt_t opt = {0};
......@@ -403,6 +403,11 @@ int main( int argc, char **argv )
return ret;
}
/* Process entry point: forwards argc/argv to main_internal() through
 * x264_stack_align() and returns its result, so locals such as the parameter
 * and option structs live in a frame created after stack realignment. */
int main( int argc, char **argv )
{
return x264_stack_align( main_internal, argc, argv );
}
static char const *strtable_lookup( const char * const table[], int idx )
{
int i = 0; while( table[i] ) i++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment