Commit 9384a738 authored by Henrik Gramner

Correctly align buffers for AVX and AVX-512

Fixes segfaults on Windows where the stack is only 16-byte aligned.
parent b00bcafe
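Background, not part of the commit itself: VEX/EVEX "aligned" loads and stores fault unless their address is a multiple of the vector width, so buffers touched by AVX code need 32-byte alignment and AVX-512 buffers need 64-byte alignment; the 16 bytes the Win64 ABI guarantees for the stack are not enough. A minimal sketch of the failure mode, assuming GCC/Clang on x86-64 (build with -mavx; the file and buffer names are illustrative):

/* align_demo.c: an aligned AVX load (vmovdqa) requires a 32-byte-aligned
 * address. Without the aligned(32) attribute this buffer may land on any
 * 16-byte stack slot and _mm256_load_si256 can fault, which is the class
 * of segfault this commit fixes by over-aligning x264's buffers. */
#include <immintrin.h>
#include <stdio.h>

int main( void )
{
    __attribute__((aligned(32))) int32_t buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    __m256i v = _mm256_load_si256( (const __m256i *)buf ); /* aligned load */
    int32_t out[8];
    _mm256_storeu_si256( (__m256i *)out, v );              /* unaligned store, always safe */
    printf( "%d %d\n", out[0], out[7] );
    return 0;
}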
common/common.h
@@ -569,16 +569,16 @@ struct x264_t
         ALIGNED_64( pixel fdec_buf[54*FDEC_STRIDE] );
         /* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
-        ALIGNED_16( pixel i4x4_fdec_buf[16*16] );
-        ALIGNED_16( pixel i8x8_fdec_buf[16*16] );
+        ALIGNED_32( pixel i4x4_fdec_buf[16*16] );
+        ALIGNED_32( pixel i8x8_fdec_buf[16*16] );
         ALIGNED_64( dctcoef i8x8_dct_buf[3][64] );
         ALIGNED_64( dctcoef i4x4_dct_buf[15][16] );
         uint32_t i4x4_nnz_buf[4];
         uint32_t i8x8_nnz_buf[4];
         /* Psy trellis DCT data */
-        ALIGNED_16( dctcoef fenc_dct8[4][64] );
-        ALIGNED_16( dctcoef fenc_dct4[16][16] );
+        ALIGNED_64( dctcoef fenc_dct8[4][64] );
+        ALIGNED_64( dctcoef fenc_dct4[16][16] );
         /* Psy RD SATD/SA8D scores cache */
         ALIGNED_64( uint32_t fenc_satd_cache[32] );
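For reference, the ALIGNED_16/32/64 markers above come from common/osdep.h. A rough sketch of the GCC/Clang spelling (the real macros dispatch on compiler and also cover MSVC's __declspec(align(n))):

/* Sketch of the ALIGNED_n family, modeled on x264's osdep.h (GCC/Clang
 * spelling only; not the exact upstream definitions). */
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
#define ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
#define ALIGNED_32( var ) DECLARE_ALIGNED( var, 32 )
#define ALIGNED_64( var ) DECLARE_ALIGNED( var, 64 )

The attribute alone covers globals and struct members like the ones above; for stack arrays on targets whose ABI cannot guarantee the requested alignment, x264 instead uses the ALIGNED_ARRAY_* macros, sketched at the end of this diff.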
encoder/analyse.c
@@ -558,7 +558,7 @@ static ALWAYS_INLINE const int8_t *predict_4x4_mode_available( int force_intra,
 /* For trellis=2, we need to do this for both sizes of DCT, for trellis=1 we only need to use it on the chosen mode. */
 static void inline psy_trellis_init( x264_t *h, int do_both_dct )
 {
-    ALIGNED_16( static pixel zero[16*FDEC_STRIDE] ) = {0};
+    ALIGNED_64( static pixel zero[16*FDEC_STRIDE] ) = {0};
     if( do_both_dct || h->mb.b_transform_8x8 )
         h->dctf.sub16x16_dct8( h->mb.pic.fenc_dct8, h->mb.pic.p_fenc[0], zero );
@@ -2011,7 +2011,7 @@ static void mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
     }
     else
     {
-        ALIGNED_ARRAY_32( pixel, pixuv, [2],[16*FENC_STRIDE] );
+        ALIGNED_ARRAY_64( pixel, pixuv, [2],[16*FENC_STRIDE] );
         int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
         int v_shift = CHROMA_V_SHIFT;
encoder/ratecontrol.c
@@ -243,7 +243,7 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
     stride <<= b_field;
     if( b_chroma )
     {
-        ALIGNED_ARRAY_32( pixel, pix,[FENC_STRIDE*16] );
+        ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] );
         int chromapix = h->luma2chroma_pixel[PIXEL_16x16];
         int shift = 7 - CHROMA_V_SHIFT;
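The ALIGNED_ARRAY_64 lines above declare aligned stack arrays even when the compiler cannot realign the stack itself. A minimal sketch of the over-allocate-and-round-up fallback, modeled on ALIGNED_ARRAY_EMU in x264's osdep.h (simplified; not the exact upstream macro):

#include <stdint.h>

/* Reserve `mask` spare bytes, then round the pointer up to the next
 * (mask+1)-byte boundary: the array is e.g. 64-byte aligned (mask = 63)
 * even if the stack itself is only 16-byte aligned, as on Win64. */
#define ALIGNED_ARRAY_EMU( mask, type, name, sub1, ... )\
    uint8_t name##_u[sizeof(type sub1 __VA_ARGS__) + (mask)];\
    type (*name) __VA_ARGS__ = (void *)(((intptr_t)name##_u + (mask)) & ~(intptr_t)(mask))

/* Usage sketch: ALIGNED_ARRAY_EMU( 63, int16_t, buf, [64] ) yields a
 * 64-byte-aligned int16_t *buf backed by a local byte array, matching how
 * ALIGNED_ARRAY_64( pixel, pix,[FENC_STRIDE*16] ) is used above. */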