Commit aa48c1fb authored by David Conrad's avatar David Conrad Committed by Fiona Glaser
Browse files

Fix x264 compilation on Apple GCC

Apple's GCC stupidly ignores the ARM ABI and doesn't give any stack alignment beyond 4.
parent fd1cf294
......@@ -182,7 +182,10 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
pf->avg[PIXEL_4x4] = x264_pixel_avg_4x4_neon;
pf->avg[PIXEL_4x2] = x264_pixel_avg_4x2_neon;
// Apple's gcc stupidly cannot align stack variables, and ALIGNED_ARRAY can't work on structs
#ifndef SYS_MACOSX
pf->memcpy_aligned = x264_memcpy_aligned_neon;
#endif
pf->memzero_aligned = x264_memzero_aligned_neon;
pf->mc_chroma = x264_mc_chroma_neon;
......
......@@ -221,7 +221,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
int ref[2];
ALIGNED_8( int16_t mv[2][2] );
ALIGNED_ARRAY_8( int16_t, mv,[2],[2] );
int i_list;
int i8;
const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
......
......@@ -54,12 +54,25 @@
#define ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
#define ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
// current arm compilers only maintain 8-byte stack alignment
// and cannot align stack variables to more than 8-bytes
// ARM compilers don't reliably align stack variables
// - EABI requires only 8 byte stack alignment to be maintained
// - gcc can't align stack variables to more even if the stack were to be correctly aligned outside the function
// - armcc can't either, but is nice enough to actually tell you so
// - Apple gcc only maintains 4 byte alignment
// - llvm can align the stack, but only in svn and (unrelated) it exposes bugs in all released GNU binutils...
#if defined(ARCH_ARM) && defined(SYS_MACOSX)
// ALIGNED_ARRAY_8( type, name, sub1, ... ): declare an 8-byte aligned array
// without relying on the compiler to align the variable itself.
// - name##_u is a raw byte buffer sized for the full array (type sub1 ...)
//   plus 7 bytes of slack.
// - name is a pointer into that buffer, rounded up to the next multiple of 8.
//   Its pointee type drops the first dimension (sub1), so name[i]... indexes
//   like the array would.
// NOTE: name is a pointer here, not an array, so sizeof(name) yields the
// pointer size rather than the array size.
// The expansion is two declarations and deliberately ends without a ';' --
// the caller supplies it.
#define ALIGNED_ARRAY_8( type, name, sub1, ... )\
uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 7]; \
type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+7) & ~7)
#else
// Everywhere else: declare a real array through ALIGNED_8 and let the
// compiler provide the 8-byte alignment.
#define ALIGNED_ARRAY_8( type, name, sub1, ... )\
ALIGNED_8( type name sub1 __VA_ARGS__ )
#endif
#ifdef ARCH_ARM
#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
ALIGNED_8( uint8_t name##_8 [sizeof(type sub1 __VA_ARGS__) + 8] );\
type (*name) __VA_ARGS__ = (void*)(name##_8 + ((intptr_t)name##_8 & 8))
uint8_t name##_u [sizeof(type sub1 __VA_ARGS__) + 15];\
type (*name) __VA_ARGS__ = (void*)((intptr_t)(name##_u+15) & ~15)
#else
#define ALIGNED_ARRAY_16( type, name, sub1, ... )\
ALIGNED_16( type name sub1 __VA_ARGS__ )
......
......@@ -1585,7 +1585,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
ALIGNED_8( uint8_t pix1[16*8] );
ALIGNED_ARRAY_8( uint8_t, pix1,[16*8] );
uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
......@@ -1956,7 +1956,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
ALIGNED_8( uint8_t pix[2][8*8] );
ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*8] );
int i, l;
/* XXX Needed for x264_mb_predict_mv */
......@@ -2089,7 +2089,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
ALIGNED_8( uint8_t pix[2][8*16] );
ALIGNED_ARRAY_8( uint8_t, pix,[2],[8*16] );
ALIGNED_4( int16_t mvc[2][2] );
int i, l;
......
......@@ -903,8 +903,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
const uint16_t *p_cost_m1x = m1->p_cost_mv - m1->mvp[0];
const uint16_t *p_cost_m1y = m1->p_cost_mv - m1->mvp[1];
ALIGNED_ARRAY_16( uint8_t, pixy_buf,[2],[9][16*16] );
ALIGNED_8( uint8_t pixu_buf[2][9][8*8] );
ALIGNED_8( uint8_t pixv_buf[2][9][8*8] );
ALIGNED_ARRAY_8( uint8_t, pixu_buf,[2],[9][8*8] );
ALIGNED_ARRAY_8( uint8_t, pixv_buf,[2],[9][8*8] );
uint8_t *src0[9];
uint8_t *src1[9];
uint8_t *pix = &h->mb.pic.p_fdec[0][(i8>>1)*8*FDEC_STRIDE+(i8&1)*8];
......
......@@ -122,7 +122,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, ui
int i_lines = fenc->i_lines_lowres;
int i_width = fenc->i_width_lowres;
uint8_t *fenc_plane = fenc->lowres[0];
ALIGNED_8( uint8_t buf[8*8] );
ALIGNED_ARRAY_8( uint8_t, buf,[8*8] );
int pixoff = 0;
int i_mb = 0;
......@@ -259,7 +259,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
ALIGNED_ARRAY_8( uint8_t, pix1,[9*FDEC_STRIDE] );
uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
......@@ -340,7 +340,7 @@ static int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
if( b_bidir )
{
int16_t *mvr = fref1->lowres_mvs[0][p1-p0-1][i_mb_xy];
ALIGNED_8( int16_t dmv[2][2] );
ALIGNED_ARRAY_8( int16_t, dmv,[2],[2] );
m[1].i_pixel = PIXEL_8x8;
m[1].p_cost_mv = a->p_cost_mv;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment