Commit 542027fa authored by Loren Merritt

cosmetics in DECLARE_ALIGNED

parent 52fb8334
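
For illustration only, a minimal sketch of how the renamed helpers are used after this change, assuming a GCC-style compiler (under MSVC the macro expands to __declspec(align(n)) instead, as shown in the osdep hunk below). The function and buffer names here are hypothetical, not part of x264:

/* Old form:  DECLARE_ALIGNED( uint8_t, buf[64], 16 );
 * New form:  DECLARE_ALIGNED_16( uint8_t buf[64] );
 * i.e. the alignment count moves into the macro name and the type is no
 * longer passed as a separate argument. */
#include <stdint.h>

#define DECLARE_ALIGNED( var, n )  var __attribute__((aligned(n)))
#define DECLARE_ALIGNED_16( var )  DECLARE_ALIGNED( var, 16 )
#define DECLARE_ALIGNED_8( var )   DECLARE_ALIGNED( var, 8 )
#define DECLARE_ALIGNED_4( var )   DECLARE_ALIGNED( var, 4 )

static void example( void )
{
    /* expands to: uint8_t buf[64] __attribute__((aligned(16))); */
    DECLARE_ALIGNED_16( uint8_t buf[64] );
    buf[0] = 0;
    (void)buf;
}
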
......@@ -27,7 +27,7 @@
typedef struct
{
/* context */
DECLARE_ALIGNED( uint8_t, state[460], 16 );
DECLARE_ALIGNED_16( uint8_t state[460] );
int f8_bits_encoded; // only if using x264_cabac_size_decision()
......
......@@ -336,11 +336,11 @@ struct x264_t
/* Current MB DCT coeffs */
struct
{
DECLARE_ALIGNED( int16_t, luma16x16_dc[16], 16 );
DECLARE_ALIGNED( int16_t, chroma_dc[2][4], 16 );
DECLARE_ALIGNED_16( int16_t luma16x16_dc[16] );
DECLARE_ALIGNED_16( int16_t chroma_dc[2][4] );
// FIXME share memory?
DECLARE_ALIGNED( int16_t, luma8x8[4][64], 16 );
DECLARE_ALIGNED( int16_t, luma4x4[16+8][16], 16 );
DECLARE_ALIGNED_16( int16_t luma8x8[4][64] );
DECLARE_ALIGNED_16( int16_t luma4x4[16+8][16] );
} dct;
/* MB table and cache for current frame/mb */
......@@ -430,14 +430,14 @@ struct x264_t
/* space for p_fenc and p_fdec */
#define FENC_STRIDE 16
#define FDEC_STRIDE 32
DECLARE_ALIGNED( uint8_t, fenc_buf[24*FENC_STRIDE], 16 );
DECLARE_ALIGNED( uint8_t, fdec_buf[27*FDEC_STRIDE], 16 );
DECLARE_ALIGNED_16( uint8_t fenc_buf[24*FENC_STRIDE] );
DECLARE_ALIGNED_16( uint8_t fdec_buf[27*FDEC_STRIDE] );
/* i4x4 and i8x8 backup data, for skipping the encode stage when possible */
DECLARE_ALIGNED( uint8_t, i4x4_fdec_buf[16*16], 16 );
DECLARE_ALIGNED( uint8_t, i8x8_fdec_buf[16*16], 16 );
DECLARE_ALIGNED( int16_t, i8x8_dct_buf[3][64], 16 );
DECLARE_ALIGNED( int16_t, i4x4_dct_buf[15][16], 16 );
DECLARE_ALIGNED_16( uint8_t i4x4_fdec_buf[16*16] );
DECLARE_ALIGNED_16( uint8_t i8x8_fdec_buf[16*16] );
DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
/* pointer over mb of the frame to be compressed */
uint8_t *p_fenc[3];
......@@ -464,16 +464,16 @@ struct x264_t
uint8_t non_zero_count[X264_SCAN8_SIZE];
/* -1 if unused, -2 if unavailable */
DECLARE_ALIGNED( int8_t, ref[2][X264_SCAN8_SIZE], 4 );
DECLARE_ALIGNED_4( int8_t ref[2][X264_SCAN8_SIZE] );
/* 0 if not available */
DECLARE_ALIGNED( int16_t, mv[2][X264_SCAN8_SIZE][2], 16 );
DECLARE_ALIGNED( int16_t, mvd[2][X264_SCAN8_SIZE][2], 4 );
DECLARE_ALIGNED_16( int16_t mv[2][X264_SCAN8_SIZE][2] );
DECLARE_ALIGNED_4( int16_t mvd[2][X264_SCAN8_SIZE][2] );
/* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
DECLARE_ALIGNED( int8_t, skip[X264_SCAN8_SIZE], 4 );
DECLARE_ALIGNED_4( int8_t skip[X264_SCAN8_SIZE] );
DECLARE_ALIGNED( int16_t, direct_mv[2][X264_SCAN8_SIZE][2], 16 ) ;
DECLARE_ALIGNED_16( int16_t direct_mv[2][X264_SCAN8_SIZE][2] );
int8_t direct_ref[2][X264_SCAN8_SIZE];
int pskip_mv[2];
......
......@@ -660,7 +660,7 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
const int i_ref1 = h->mb.cache.ref[1][i8];
const int mvx1 = x264_clip3( h->mb.cache.mv[1][i8][0], h->mb.mv_min[0], h->mb.mv_max[0] );
int mvy1 = x264_clip3( h->mb.cache.mv[1][i8][1], h->mb.mv_min[1], h->mb.mv_max[1] );
DECLARE_ALIGNED( uint8_t, tmp[16*16], 16 );
DECLARE_ALIGNED_16( uint8_t tmp[16*16] );
int i_mode = x264_size2pixel[height][width];
x264_mb_mc_0xywh( h, x, y, width, height );
......
......@@ -58,10 +58,13 @@
#endif
#ifdef _MSC_VER
#define DECLARE_ALIGNED( type, var, n ) __declspec(align(n)) type var
#define DECLARE_ALIGNED( var, n ) __declspec(align(n)) var
#else
#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
#define DECLARE_ALIGNED( var, n ) var __attribute__((aligned(n)))
#endif
#define DECLARE_ALIGNED_16( var ) DECLARE_ALIGNED( var, 16 )
#define DECLARE_ALIGNED_8( var ) DECLARE_ALIGNED( var, 8 )
#define DECLARE_ALIGNED_4( var ) DECLARE_ALIGNED( var, 4 )
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
#define UNUSED __attribute__((unused))
......
......@@ -48,7 +48,7 @@
static inline void write16x4(uint8_t *dst, int dst_stride,
register vec_u8_t r0, register vec_u8_t r1,
register vec_u8_t r2, register vec_u8_t r3) {
DECLARE_ALIGNED(unsigned char, result[64], 16);
DECLARE_ALIGNED_16(unsigned char result[64]);
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
int int_dst_stride = dst_stride/4;
......@@ -229,7 +229,7 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
}
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
DECLARE_ALIGNED(unsigned char, temp[16], 16); \
DECLARE_ALIGNED_16(unsigned char temp[16]); \
register vec_u8_t alphavec; \
register vec_u8_t betavec; \
register vec_u8_t mask; \
......
......@@ -262,7 +262,7 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
DECLARE_ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
......@@ -328,7 +328,7 @@ static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
int d8x = mvx & 0x07;
int d8y = mvy & 0x07;
DECLARE_ALIGNED( uint16_t, coeff[4], 16 );
DECLARE_ALIGNED_16( uint16_t coeff[4] );
coeff[0] = (8-d8x)*(8-d8y);
coeff[1] = d8x *(8-d8y);
coeff[2] = (8-d8x)*d8y;
......
......@@ -38,7 +38,7 @@ static int name( uint8_t *pix1, int i_pix1, \
uint8_t *pix2, int i_pix2 ) \
{ \
int y; \
DECLARE_ALIGNED( int, sum, 16 ); \
DECLARE_ALIGNED_16( int sum ); \
\
LOAD_ZERO; \
PREP_LOAD; \
......@@ -121,7 +121,7 @@ PIXEL_SAD_ALTIVEC( pixel_sad_8x8_altivec, 8, 8, 2s, 1 )
static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v;
......@@ -161,7 +161,7 @@ static int pixel_satd_4x4_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v;
......@@ -211,7 +211,7 @@ static int pixel_satd_4x8_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
......@@ -260,7 +260,7 @@ static int pixel_satd_8x4_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
......@@ -315,7 +315,7 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
PREP_DIFF;
vec_s16_t diff0v, diff1v, diff2v, diff3v,
......@@ -395,7 +395,7 @@ static int pixel_satd_8x16_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
......@@ -478,7 +478,7 @@ static int pixel_satd_16x8_altivec( uint8_t *pix1, int i_pix1,
static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
uint8_t *pix2, int i_pix2 )
{
DECLARE_ALIGNED( int, i_satd, 16 );
DECLARE_ALIGNED_16( int i_satd );
LOAD_ZERO;
PREP_LOAD;
......@@ -604,10 +604,10 @@ static int pixel_satd_16x16_altivec( uint8_t *pix1, int i_pix1,
static void pixel_sad_x4_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED( int, sum3, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
......@@ -730,9 +730,9 @@ static void pixel_sad_x4_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *p
static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
......@@ -832,10 +832,10 @@ static void pixel_sad_x3_16x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *p
static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED( int, sum3, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
......@@ -958,9 +958,9 @@ static void pixel_sad_x4_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
......@@ -1061,10 +1061,10 @@ static void pixel_sad_x3_16x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED( int, sum3, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
......@@ -1184,9 +1184,9 @@ static void pixel_sad_x4_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
......@@ -1289,10 +1289,10 @@ static void pixel_sad_x3_8x16_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pi
static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, uint8_t *pix3, int i_stride, int scores[4] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED( int, sum3, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
DECLARE_ALIGNED_16( int sum3 );
int y;
LOAD_ZERO;
......@@ -1414,9 +1414,9 @@ static void pixel_sad_x4_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix
static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix1, uint8_t *pix2, int i_stride, int scores[3] )
{
DECLARE_ALIGNED( int, sum0, 16 );
DECLARE_ALIGNED( int, sum1, 16 );
DECLARE_ALIGNED( int, sum2, 16 );
DECLARE_ALIGNED_16( int sum0 );
DECLARE_ALIGNED_16( int sum1 );
DECLARE_ALIGNED_16( int sum2 );
int y;
LOAD_ZERO;
......@@ -1523,7 +1523,7 @@ static void pixel_sad_x3_8x8_altivec( uint8_t *fenc, uint8_t *pix0, uint8_t *pix
static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
DECLARE_ALIGNED( int, sum, 16 );
DECLARE_ALIGNED_16( int sum );
int y;
LOAD_ZERO;
......@@ -1607,7 +1607,7 @@ static int pixel_ssd_16x16_altivec ( uint8_t *pix1, int i_stride_pix1,
static int pixel_ssd_8x8_altivec ( uint8_t *pix1, int i_stride_pix1,
uint8_t *pix2, int i_stride_pix2)
{
DECLARE_ALIGNED( int, sum, 16 );
DECLARE_ALIGNED_16( int sum );
int y;
LOAD_ZERO;
......@@ -1801,7 +1801,7 @@ static void ssim_4x4x2_core_altivec( const uint8_t *pix1, int stride1,
const uint8_t *pix2, int stride2,
int sums[2][4] )
{
DECLARE_ALIGNED( int, temp[4], 16 );
DECLARE_ALIGNED_16( int temp[4] );
int y;
vec_u8_t pix1v, pix2v;
......
......@@ -455,7 +455,7 @@ void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )
PREDICT_8x8_LOAD_TOP\
PREDICT_8x8_LOAD_LEFT\
int t;\
DECLARE_ALIGNED( int16_t, sa8d_1d[2][8], 16 );\
DECLARE_ALIGNED_16( int16_t sa8d_1d[2][8] );\
SUMSUB(l0,l4,l1,l5,l2,l6,l3,l7);\
SUMSUB(l0,l2,l1,l3,l4,l6,l5,l7);\
SUMSUB(l0,l1,l2,l3,l4,l5,l6,l7);\
......
......@@ -45,7 +45,7 @@ typedef struct
/* 8x8 */
int i_cost8x8;
/* [ref][0] is 16x16 mv, [ref][1..4] are 8x8 mv from partition [0..3] */
DECLARE_ALIGNED( int, mvc[32][5][2], 8 );
DECLARE_ALIGNED_8( int mvc[32][5][2] );
x264_me_t me8x8[4];
/* Sub 4x4 */
......@@ -586,7 +586,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
/* 8x8 prediction selection */
if( flags & X264_ANALYSE_I8x8 )
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
DECLARE_ALIGNED_16( uint8_t edge[33] );
x264_pixel_cmp_t sa8d = (*h->pixf.mbcmp == *h->pixf.sad) ? h->pixf.sad[PIXEL_8x8] : h->pixf.sa8d[PIXEL_8x8];
int i_satd_thresh = a->b_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
int i_cost = 0;
......@@ -857,7 +857,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
}
else if( h->mb.i_type == I_8x8 )
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
DECLARE_ALIGNED_16( uint8_t edge[33] );
for( idx = 0; idx < 4; idx++ )
{
uint64_t pels_h = 0;
......@@ -1166,7 +1166,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
DECLARE_ALIGNED( int, mvc[3][2], 8 );
DECLARE_ALIGNED_8( int mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
......@@ -1216,7 +1216,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
{
x264_me_t m;
uint8_t **p_fenc = h->mb.pic.p_fenc;
DECLARE_ALIGNED( int, mvc[3][2], 8 );
DECLARE_ALIGNED_8( int mvc[3][2] );
int i, j;
/* XXX Needed for x264_mb_predict_mv */
......@@ -1263,7 +1263,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
static int x264_mb_analyse_inter_p4x4_chroma( x264_t *h, x264_mb_analysis_t *a, uint8_t **p_fref, int i8x8, int pixel )
{
DECLARE_ALIGNED( uint8_t, pix1[16*8], 8 );
DECLARE_ALIGNED_8( uint8_t pix1[16*8] );
uint8_t *pix2 = pix1+8;
const int i_stride = h->mb.pic.i_stride[1];
const int or = 4*(i8x8&1) + 2*(i8x8&2)*i_stride;
......@@ -1443,8 +1443,8 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
DECLARE_ALIGNED( uint8_t, pix1[16*16], 16 );
DECLARE_ALIGNED( uint8_t, pix2[16*16], 16 );
DECLARE_ALIGNED_16( uint8_t pix1[16*16] );
DECLARE_ALIGNED_16( uint8_t pix2[16*16] );
uint8_t *src2;
int stride2 = 16;
int weight;
......@@ -1655,7 +1655,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
DECLARE_ALIGNED( uint8_t, pix[2][8*8], 8 );
DECLARE_ALIGNED_8( uint8_t pix[2][8*8] );
int i, l;
/* XXX Needed for x264_mb_predict_mv */
......@@ -1719,8 +1719,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
DECLARE_ALIGNED( uint8_t, pix[2][16*8], 16 );
DECLARE_ALIGNED( int, mvc[2][2], 8 );
DECLARE_ALIGNED_16( uint8_t pix[2][16*8] );
DECLARE_ALIGNED_8( int mvc[2][2] );
int i, l;
h->mb.i_partition = D_16x8;
......@@ -1788,8 +1788,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
uint8_t **p_fref[2] =
{ h->mb.pic.p_fref[0][a->l0.i_ref],
h->mb.pic.p_fref[1][a->l1.i_ref] };
DECLARE_ALIGNED( uint8_t, pix[2][8*16], 8 );
DECLARE_ALIGNED( int, mvc[2][2], 8 );
DECLARE_ALIGNED_8( uint8_t pix[2][8*16] );
DECLARE_ALIGNED_8( int mvc[2][2] );
int i, l;
h->mb.i_partition = D_8x16;
......
......@@ -85,7 +85,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
int y = 4 * block_idx_y[idx];
uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
if( h->mb.b_lossless )
{
......@@ -113,7 +113,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
int y = 8 * (idx>>1);
uint8_t *p_src = &h->mb.pic.p_fenc[0][x+y*FENC_STRIDE];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
......@@ -132,7 +132,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0];
DECLARE_ALIGNED( int16_t, dct4x4[16+1][4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[16+1][4][4] );
int i;
......@@ -195,8 +195,8 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
int i_decimate_score = 0;
DECLARE_ALIGNED( int16_t, dct2x2[2][2] , 16 );
DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
if( h->mb.b_lossless )
{
......@@ -366,7 +366,7 @@ void x264_macroblock_encode( x264_t *h )
}
else if( h->mb.i_type == I_8x8 )
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
DECLARE_ALIGNED_16( uint8_t edge[33] );
h->mb.b_transform_8x8 = 1;
/* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */
if( h->mb.i_skip_intra )
......@@ -431,7 +431,7 @@ void x264_macroblock_encode( x264_t *h )
}
else if( h->mb.b_transform_8x8 )
{
DECLARE_ALIGNED( int16_t, dct8x8[4][8][8], 16 );
DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
int nnz8x8[4] = {1,1,1,1};
b_decimate &= !h->mb.b_trellis; // 8x8 trellis is inherently optimal decimation
h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
......@@ -476,7 +476,7 @@ void x264_macroblock_encode( x264_t *h )
}
else
{
DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
int nnz8x8[4] = {1,1,1,1};
h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.p_fdec[0] );
......@@ -616,9 +616,9 @@ void x264_macroblock_encode( x264_t *h )
*****************************************************************************/
int x264_macroblock_probe_skip( x264_t *h, const int b_bidir )
{
DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
DECLARE_ALIGNED( int16_t, dctscan[16], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
DECLARE_ALIGNED_16( int16_t dct2x2[2][2] );
DECLARE_ALIGNED_16( int16_t dctscan[16] );
int i_qp = h->mb.i_qp;
int mvp[2];
......@@ -786,7 +786,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
if( h->mb.b_transform_8x8 )
{
DECLARE_ALIGNED( int16_t, dct8x8[8][8], 16 );
DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
......@@ -805,7 +805,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
else
{
int i4;
DECLARE_ALIGNED( int16_t, dct4x4[4][4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
h->quantf.quant_4x4( dct4x4[0], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
h->quantf.quant_4x4( dct4x4[1], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
......@@ -836,7 +836,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
for( ch = 0; ch < 2; ch++ )
{
DECLARE_ALIGNED( int16_t, dct4x4[4][4], 16 );
DECLARE_ALIGNED_16( int16_t dct4x4[4][4] );
p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
......
......@@ -161,7 +161,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int
int bpred_mx = 0, bpred_my = 0, bpred_cost = COST_MAX;
int omx, omy, pmx, pmy;
uint8_t *p_fref = m->p_fref[0];
DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int i, j;
int dir;
......@@ -462,8 +462,8 @@ me_hex2:
* because sum(abs(diff)) >= abs(diff(sum)). */
const int stride = m->i_stride[0];
uint16_t *sums_base = m->integral;
DECLARE_ALIGNED( static uint8_t, zero[16*16], 16 ) = {0,};
DECLARE_ALIGNED( int, enc_dc[4], 16 );
DECLARE_ALIGNED_16( static uint8_t zero[16*16] );
DECLARE_ALIGNED_16( int enc_dc[4] );
int sad_size = i_pixel <= PIXEL_8x8 ? PIXEL_8x8 : PIXEL_4x4;
int delta = x264_pixel_size[sad_size].w;
int16_t xs_buf[64];
......@@ -671,7 +671,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
const int i_pixel = m->i_pixel;
const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
DECLARE_ALIGNED( uint8_t, pix[2][32*18], 16 ); // really 17x17, but round up for alignment
DECLARE_ALIGNED_16( uint8_t pix[2][32*18] ); // really 17x17, but round up for alignment
int omx, omy;
int i;
......@@ -822,9 +822,9 @@ int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight
const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 );
DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 );
DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
DECLARE_ALIGNED_16( uint8_t pix0[9][16*16] );
DECLARE_ALIGNED_16( uint8_t pix1[9][16*16] );
DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int bm0x = m0->mv[0], om0x = bm0x;
int bm0y = m0->mv[1], om0y = bm0y;
int bm1x = m1->mv[0], om1x = bm1x;
......@@ -912,7 +912,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
const int bh = x264_pixel_size[m->i_pixel].h>>2;
const int i_pixel = m->i_pixel;
DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
DECLARE_ALIGNED_16( uint8_t pix[16*16] );
int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
int bmx = m->mv[0];
int bmy = m->mv[1];
......
......@@ -44,7 +44,7 @@ typedef struct
/* output */
int cost_mv; /* lambda * nbits for the chosen mv */
int cost; /* satd + lambda * nbits */
DECLARE_ALIGNED( int, mv[2], 8 );
DECLARE_ALIGNED_8( int mv[2] );
} x264_me_t;
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
......
......@@ -54,7 +54,7 @@ int x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * ( i_mb_x + i_mb_y * i_stride );
DECLARE_ALIGNED( uint8_t, pix1[9*FDEC_STRIDE], 8 );
DECLARE_ALIGNED_8( uint8_t pix1[9*FDEC_STRIDE] );
uint8_t *pix2 = pix1+8;
x264_me_t m[2];
int i_bcost = COST_MAX;
......@@ -218,7 +218,7 @@ lowres_intra_mb:
if( i_icost < i_bcost * 2 )
{
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
DECLARE_ALIGNED_16( uint8_t edge[33] );
x264_predict_8x8_filter( pix, edge, ALL_NEIGHBORS, ALL_NEIGHBORS );
for( i=3; i<9; i++ )
{
......
......@@ -3,8 +3,6 @@
#include <gtk/gtk.h>
#define DECLARE_ALIGNED( type, var, n ) type var __attribute__((aligned(n)))
#include "../x264.h"
#include "../common/common.h"
......
......@@ -34,7 +34,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
x264_predict_t predict_8x8c[4+3];
x264_predict_t predict_4x4[9+3];
x264_predict8x8_t predict_8x8[9+3];
DECLARE_ALIGNED( uint8_t, edge[33], 16 );
DECLARE_ALIGNED_16( uint8_t edge[33] );
uint16_t cost_mv[32];
int ret = 0, ok, used_asm;
int i, j;
......@@ -160,8 +160,8 @@ static int check_pixel( int cpu_ref, int cpu_new )
for( i=0; i<100 && ok; i++ )
if( pixel_asm.ads[i&3] != pixel_ref.ads[i&3] )
{
DECLARE_ALIGNED( uint16_t, sums[72], 16 );