Commit 54e784fd authored by Anton Mitrofanov, committed by Fiona Glaser

More cosmetics

parent e9970289
...@@ -87,8 +87,8 @@ static void sigill_handler( int sig ) ...@@ -87,8 +87,8 @@ static void sigill_handler( int sig )
#endif #endif
#ifdef HAVE_MMX #ifdef HAVE_MMX
extern int x264_cpu_cpuid_test( void ); int x264_cpu_cpuid_test( void );
extern uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx ); uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
uint32_t x264_cpu_detect( void ) uint32_t x264_cpu_detect( void )
{ {
......
...@@ -295,7 +295,8 @@ int x264_macroblock_cache_allocate( x264_t *h ) ...@@ -295,7 +295,8 @@ int x264_macroblock_cache_allocate( x264_t *h )
} }
return 0; return 0;
fail: return -1; fail:
return -1;
} }
void x264_macroblock_cache_free( x264_t *h ) void x264_macroblock_cache_free( x264_t *h )
{ {
...@@ -348,7 +349,8 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead ) ...@@ -348,7 +349,8 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
CHECKED_MALLOC( h->scratch_buffer, scratch_size ); CHECKED_MALLOC( h->scratch_buffer, scratch_size );
return 0; return 0;
fail: return -1; fail:
return -1;
} }
void x264_macroblock_thread_free( x264_t *h, int b_lookahead ) void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
......
...@@ -97,9 +97,9 @@ static void name( uint8_t *pix1, int i_stride_pix1, \ ...@@ -97,9 +97,9 @@ static void name( uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2, \ uint8_t *pix2, int i_stride_pix2, \
uint8_t *pix3, int i_stride_pix3, int weight ) \ uint8_t *pix3, int i_stride_pix3, int weight ) \
{ \ { \
if( weight == 32 )\ if( weight == 32 ) \
pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \ pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height ); \
else\ else \
pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \ pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, pix3, i_stride_pix3, width, height, weight ); \
} }
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 ) PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
......
...@@ -394,7 +394,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[ ...@@ -394,7 +394,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref]; int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
int i = 0; int i = 0;
#define SET_MVP(mvp)\ #define SET_MVP(mvp) \
{ \ { \
CP32( mvc[i], mvp ); \ CP32( mvc[i], mvp ); \
i++; \ i++; \
...@@ -445,13 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[ ...@@ -445,13 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
if( h->sh.b_mbaff && field^(i_ref&1) ) if( h->sh.b_mbaff && field^(i_ref&1) )
refpoc += h->sh.i_delta_poc_bottom; refpoc += h->sh.i_delta_poc_bottom;
#define SET_TMVP( dx, dy )\ #define SET_TMVP( dx, dy ) \
{ \ { \
int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \ int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field];\ int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field]; \
mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8;\ mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8; \
mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8;\ mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8; \
i++;\ i++; \
} }
SET_TMVP(0,0); SET_TMVP(0,0);
......
...@@ -205,7 +205,7 @@ void x264_sub8x8_dct8_altivec( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 ) ...@@ -205,7 +205,7 @@ void x264_sub8x8_dct8_altivec( int16_t dct[8][8], uint8_t *pix1, uint8_t *pix2 )
vec_st( dct_tr1v, 16, (signed short *)dct ); vec_st( dct_tr1v, 16, (signed short *)dct );
vec_st( dct_tr2v, 32, (signed short *)dct ); vec_st( dct_tr2v, 32, (signed short *)dct );
vec_st( dct_tr3v, 48, (signed short *)dct ); vec_st( dct_tr3v, 48, (signed short *)dct );
vec_st( dct_tr4v, 64, (signed short *)dct ); vec_st( dct_tr4v, 64, (signed short *)dct );
vec_st( dct_tr5v, 80, (signed short *)dct ); vec_st( dct_tr5v, 80, (signed short *)dct );
vec_st( dct_tr6v, 96, (signed short *)dct ); vec_st( dct_tr6v, 96, (signed short *)dct );
......
...@@ -291,8 +291,8 @@ static void mc_chroma_2xh( uint8_t *dst, int i_dst_stride, ...@@ -291,8 +291,8 @@ static void mc_chroma_2xh( uint8_t *dst, int i_dst_stride,
} }
#define DO_PROCESS_W4( a ) \ #define DO_PROCESS_W4( a ) \
dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \ dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B ) dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride, static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
...@@ -369,10 +369,10 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride, ...@@ -369,10 +369,10 @@ static void mc_chroma_altivec_4xh( uint8_t *dst, int i_dst_stride,
} }
} }
#define DO_PROCESS_W8( a ) \ #define DO_PROCESS_W8( a ) \
src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \ src##a##v_16A = vec_u8_to_u16( src##a##v_8A ); \
src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \ src##a##v_16B = vec_u8_to_u16( src##a##v_8B ); \
dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \ dstv_16A = vec_mladd( src##a##v_16A, coeff##a##v, dstv_16A ); \
dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B ) dstv_16B = vec_mladd( src##a##v_16B, coeff##a##v, dstv_16B )
static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride, static void mc_chroma_altivec_8xh( uint8_t *dst, int i_dst_stride,
......
...@@ -113,13 +113,13 @@ typedef union { ...@@ -113,13 +113,13 @@ typedef union {
vec_u8_t _hv, _lv vec_u8_t _hv, _lv
#define PREP_LOAD_SRC( src ) \ #define PREP_LOAD_SRC( src ) \
vec_u8_t _##src##_ = vec_lvsl(0, src) vec_u8_t _##src##_ = vec_lvsl(0, src)
#define VEC_LOAD_G( p, v, n, t ) \ #define VEC_LOAD_G( p, v, n, t ) \
_hv = vec_ld( 0, p ); \ _hv = vec_ld( 0, p ); \
v = (t) vec_lvsl( 0, p ); \ v = (t) vec_lvsl( 0, p ); \
_lv = vec_ld( n - 1, p ); \ _lv = vec_ld( n - 1, p ); \
v = (t) vec_perm( _hv, _lv, (vec_u8_t) v ) v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
#define VEC_LOAD( p, v, n, t, g ) \ #define VEC_LOAD( p, v, n, t, g ) \
_hv = vec_ld( 0, p ); \ _hv = vec_ld( 0, p ); \
...@@ -134,7 +134,7 @@ typedef union { ...@@ -134,7 +134,7 @@ typedef union {
#define VEC_LOAD_PARTIAL( p, v, n, t, g) \ #define VEC_LOAD_PARTIAL( p, v, n, t, g) \
_hv = vec_ld( 0, p); \ _hv = vec_ld( 0, p); \
v = (t) vec_perm( _hv, _hv, (vec_u8_t) _##g##_ ) v = (t) vec_perm( _hv, _hv, (vec_u8_t) _##g##_ )
/*********************************************************************** /***********************************************************************
* PREP_STORE##n: declares required vectors to store n bytes to a * PREP_STORE##n: declares required vectors to store n bytes to a
...@@ -155,7 +155,7 @@ typedef union { ...@@ -155,7 +155,7 @@ typedef union {
_lv = vec_perm( (vec_u8_t) v, _tmp1v, _##o##r_ ); \ _lv = vec_perm( (vec_u8_t) v, _tmp1v, _##o##r_ ); \
vec_st( _lv, 15, (uint8_t *) p ); \ vec_st( _lv, 15, (uint8_t *) p ); \
_hv = vec_perm( _tmp1v, (vec_u8_t) v, _##o##r_ ); \ _hv = vec_perm( _tmp1v, (vec_u8_t) v, _##o##r_ ); \
vec_st( _hv, 0, (uint8_t *) p ) vec_st( _hv, 0, (uint8_t *) p )
#define PREP_STORE8 \ #define PREP_STORE8 \
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
#include "common/common.h" #include "common/common.h"
#include "ppccommon.h" #include "ppccommon.h"
#include "quant.h" #include "quant.h"
// quant of a whole 4x4 block, unrolled 2x and "pre-scheduled" // quant of a whole 4x4 block, unrolled 2x and "pre-scheduled"
#define QUANT_16_U( idx0, idx1 ) \ #define QUANT_16_U( idx0, idx1 ) \
...@@ -55,7 +55,7 @@ ...@@ -55,7 +55,7 @@
nz = vec_or(nz, vec_or(temp1v, temp2v)); \ nz = vec_or(nz, vec_or(temp1v, temp2v)); \
vec_st(temp2v, (idx1), (int16_t*)dct); \ vec_st(temp2v, (idx1), (int16_t*)dct); \
} }
int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] ) int x264_quant_4x4_altivec( int16_t dct[4][4], uint16_t mf[16], uint16_t bias[16] )
{ {
LOAD_ZERO; LOAD_ZERO;
...@@ -220,7 +220,7 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64 ...@@ -220,7 +220,7 @@ int x264_quant_8x8_altivec( int16_t dct[8][8], uint16_t mf[64], uint16_t bias[64
vec_u16_t biasvB; vec_u16_t biasvB;
vec_s16_t temp1v, temp2v; vec_s16_t temp1v, temp2v;
vec_u32_u qbits_u; vec_u32_u qbits_u;
qbits_u.s[0]=16; qbits_u.s[0]=16;
i_qbitsv = vec_splat(qbits_u.v, 0); i_qbitsv = vec_splat(qbits_u.v, 0);
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
* 16x16 prediction for intra luma block * 16x16 prediction for intra luma block
****************************************************************************/ ****************************************************************************/
#define PREDICT_16x16_DC(v) \ #define PREDICT_16x16_DC(v)\
for( int i = 0; i < 16; i++ )\ for( int i = 0; i < 16; i++ )\
{\ {\
M32( src+ 0 ) = v;\ M32( src+ 0 ) = v;\
......
...@@ -43,7 +43,7 @@ const pw_64, times 8 dw 64 ...@@ -43,7 +43,7 @@ const pw_64, times 8 dw 64
const pw_32_0, times 4 dw 32, const pw_32_0, times 4 dw 32,
times 4 dw 0 times 4 dw 0
const pw_8000, times 8 dw 0x8000 const pw_8000, times 8 dw 0x8000
const pw_3fff, times 8 dw 0x3fff const pw_3fff, times 8 dw 0x3fff
const pd_1, times 4 dd 1 const pd_1, times 4 dd 1
const pd_128, times 4 dd 128 const pd_128, times 4 dd 128
......
...@@ -103,7 +103,7 @@ void x264_integral_init8v_sse2( uint16_t *sum8, int stride ); ...@@ -103,7 +103,7 @@ void x264_integral_init8v_sse2( uint16_t *sum8, int stride );
void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride ); void x264_integral_init4v_ssse3( uint16_t *sum8, uint16_t *sum4, int stride );
void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs, void x264_mbtree_propagate_cost_sse2( int *dst, uint16_t *propagate_in, uint16_t *intra_costs,
uint16_t *inter_costs, uint16_t *inv_qscales, int len ); uint16_t *inter_costs, uint16_t *inv_qscales, int len );
#define LOWRES(cpu) \ #define LOWRES(cpu)\
void x264_frame_init_lowres_core_##cpu( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,\ void x264_frame_init_lowres_core_##cpu( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,\
int src_stride, int dst_stride, int width, int height ); int src_stride, int dst_stride, int width, int height );
LOWRES(mmxext) LOWRES(mmxext)
......
...@@ -326,7 +326,7 @@ static void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] ) ...@@ -326,7 +326,7 @@ static void x264_predict_8x8_vr_mmxext( uint8_t *src, uint8_t edge[33] )
t=e; e+=f; f-=t;\ t=e; e+=f; f-=t;\
t=g; g+=h; h-=t; t=g; g+=h; h-=t;
#define INTRA_SA8D_X3(cpu) \ #define INTRA_SA8D_X3(cpu)\
void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\ void x264_intra_sa8d_x3_8x8_##cpu( uint8_t *fenc, uint8_t edge[33], int res[3] )\
{\ {\
PREDICT_8x8_LOAD_TOP\ PREDICT_8x8_LOAD_TOP\
......
...@@ -736,13 +736,13 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl ...@@ -736,13 +736,13 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
} }
#endif #endif
#define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra ) \ #define block_residual_write_cabac_cbf( h, cb, i_ctxBlockCat, i_idx, l, b_intra )\
{ \ {\
int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra ); \ int ctxidxinc = x264_cabac_mb_cbf_ctxidxinc( h, i_ctxBlockCat, i_idx, b_intra );\
if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\ if( h->mb.cache.non_zero_count[x264_scan8[i_idx]] )\
{\ {\
x264_cabac_encode_decision( cb, ctxidxinc, 1 );\ x264_cabac_encode_decision( cb, ctxidxinc, 1 );\
block_residual_write_cabac( h, cb, i_ctxBlockCat, l ); \ block_residual_write_cabac( h, cb, i_ctxBlockCat, l );\
}\ }\
else\ else\
x264_cabac_encode_decision( cb, ctxidxinc, 0 );\ x264_cabac_encode_decision( cb, ctxidxinc, 0 );\
......
...@@ -914,14 +914,14 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite ...@@ -914,14 +914,14 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
m->cost_mv = p_cost_mvx[bmx] + p_cost_mvy[bmy]; m->cost_mv = p_cost_mvx[bmx] + p_cost_mvy[bmy];
} }
#define BIME_CACHE( dx, dy, list ) \ #define BIME_CACHE( dx, dy, list )\
{ \ {\
x264_me_t *m = m##list;\ x264_me_t *m = m##list;\
int i = 4 + 3*dx + dy; \ int i = 4 + 3*dx + dy;\
int mvx = bm##list##x+dx;\ int mvx = bm##list##x+dx;\
int mvy = bm##list##y+dy;\ int mvy = bm##list##y+dy;\
stride[list][i] = bw;\ stride[list][i] = bw;\
src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none ); \ src[list][i] = h->mc.get_ref( pixy_buf[list][i], &stride[list][i], m->p_fref, m->i_stride[0], mvx, mvy, bw, bh, weight_none );\
if( rd )\ if( rd )\
{\ {\
h->mc.mc_chroma( pixu_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\ h->mc.mc_chroma( pixu_buf[list][i], 8, m->p_fref[4], m->i_stride[1], mvx, mvy + mv##list##y_offset, bw>>1, bh>>1 );\
...@@ -1107,11 +1107,11 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei ...@@ -1107,11 +1107,11 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
{ \ { \
uint64_t cost; \ uint64_t cost; \
M32( cache_mv ) = pack16to32_mask(mx,my); \ M32( cache_mv ) = pack16to32_mask(mx,my); \
if( m->i_pixel <= PIXEL_8x8 )\ if( m->i_pixel <= PIXEL_8x8 ) \
{\ { \
h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\ h->mc.mc_chroma( pixu, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 );\ h->mc.mc_chroma( pixv, FDEC_STRIDE, m->p_fref[5], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
}\ } \
cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \ cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \ COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
} \ } \
......
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
/* maximum size of the sequence of filters to try on non script files */ /* maximum size of the sequence of filters to try on non script files */
#define AVS_MAX_SEQUENCE 5 #define AVS_MAX_SEQUENCE 5
#define LOAD_AVS_FUNC(name, continue_on_fail) \ #define LOAD_AVS_FUNC(name, continue_on_fail)\
{\ {\
h->func.name = (void*)GetProcAddress( h->library, #name );\ h->func.name = (void*)GetProcAddress( h->library, #name );\
if( !continue_on_fail && !h->func.name )\ if( !continue_on_fail && !h->func.name )\
......
...@@ -265,7 +265,7 @@ static int check_pixel( int cpu_ref, int cpu_new ) ...@@ -265,7 +265,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1)); buf3[i] = ~(buf4[i] = -(buf1[i&~0x88]&1));
#define TEST_PIXEL( name, align ) \ #define TEST_PIXEL( name, align ) \
ok = 1, used_asm = 0;\ ok = 1, used_asm = 0; \
for( int i = 0; i < 7; i++ ) \ for( int i = 0; i < 7; i++ ) \
{ \ { \
int res_c, res_asm; \ int res_c, res_asm; \
...@@ -305,7 +305,7 @@ static int check_pixel( int cpu_ref, int cpu_new ) ...@@ -305,7 +305,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
TEST_PIXEL( sa8d, 1 ); TEST_PIXEL( sa8d, 1 );
#define TEST_PIXEL_X( N ) \ #define TEST_PIXEL_X( N ) \
ok = 1; used_asm = 0;\ ok = 1; used_asm = 0; \
for( int i = 0; i < 7; i++ ) \ for( int i = 0; i < 7; i++ ) \
{ \ { \
int res_c[4]={0}, res_asm[4]={0}; \ int res_c[4]={0}, res_asm[4]={0}; \
...@@ -350,7 +350,7 @@ static int check_pixel( int cpu_ref, int cpu_new ) ...@@ -350,7 +350,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
{ \ { \
set_func_name( "%s_%s", "var", pixel_names[i] ); \ set_func_name( "%s_%s", "var", pixel_names[i] ); \
used_asm = 1; \ used_asm = 1; \
/* abi-check wrapper can't return uint64_t, so separate it from return value check */\ /* abi-check wrapper can't return uint64_t, so separate it from return value check */ \
call_c1( pixel_c.var[i], buf1, 16 ); \ call_c1( pixel_c.var[i], buf1, 16 ); \
call_a1( pixel_asm.var[i], buf1, 16 ); \ call_a1( pixel_asm.var[i], buf1, 16 ); \
uint64_t res_c = pixel_c.var[i]( buf1, 16 ); \ uint64_t res_c = pixel_c.var[i]( buf1, 16 ); \
...@@ -415,7 +415,7 @@ static int check_pixel( int cpu_ref, int cpu_new ) ...@@ -415,7 +415,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \ if( pixel_asm.name && pixel_asm.name != pixel_ref.name ) \
{ \ { \
int res_c[3], res_asm[3]; \ int res_c[3], res_asm[3]; \
set_func_name( #name );\ set_func_name( #name ); \
used_asm = 1; \ used_asm = 1; \
memcpy( buf3, buf2, 1024 ); \ memcpy( buf3, buf2, 1024 ); \
for( int i = 0; i < 3; i++ ) \ for( int i = 0; i < 3; i++ ) \
...@@ -538,7 +538,7 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -538,7 +538,7 @@ static int check_dct( int cpu_ref, int cpu_new )
#define TEST_DCT( name, t1, t2, size ) \ #define TEST_DCT( name, t1, t2, size ) \
if( dct_asm.name != dct_ref.name ) \ if( dct_asm.name != dct_ref.name ) \
{ \ { \
set_func_name( #name );\ set_func_name( #name ); \
used_asm = 1; \ used_asm = 1; \
call_c( dct_c.name, t1, buf1, buf2 ); \ call_c( dct_c.name, t1, buf1, buf2 ); \
call_a( dct_asm.name, t2, buf1, buf2 ); \ call_a( dct_asm.name, t2, buf1, buf2 ); \
...@@ -579,7 +579,7 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -579,7 +579,7 @@ static int check_dct( int cpu_ref, int cpu_new )
#define TEST_IDCT( name, src ) \ #define TEST_IDCT( name, src ) \
if( dct_asm.name != dct_ref.name ) \ if( dct_asm.name != dct_ref.name ) \
{ \ { \
set_func_name( #name );\ set_func_name( #name ); \
used_asm = 1; \ used_asm = 1; \
memcpy( buf3, buf1, 32*32 ); \ memcpy( buf3, buf1, 32*32 ); \
memcpy( buf4, buf1, 32*32 ); \ memcpy( buf4, buf1, 32*32 ); \
...@@ -644,12 +644,12 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -644,12 +644,12 @@ static int check_dct( int cpu_ref, int cpu_new )
ALIGNED_16( int16_t level1[64] ); ALIGNED_16( int16_t level1[64] );
ALIGNED_16( int16_t level2[64] ); ALIGNED_16( int16_t level2[64] );
#define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \ #define TEST_ZIGZAG_SCAN( name, t1, t2, dct, size ) \
if( zigzag_asm.name != zigzag_ref.name ) \ if( zigzag_asm.name != zigzag_ref.name ) \
{ \ { \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \ used_asm = 1; \
memcpy(dct, buf1, size*sizeof(int16_t));\ memcpy(dct, buf1, size*sizeof(int16_t)); \
call_c( zigzag_c.name, t1, dct ); \ call_c( zigzag_c.name, t1, dct ); \
call_a( zigzag_asm.name, t2, dct ); \ call_a( zigzag_asm.name, t2, dct ); \
if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \ if( memcmp( t1, t2, size*sizeof(int16_t) ) ) \
...@@ -663,18 +663,18 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -663,18 +663,18 @@ static int check_dct( int cpu_ref, int cpu_new )
if( zigzag_asm.name != zigzag_ref.name ) \ if( zigzag_asm.name != zigzag_ref.name ) \
{ \ { \
int nz_a, nz_c; \ int nz_a, nz_c; \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \ used_asm = 1; \
memcpy( buf3, buf1, 16*FDEC_STRIDE ); \ memcpy( buf3, buf1, 16*FDEC_STRIDE ); \
memcpy( buf4, buf1, 16*FDEC_STRIDE ); \ memcpy( buf4, buf1, 16*FDEC_STRIDE ); \
nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \ nz_c = call_c1( zigzag_c.name, t1, buf2, buf3 ); \
nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4 ); \ nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4 ); \
if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \ if( memcmp( t1, t2, size*sizeof(int16_t) )|| memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a ) \
{ \ { \
ok = 0; \ ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \ fprintf( stderr, #name " [FAILED]\n" ); \
} \ } \
call_c2( zigzag_c.name, t1, buf2, buf3 ); \ call_c2( zigzag_c.name, t1, buf2, buf3 ); \
call_a2( zigzag_asm.name, t2, buf2, buf4 ); \ call_a2( zigzag_asm.name, t2, buf2, buf4 ); \
} }
...@@ -683,7 +683,7 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -683,7 +683,7 @@ static int check_dct( int cpu_ref, int cpu_new )
{ \ { \
int nz_a, nz_c; \ int nz_a, nz_c; \
int16_t dc_a, dc_c; \ int16_t dc_a, dc_c; \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \ used_asm = 1; \
for( int i = 0; i < 2; i++ ) \ for( int i = 0; i < 2; i++ ) \
{ \ { \
...@@ -694,27 +694,27 @@ static int check_dct( int cpu_ref, int cpu_new ) ...@@ -694,27 +694,27 @@ static int check_dct( int cpu_ref, int cpu_new )
memcpy( buf3 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \ memcpy( buf3 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
memcpy( buf4 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \ memcpy( buf4 + j*FDEC_STRIDE, (i?buf1:buf2) + j*FENC_STRIDE, 4 ); \
} \ } \
nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \ nz_c = call_c1( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \ nz_a = call_a1( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \ if( memcmp( t1+1, t2+1, 15*sizeof(int16_t) ) || memcmp( buf3, buf4, 16*FDEC_STRIDE ) || nz_c != nz_a || dc_c != dc_a ) \
{ \ { \
ok = 0; \ ok = 0; \
fprintf( stderr, #name " [FAILED]\n" ); \ fprintf( stderr, #name " [FAILED]\n" ); \
break; \ break; \
} \ } \
} \ } \
call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \ call_c2( zigzag_c.name, t1, buf2, buf3, &dc_c ); \
call_a2( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \ call_a2( zigzag_asm.name, t2, buf2, buf4, &dc_a ); \
} }
#define TEST_INTERLEAVE( name, t1, t2, dct, size ) \ #define TEST_INTERLEAVE( name, t1, t2, dct, size ) \
if( zigzag_asm.name != zigzag_ref.name ) \ if( zigzag_asm.name != zigzag_ref.name ) \
{ \ { \
for( int j = 0; j < 100; j++ ) \ for( int j = 0; j < 100; j++ ) \
{ \ { \
set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" );\ set_func_name( "zigzag_"#name"_%s", interlace?"field":"frame" ); \
used_asm = 1; \ used_asm = 1; \
memcpy(dct, buf1, size*sizeof(int16_t));\ memcpy(dct, buf1, size*sizeof(int16_t)); \
for( int i = 0; i < size; i++ ) \ for( int i = 0; i < size; i++ ) \
dct[i] = rand()&0x1F ? 0 : dct[i]; \ dct[i] = rand()&0x1F ? 0 : dct[i]; \
memcpy(buf3, buf4, 10*sizeof(uint8_t)); \ memcpy(buf3, buf4, 10*sizeof(uint8_t)); \
...@@ -784,7 +784,7 @@ static int check_mc( int cpu_ref, int cpu_new ) ...@@ -784,7 +784,7 @@ static int check_mc( int cpu_ref, int cpu_new )
if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \ if( mc_a.mc_luma != mc_ref.mc_luma && !(w&(w-1)) && h<=16 ) \
{ \ { \
const x264_weight_t *weight = weight_none; \ const x264_weight_t *weight = weight_none; \
set_func_name( "mc_luma_%dx%d", w, h );\ set_func_name( "mc_luma_%dx%d", w, h ); \
used_asm = 1; \ used_asm = 1; \
memset( buf3, 0xCD, 1024 ); \ memset( buf3, 0xCD, 1024 ); \
memset( buf4, 0xCD, 1024 ); \ memset( buf4, 0xCD, 1024 ); \
...@@ -801,7 +801,7 @@ static int check_mc( int cpu_ref, int cpu_new ) ...@@ -801,7 +801,7 @@ static int check_mc( int cpu_ref, int cpu_new )
uint8_t *ref = dst2; \ uint8_t *ref = dst2; \
int ref_stride = 32; \ int ref_stride = 32; \
const x264_weight_t *weight = weight_none; \ const x264_weight_t *weight = weight_none; \
set_func_name( "get_ref_%dx%d", w, h );\ set_func_name( "get_ref_%dx%d", w, h ); \
used_asm = 1; \ used_asm = 1; \
memset( buf3, 0xCD, 1024 ); \ memset( buf3, 0xCD, 1024 ); \
memset( buf4, 0xCD, 1024 ); \ memset( buf4, 0xCD, 1024 ); \
...@@ -819,13 +819,13 @@ static int check_mc( int cpu_ref, int cpu_new ) ...@@ -819,13 +819,13 @@ static int check_mc( int cpu_ref, int cpu_new )
#define MC_TEST_CHROMA( w, h ) \ #define MC_TEST_CHROMA( w, h ) \
if( mc_a.mc_chroma != mc_ref.mc_chroma ) \ if( mc_a.mc_chroma != mc_ref.mc_chroma ) \
{ \ { \
set_func_name( "mc_chroma_%dx%d", w, h );\ set_func_name( "mc_chroma_%dx%d", w, h ); \
used_asm = 1; \ used_asm = 1; \
memset( buf3, 0xCD, 1024 ); \ memset( buf3, 0xCD, 1024 ); \
memset( buf4, 0xCD, 1024 ); \ memset( buf4, 0xCD, 1024 ); \
call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \ call_c( mc_c.mc_chroma, dst1, 16, src, 64, dx, dy, w, h ); \
call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \ call_a( mc_a.mc_chroma, dst2, 16, src, 64, dx, dy, w, h ); \
/* mc_chroma width=2 may write garbage to the right of dst. ignore that. */\ /* mc_chroma width=2 may write garbage to the right of dst. ignore that. */ \
for( int j = 0; j < h; j++ ) \ for( int j = 0; j < h; j++ ) \
for( int i = w; i < 4; i++ ) \ for( int i = w; i < 4; i++ ) \
dst2[i+j*16] = dst1[i+j*16]; \ dst2[i+j*16] = dst1[i+j*16]; \
...@@ -878,7 +878,7 @@ static int check_mc( int cpu_ref, int cpu_new ) ...@@ -878,7 +878,7 @@ static int check_mc( int cpu_ref, int cpu_new )
memcpy( buf4, buf1+320, 320 ); \