From 30140b34b879605cf70cab0634a4a8faef5b6e60 Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Wed, 3 Dec 2014 22:36:12 +0300 Subject: [PATCH] Fix bugs/typos in motion compensation and cache_load Didn't affect output due to the incorrect values either not being used in the code path or producing equal results compared to the correct values. Also deduplicate hpel_ref arrays. --- common/aarch64/mc-c.c | 11 ++++------- common/arm/mc-c.c | 11 ++++------- common/macroblock.c | 2 +- common/mc.c | 12 ++++++------ common/mc.h | 2 ++ common/ppc/mc.c | 15 +++++---------- common/x86/mc-c.c | 11 ++++------- 7 files changed, 26 insertions(+), 38 deletions(-) diff --git a/common/aarch64/mc-c.c b/common/aarch64/mc-c.c index 73f6df96..c71606fc 100644 --- a/common/aarch64/mc-c.c +++ b/common/aarch64/mc-c.c @@ -132,9 +132,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, x264_mc_copy_w16_neon, }; -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, uint8_t *src[4], intptr_t i_src_stride, int mvx, int mvy, @@ -142,13 +139,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -168,13 +165,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c index 3805e736..e59bd4f3 100644 --- a/common/arm/mc-c.c +++ b/common/arm/mc-c.c @@ -136,9 +136,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *, x264_mc_copy_w16_neon, }; -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, uint8_t *src[4], intptr_t i_src_stride, int mvx, int mvy, @@ -146,13 +143,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); @@ -172,13 +169,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset; if ( (mvy&3) == 3 ) // explict if() to force conditional add src1 += i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); x264_pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); diff --git a/common/macroblock.c b/common/macroblock.c index 8494bfe1..a658d18e 100644 --- a/common/macroblock.c +++ b/common/macroblock.c @@ -1158,7 +1158,7 @@ static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int m { // Looking at the bottom field so always take the bottom macroblock of the pair. h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]]; - h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]]; + h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]]; h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]]; CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] ); CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] ); diff --git a/common/mc.c b/common/mc.c index 6a8b1b81..f9723516 100644 --- a/common/mc.c +++ b/common/mc.c @@ -189,8 +189,8 @@ static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src, } } -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; +const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; +const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2}; static void mc_luma( pixel *dst, intptr_t i_dst_stride, pixel *src[4], intptr_t i_src_stride, @@ -199,11 +199,11 @@ static void mc_luma( pixel *dst, intptr_t i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); pixel_avg( dst, i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) @@ -222,11 +222,11 @@ static pixel *get_ref( pixel *dst, intptr_t *i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); pixel_avg( dst, *i_dst_stride, src1, i_src_stride, src2, i_src_stride, i_width, i_height ); if( weight->weightfn ) diff --git a/common/mc.h b/common/mc.h index 1e97499a..582c0ab4 100644 --- a/common/mc.h +++ b/common/mc.h @@ -41,6 +41,8 @@ typedef struct x264_weight_t } ALIGNED_16( x264_weight_t ); extern const x264_weight_t x264_weight_none[3]; +extern const uint8_t x264_hpel_ref0[16]; +extern const uint8_t x264_hpel_ref1[16]; #define SET_WEIGHT( w, b, s, d, o )\ {\ diff --git a/common/ppc/mc.c b/common/ppc/mc.c index 9d554feb..f7de1730 100644 --- a/common/ppc/mc.c +++ b/common/ppc/mc.c @@ -40,24 +40,19 @@ typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src, uint8_t *dst, intptr_t i_dst, int i_height ); - -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - - static inline int x264_tapfilter( uint8_t *pix, int i_pix_next ) { return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next] + 20*(pix[0] + pix[1*i_pix_next]) - 5*pix[ 2*i_pix_next] + pix[ 3*i_pix_next]; } + static inline int x264_tapfilter1( uint8_t *pix ) { return pix[-2] - 5*pix[-1] + 20*(pix[0] + pix[1]) - 5*pix[ 2] + pix[ 3]; } - static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst, uint8_t *src1, intptr_t i_src1, uint8_t *src2, int i_height ) @@ -181,10 +176,10 @@ static void mc_luma_altivec( uint8_t *dst, intptr_t i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); switch( i_width ) { @@ -229,10 +224,10 @@ static uint8_t *get_ref_altivec( uint8_t *dst, intptr_t *i_dst_stride, { int qpel_idx = ((mvy&3)<<2) + (mvx&3); intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2); - uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; + uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride; if( qpel_idx & 5 ) /* qpel interpolation needed */ { - uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); + uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3); switch( i_width ) { case 4: diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c index 9bd990ce..d231a8c9 100644 --- a/common/x86/mc-c.c +++ b/common/x86/mc-c.c @@ -363,9 +363,6 @@ static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w ) } #endif // !HIGH_BIT_DEPTH -static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; -static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; - #define MC_LUMA(name,instr1,instr2)\ static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\ pixel *src[4], intptr_t i_src_stride,\ @@ -374,10 +371,10 @@ static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\ {\ int qpel_idx = ((mvy&3)<<2) + (mvx&3);\ int offset = (mvy>>2)*i_src_stride + (mvx>>2);\ - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ x264_pixel_avg_wtab_##instr1[i_width>>2](\ dst, i_dst_stride, src1, i_src_stride,\ src2, i_height );\ @@ -412,10 +409,10 @@ static pixel *get_ref_##name( pixel *dst, intptr_t *i_dst_stride,\ {\ int qpel_idx = ((mvy&3)<<2) + (mvx&3);\ int offset = (mvy>>2)*i_src_stride + (mvx>>2);\ - pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ + pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\ if( qpel_idx & 5 ) /* qpel interpolation needed */\ {\ - pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ + pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\ x264_pixel_avg_wtab_##name[i_width>>2](\ dst, *i_dst_stride, src1, i_src_stride,\ src2, i_height );\ -- GitLab