Commit 30140b34 authored by Anton Mitrofanov, committed by Henrik Gramner

Fix bugs/typos in motion compensation and cache_load

The bugs did not affect the output, because the incorrect values were either never used in the affected code paths or produced results identical to the correct values.

Also deduplicate hpel_ref arrays.
parent a46820e0
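For context on why the hpel_ref1 typo was harmless: the tables are indexed by qpel_idx = ((mvy&3)<<2) + (mvx&3), and the second table is only consulted when (qpel_idx & 5) is nonzero, i.e. when quarter-pel interpolation actually averages two half-pel planes. The standalone sketch below is not part of the patch; it is plain C using the corrected tables, and the plane-numbering comment reflects how the mc_luma/get_ref code uses src[0..3]. It prints which hpel_ref1 entries are ever read; entry 2, the one fixed here, is never read, which matches the commit message.

#include <stdint.h>
#include <stdio.h>

/* Corrected tables from this commit. Index: ((mvy&3)<<2) + (mvx&3).
 * Plane numbers refer to src[0..3] as used by mc_luma/get_ref:
 * 0 = full-pel, 1 = horizontal half-pel, 2 = vertical half-pel, 3 = diagonal. */
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};

int main( void )
{
    for( int qpel_idx = 0; qpel_idx < 16; qpel_idx++ )
    {
        if( qpel_idx & 5 ) /* quarter-pel: average two half-pel planes */
            printf( "idx %2d: avg plane %d with plane %d\n",
                    qpel_idx, hpel_ref0[qpel_idx], hpel_ref1[qpel_idx] );
        else               /* full-/half-pel only: hpel_ref1 entry never read */
            printf( "idx %2d: copy plane %d (hpel_ref1[%d] unused)\n",
                    qpel_idx, hpel_ref0[qpel_idx], qpel_idx );
    }
    /* qpel_idx == 2 (mvx&3 == 2, mvy&3 == 0) falls into the 'unused' branch,
     * so the old value hpel_ref1[2] == 0 never influenced the output. */
    return 0;
}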
@@ -132,9 +132,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *,
     x264_mc_copy_w16_neon,
 };
 
-static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
-
 static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
                           uint8_t *src[4], intptr_t i_src_stride,
                           int mvx, int mvy,
@@ -142,13 +139,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
     if ( (mvy&3) == 3 ) // explict if() to force conditional add
         src1 += i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_neon[i_width>>2](
             dst, i_dst_stride, src1, i_src_stride,
             src2, i_height );
@@ -168,13 +165,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
     if ( (mvy&3) == 3 ) // explict if() to force conditional add
         src1 += i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_neon[i_width>>2](
             dst, *i_dst_stride, src1, i_src_stride,
             src2, i_height );
@@ -136,9 +136,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *,
     x264_mc_copy_w16_neon,
 };
 
-static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
-
 static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
                           uint8_t *src[4], intptr_t i_src_stride,
                           int mvx, int mvy,
@@ -146,13 +143,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
     if ( (mvy&3) == 3 ) // explict if() to force conditional add
         src1 += i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_neon[i_width>>2](
             dst, i_dst_stride, src1, i_src_stride,
             src2, i_height );
@@ -172,13 +169,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
     if ( (mvy&3) == 3 ) // explict if() to force conditional add
         src1 += i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_neon[i_width>>2](
             dst, *i_dst_stride, src1, i_src_stride,
             src2, i_height );
@@ -1158,7 +1158,7 @@ static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int m
         {
             // Looking at the bottom field so always take the bottom macroblock of the pair.
             h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
-            h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
+            h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]];
             h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
             CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
             CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
@@ -189,8 +189,8 @@ static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
     }
 }
 
-static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
+const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
+const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};
 
 static void mc_luma( pixel *dst, intptr_t i_dst_stride,
                      pixel *src[4], intptr_t i_src_stride,
@@ -199,11 +199,11 @@ static void mc_luma( pixel *dst, intptr_t i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
 
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                    src2, i_src_stride, i_width, i_height );
         if( weight->weightfn )
@@ -222,11 +222,11 @@ static pixel *get_ref( pixel *dst, intptr_t *i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
 
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                    src2, i_src_stride, i_width, i_height );
         if( weight->weightfn )
@@ -41,6 +41,8 @@ typedef struct x264_weight_t
 } ALIGNED_16( x264_weight_t );
 
 extern const x264_weight_t x264_weight_none[3];
+extern const uint8_t x264_hpel_ref0[16];
+extern const uint8_t x264_hpel_ref1[16];
 
 #define SET_WEIGHT( w, b, s, d, o )\
 {\
@@ -40,24 +40,19 @@
 typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src,
                          uint8_t *dst, intptr_t i_dst, int i_height );
 
-static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
-
 static inline int x264_tapfilter( uint8_t *pix, int i_pix_next )
 {
     return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next] + 20*(pix[0] +
            pix[1*i_pix_next]) - 5*pix[ 2*i_pix_next] +
            pix[ 3*i_pix_next];
 }
 
 static inline int x264_tapfilter1( uint8_t *pix )
 {
     return pix[-2] - 5*pix[-1] + 20*(pix[0] + pix[1]) - 5*pix[ 2] +
            pix[ 3];
 }
 
 static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst,
                                                uint8_t *src1, intptr_t i_src1,
                                                uint8_t *src2, int i_height )
@@ -181,10 +176,10 @@ static void mc_luma_altivec( uint8_t *dst, intptr_t i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         switch( i_width )
         {
@@ -229,10 +224,10 @@ static uint8_t *get_ref_altivec( uint8_t *dst, intptr_t *i_dst_stride,
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
-    uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
+    uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
     if( qpel_idx & 5 ) /* qpel interpolation needed */
     {
-        uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
+        uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         switch( i_width )
         {
             case 4:
@@ -363,9 +363,6 @@ static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
 }
 #endif // !HIGH_BIT_DEPTH
 
-static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
-static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
-
 #define MC_LUMA(name,instr1,instr2)\
 static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\
                             pixel *src[4], intptr_t i_src_stride,\
@@ -374,10 +371,10 @@ static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\
 {\
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
-    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
+    pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
     if( qpel_idx & 5 ) /* qpel interpolation needed */\
     {\
-        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
+        pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
         x264_pixel_avg_wtab_##instr1[i_width>>2](\
             dst, i_dst_stride, src1, i_src_stride,\
             src2, i_height );\
@@ -412,10 +409,10 @@ static pixel *get_ref_##name( pixel *dst, intptr_t *i_dst_stride,\
 {\
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
-    pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
+    pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
     if( qpel_idx & 5 ) /* qpel interpolation needed */\
     {\
-        pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
+        pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
         x264_pixel_avg_wtab_##name[i_width>>2](\
             dst, *i_dst_stride, src1, i_src_stride,\
             src2, i_height );\