Commit 30140b34 authored by Anton Mitrofanov's avatar Anton Mitrofanov Committed by Henrik Gramner

Fix bugs/typos in motion compensation and cache_load

Didn't affect output due to the incorrect values either not being used in the
code path or producing equal results compared to the correct values.

Also deduplicate hpel_ref arrays.
parent a46820e0
......@@ -132,9 +132,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *,
x264_mc_copy_w16_neon,
};
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
uint8_t *src[4], intptr_t i_src_stride,
int mvx, int mvy,
......@@ -142,13 +139,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explict if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_neon[i_width>>2](
dst, i_dst_stride, src1, i_src_stride,
src2, i_height );
......@@ -168,13 +165,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explict if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_neon[i_width>>2](
dst, *i_dst_stride, src1, i_src_stride,
src2, i_height );
......
......@@ -136,9 +136,6 @@ static void (* const x264_mc_copy_wtab_neon[5])( uint8_t *, intptr_t, uint8_t *,
x264_mc_copy_w16_neon,
};
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
uint8_t *src[4], intptr_t i_src_stride,
int mvx, int mvy,
......@@ -146,13 +143,13 @@ static void mc_luma_neon( uint8_t *dst, intptr_t i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explict if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_neon[i_width>>2](
dst, i_dst_stride, src1, i_src_stride,
src2, i_height );
......@@ -172,13 +169,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, intptr_t *i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset;
if ( (mvy&3) == 3 ) // explict if() to force conditional add
src1 += i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_neon[i_width>>2](
dst, *i_dst_stride, src1, i_src_stride,
src2, i_height );
......
......@@ -1158,7 +1158,7 @@ static void ALWAYS_INLINE x264_macroblock_cache_load( x264_t *h, int mb_x, int m
{
// Looking at the bottom field so always take the bottom macroblock of the pair.
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[1]];
h->mb.cache.topright_ref[l][2] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[2]];
CP32( h->mb.cache.topright_mv[l][0], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*4 + s4x4*left_index_table->mv[1]] );
......
......@@ -189,8 +189,8 @@ static void hpel_filter( pixel *dsth, pixel *dstv, pixel *dstc, pixel *src,
}
}
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
const uint8_t x264_hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
const uint8_t x264_hpel_ref1[16] = {0,0,1,0,2,2,3,2,2,2,3,2,2,2,3,2};
static void mc_luma( pixel *dst, intptr_t i_dst_stride,
pixel *src[4], intptr_t i_src_stride,
......@@ -199,11 +199,11 @@ static void mc_luma( pixel *dst, intptr_t i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel_avg( dst, i_dst_stride, src1, i_src_stride,
src2, i_src_stride, i_width, i_height );
if( weight->weightfn )
......@@ -222,11 +222,11 @@ static pixel *get_ref( pixel *dst, intptr_t *i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
src2, i_src_stride, i_width, i_height );
if( weight->weightfn )
......
......@@ -41,6 +41,8 @@ typedef struct x264_weight_t
} ALIGNED_16( x264_weight_t );
extern const x264_weight_t x264_weight_none[3];
extern const uint8_t x264_hpel_ref0[16];
extern const uint8_t x264_hpel_ref1[16];
#define SET_WEIGHT( w, b, s, d, o )\
{\
......
......@@ -40,24 +40,19 @@
typedef void (*pf_mc_t)( uint8_t *src, intptr_t i_src,
uint8_t *dst, intptr_t i_dst, int i_height );
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
static inline int x264_tapfilter( uint8_t *pix, int i_pix_next )
{
return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next] + 20*(pix[0] +
pix[1*i_pix_next]) - 5*pix[ 2*i_pix_next] +
pix[ 3*i_pix_next];
}
static inline int x264_tapfilter1( uint8_t *pix )
{
return pix[-2] - 5*pix[-1] + 20*(pix[0] + pix[1]) - 5*pix[ 2] +
pix[ 3];
}
static inline void x264_pixel_avg2_w4_altivec( uint8_t *dst, intptr_t i_dst,
uint8_t *src1, intptr_t i_src1,
uint8_t *src2, int i_height )
......@@ -181,10 +176,10 @@ static void mc_luma_altivec( uint8_t *dst, intptr_t i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
switch( i_width )
{
......@@ -229,10 +224,10 @@ static uint8_t *get_ref_altivec( uint8_t *dst, intptr_t *i_dst_stride,
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
intptr_t offset = (mvy>>2)*i_src_stride + (mvx>>2);
uint8_t *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
uint8_t *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;
if( qpel_idx & 5 ) /* qpel interpolation needed */
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
uint8_t *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
switch( i_width )
{
case 4:
......
......@@ -363,9 +363,6 @@ static void x264_weight_cache_ssse3( x264_t *h, x264_weight_t *w )
}
#endif // !HIGH_BIT_DEPTH
static const uint8_t hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const uint8_t hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
#define MC_LUMA(name,instr1,instr2)\
static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\
pixel *src[4], intptr_t i_src_stride,\
......@@ -374,10 +371,10 @@ static void mc_luma_##name( pixel *dst, intptr_t i_dst_stride,\
{\
int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
if( qpel_idx & 5 ) /* qpel interpolation needed */\
{\
pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
x264_pixel_avg_wtab_##instr1[i_width>>2](\
dst, i_dst_stride, src1, i_src_stride,\
src2, i_height );\
......@@ -412,10 +409,10 @@ static pixel *get_ref_##name( pixel *dst, intptr_t *i_dst_stride,\
{\
int qpel_idx = ((mvy&3)<<2) + (mvx&3);\
int offset = (mvy>>2)*i_src_stride + (mvx>>2);\
pixel *src1 = src[hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
pixel *src1 = src[x264_hpel_ref0[qpel_idx]] + offset + ((mvy&3) == 3) * i_src_stride;\
if( qpel_idx & 5 ) /* qpel interpolation needed */\
{\
pixel *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
pixel *src2 = src[x264_hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);\
x264_pixel_avg_wtab_##name[i_width>>2](\
dst, *i_dst_stride, src1, i_src_stride,\
src2, i_height );\
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment