Commit 0f65f519 authored by Loren Merritt

multiple hypothesis mv prediction:

1-3% improved compression, and .5-1% faster


git-svn-id: svn://svn.videolan.org/x264/trunk@63 df754926-b1dd-0310-bc7b-ec298dee348c
parent 2489b6a6
......@@ -269,7 +269,8 @@ struct x264_t
int8_t *chroma_pred_mode; /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* mb mv for each possible ref */
/* current value */
int i_type;
......
......@@ -349,6 +349,41 @@ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] )
}
}
/* x264_mb_predict_mv_ref16x16:
 *  Gather candidate motion vectors for the current macroblock from the
 *  per-reference mv table h->mb.mvr[i_list][i_ref].
 *
 *  Candidates are emitted in this fixed order, each one only when the
 *  corresponding neighbor lies inside the picture:
 *      left, top, top-left, top-right
 *
 *  h      : encoder context; reads mb.i_mb_x, mb.i_mb_y, mb.i_mb_xy,
 *           mb.i_mb_stride and mb.mvr
 *  i_list : reference list index into mb.mvr
 *  i_ref  : reference frame index into mb.mvr
 *  mvc    : output, receives up to 4 {x,y} candidates
 *  i_mvc  : output, number of candidates actually written (0..4)
 */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc )
{
    int16_t (*ref_mv)[2] = h->mb.mvr[i_list][i_ref];
    const int b_left = h->mb.i_mb_x > 0;
    int neighbor[4];    /* macroblock indices of the available neighbors */
    int n = 0;
    int k;

    /* first decide which neighbors exist ... */
    if( b_left )
        neighbor[n++] = h->mb.i_mb_xy - 1;

    if( h->mb.i_mb_y > 0 )
    {
        const int top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;

        neighbor[n++] = top_xy;
        if( b_left )
            neighbor[n++] = top_xy - 1;
        /* top-right exists unless we are in the last column */
        if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 )
            neighbor[n++] = top_xy + 1;
    }

    /* ... then copy their stored vectors, widening int16_t to int */
    for( k = 0; k < n; k++ )
    {
        mvc[k][0] = ref_mv[neighbor[k]][0];
        mvc[k][1] = ref_mv[neighbor[k]][1];
    }

    *i_mvc = n;
}
static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
const int i8 = x264_scan8[0]+x+8*y;
......@@ -532,7 +567,8 @@ void x264_mb_mc( x264_t *h )
void x264_macroblock_cache_init( x264_t *h )
{
int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
int i, j;
int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
h->mb.i_mb_stride = h->sps->i_mb_width;
......@@ -558,12 +594,20 @@ void x264_macroblock_cache_init( x264_t *h )
h->mb.mvd[1] = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
}
for( i=0; i<2; i++ )
for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */
h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) );
/* init with not available (for top right idx=7,15) */
memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
}
void x264_macroblock_cache_end( x264_t *h )
{
int i, j;
for( i=0; i<2; i++ )
for( j=0; j<16; j++ )
x264_free( h->mb.mvr[i][j] );
if( h->param.b_cabac )
{
x264_free( h->mb.chroma_pred_mode );
......
......@@ -143,17 +143,22 @@ void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale );
/* x264_mb_predict_mv_16x16:
* set mvp with predicted mv for D_16x16 block
* h->mb. need only valid values from others block */
* h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] );
/* x264_mb_predict_mv_pskip:
* set mvp with predicted mv for P_SKIP
* h->mb. need only valid values from others block */
* h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] );
/* x264_mb_predict_mv:
* set mvp with predicted mv for all blocks except P_SKIP
* h->mb. need valid ref/partition/sub of current block to be valid
* and valid mv/ref from others block . */
* and valid mv/ref from other blocks . */
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] );
/* x264_mb_predict_mv_ref16x16:
* set mvc with D_16x16 prediction.
* uses all neighbors, even those that didn't end up using this ref.
* need only valid values from other blocks */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc );
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx );
......
......@@ -455,6 +455,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
{
x264_me_t m;
int i_ref;
int mvc[4][2], i_mvc;
/* 16x16 Search on all ref frame */
m.i_pixel = PIXEL_16x16;
......@@ -462,9 +463,6 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
m.p_fenc = h->mb.pic.p_fenc[0];
m.i_stride= h->mb.pic.i_stride[0];
m.i_mv_range = a->i_mv_range;
m.b_mvc = 0;
// m.mvc[0] = 0;
// m.mvc[1] = 0;
a->l0.me16x16.cost = INT_MAX;
for( i_ref = 0; i_ref < h->i_ref0; i_ref++ )
......@@ -472,7 +470,8 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
/* search with ref */
m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
x264_me_search( h, &m );
x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc );
x264_me_search( h, &m, mvc, i_mvc );
/* add ref cost */
m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
......@@ -482,6 +481,10 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
a->l0.i_ref = i_ref;
a->l0.me16x16 = m;
}
/* save mv for predicting neighbors */
h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0];
h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1];
}
/* subtract ref cost, so we don't have to add it for the other P types */
......@@ -495,12 +498,16 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[5][2], i_mvc;
int i;
/* XXX Needed for x264_mb_predict_mv */
h->mb.i_partition = D_8x8;
i_mvc = 1;
mvc[0][0] = a->l0.me16x16.mv[0];
mvc[0][1] = a->l0.me16x16.mv[1];
for( i = 0; i < 4; i++ )
{
x264_me_t *m = &a->l0.me8x8[i];
......@@ -515,21 +522,14 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a )
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
if( i == 0 )
{
m->b_mvc = 1;
m->mvc[0] = a->l0.me16x16.mv[0];
m->mvc[1] = a->l0.me16x16.mv[1];
}
else
{
m->b_mvc = 0;
}
x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, mvc, i_mvc );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] );
mvc[i_mvc][0] = m->mv[0];
mvc[i_mvc][1] = m->mv[1];
i_mvc++;
}
a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost +
......@@ -540,7 +540,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[2][2];
int i;
/* XXX Needed for x264_mb_predict_mv */
......@@ -558,12 +558,13 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a )
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
m->b_mvc = 1;
m->mvc[0] = a->l0.me8x8[2*i].mv[0];
m->mvc[1] = a->l0.me8x8[2*i].mv[1];
mvc[0][0] = a->l0.me8x8[2*i].mv[0];
mvc[0][1] = a->l0.me8x8[2*i].mv[1];
mvc[1][0] = a->l0.me8x8[2*i+1].mv[0];
mvc[1][1] = a->l0.me8x8[2*i+1].mv[1];
x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, mvc, 2 );
x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] );
}
......@@ -575,7 +576,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0];
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[2][2];
int i;
/* XXX Needed for x264_mb_predict_mv */
......@@ -593,12 +594,13 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a )
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
m->b_mvc = 1;
m->mvc[0] = a->l0.me8x8[i].mv[0];
m->mvc[1] = a->l0.me8x8[i].mv[1];
mvc[0][0] = a->l0.me8x8[i].mv[0];
mvc[0][1] = a->l0.me8x8[i].mv[1];
mvc[1][0] = a->l0.me8x8[i+2].mv[0];
mvc[1][1] = a->l0.me8x8[i+2].mv[1];
x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, mvc, 2 );
x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] );
}
......@@ -621,6 +623,7 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
const int idx = 4*i8x8 + i4x4;
const int x4 = block_idx_x[idx];
const int y4 = block_idx_y[idx];
const int i_mvc = (i4x4 == 0);
x264_me_t *m = &a->l0.me4x4[i8x8][i4x4];
......@@ -632,19 +635,8 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
if( i4x4 == 0 )
{
m->b_mvc = 1;
m->mvc[0] = a->l0.me8x8[i8x8].mv[0];
m->mvc[1] = a->l0.me8x8[i8x8].mv[1];
}
else
{
m->b_mvc = 0;
}
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc );
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] );
}
......@@ -670,6 +662,7 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
const int idx = 4*i8x8 + 2*i8x4;
const int x4 = block_idx_x[idx];
const int y4 = block_idx_y[idx];
const int i_mvc = (i8x4 == 0);
x264_me_t *m = &a->l0.me8x4[i8x8][i8x4];
......@@ -681,19 +674,8 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
if( i8x4 == 0 )
{
m->b_mvc = 1;
m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
}
else
{
m->b_mvc = 0;
}
x264_mb_predict_mv( h, 0, idx, 2, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] );
}
......@@ -716,6 +698,7 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
const int idx = 4*i8x8 + i4x8;
const int x4 = block_idx_x[idx];
const int y4 = block_idx_y[idx];
const int i_mvc = (i4x8 == 0);
x264_me_t *m = &a->l0.me4x8[i8x8][i4x8];
......@@ -727,19 +710,8 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
m->i_stride= h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
if( i4x8 == 0 )
{
m->b_mvc = 1;
m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0];
m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1];
}
else
{
m->b_mvc = 0;
}
x264_mb_predict_mv( h, 0, idx, 1, m->mvp );
x264_me_search( h, m );
x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc );
x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] );
}
......@@ -760,7 +732,6 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
m.lm = a->i_lambda;
m.p_fenc = h->mb.pic.p_fenc[0];
m.i_stride= h->mb.pic.i_stride[0];
m.b_mvc = 0;
m.i_mv_range = a->i_mv_range;
/* ME for List 0 */
......@@ -770,7 +741,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
/* search with ref */
m.p_fref = h->mb.pic.p_fref[0][i_ref][0];
x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp );
x264_me_search( h, &m );
x264_me_search( h, &m, NULL, 0 );
/* add ref cost */
m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref );
......@@ -789,7 +760,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
/* search with ref */
m.p_fref = h->mb.pic.p_fref[1][i_ref][0];
x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp );
x264_me_search( h, &m );
x264_me_search( h, &m, NULL, 0 );
/* add ref cost */
m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref );
......
......@@ -42,7 +42,7 @@ const static int subpel_iterations[][4] =
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
void x264_me_search( x264_t *h, x264_me_t *m )
void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc )
{
const int i_pixel = m->i_pixel;
int bcost;
......@@ -66,19 +66,22 @@ void x264_me_search( x264_t *h, x264_me_t *m )
/* try a candidate if provided */
if( m->b_mvc )
for( i_iter = 0; i_iter < i_mvc; i_iter++ )
{
const int mx = x264_clip3( ( m->mvc[0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
const int my = x264_clip3( ( m->mvc[1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
uint8_t *p_fref2 = &m->p_fref[my*m->i_stride+mx];
int cost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride, p_fref2, m->i_stride ) +
m->lm * ( bs_size_se( m->mvc[0] - m->mvp[0] ) + bs_size_se( m->mvc[1] - m->mvp[1] ) );
if( cost < bcost )
const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
if( mx != bmx || my != bmy )
{
bmx = mx;
bmy = my;
bcost = cost;
p_fref = p_fref2;
uint8_t *p_fref2 = &m->p_fref[my*m->i_stride+mx];
int cost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride, p_fref2, m->i_stride ) +
m->lm * ( bs_size_se( mx - m->mvp[0] ) + bs_size_se( my - m->mvp[1] ) );
if( cost < bcost )
{
bmx = mx;
bmy = my;
bcost = cost;
p_fref = p_fref2;
}
}
}
......
......@@ -38,15 +38,12 @@ typedef struct
int mvp[2];
int b_mvc;
int mvc[2];
/* output */
int cost; /* satd + lm * nbits */
int mv[2];
} x264_me_t;
void x264_me_search( x264_t *h, x264_me_t *m );
void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc );
void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment