Commit 94123d65 authored by Fiona Glaser
Browse files

Improve temporal MV prediction

Predict based on the results of p16x16 search, not final MVs.
This lets us get predictions even if mode decision chose intra.
Also improves cache coherency.
parent 8399311e
......@@ -105,6 +105,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
CHECKED_MALLOC( frame->mb_partition, i_mb_count * sizeof(uint8_t));
CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( frame->mv16x16, 2*i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
if( h->param.i_bframe )
{
......@@ -206,6 +207,7 @@ void x264_frame_delete( x264_frame_t *frame )
x264_free( frame->mb_partition );
x264_free( frame->mv[0] );
x264_free( frame->mv[1] );
x264_free( frame->mv16x16 );
x264_free( frame->ref[0] );
x264_free( frame->ref[1] );
x264_pthread_mutex_destroy( &frame->mutex );
......
......@@ -83,6 +83,7 @@ typedef struct x264_frame
int8_t *mb_type;
uint8_t *mb_partition;
int16_t (*mv[2])[2];
int16_t (*mv16x16)[2];
int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
/* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
......@@ -96,7 +97,7 @@ typedef struct x264_frame
int8_t *ref[2];
int i_ref[2];
int ref_poc[2][16];
int16_t inv_ref_poc[2][32]; // inverse values (list0 only) to avoid divisions in MB encoding
int16_t inv_ref_poc[2]; // inverse values of ref0 poc to avoid divisions in temporal MV prediction
/* for adaptive B-frame decision.
* contains the SATD cost of the lowres frame encoded in various modes
......
......@@ -25,24 +25,6 @@
#include "common.h"
#include "encoder/me.h"
/* Set up a lookup table for delta pocs to reduce an IDIV to an IMUL.
 *
 * For each field (only field 0 unless h->sh.b_mbaff is set) and each list0
 * reference index, stores in h->fdec->inv_ref_poc[field][i] the rounded
 * reciprocal, scaled by 256, of the POC difference between the current
 * picture and that reference.  Consumers multiply by the stored value and
 * shift right by 8 rather than dividing by the POC difference. */
static void setup_inverse_delta_pocs( x264_t *h )
{
for( int field = 0; field <= h->sh.b_mbaff; field++ )
{
/* POC of the picture being coded; the bottom-field offset is added when field == 1. */
int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
/* With b_mbaff set the index range is doubled, and each pair of consecutive
 * indices maps to the same h->fref0 entry (i >> 1). */
for( int i = 0; i < (h->i_ref0<<h->sh.b_mbaff); i++ )
{
int refpoc = h->fref0[i>>h->sh.b_mbaff]->i_poc;
/* Add the bottom-field offset to the reference when field and (i&1) differ. */
if( h->sh.b_mbaff && field^(i&1) )
refpoc += h->sh.i_delta_poc_bottom;
int delta = curpoc - refpoc;
/* 256/delta, with delta/2 added to the numerator before dividing so the result is rounded.
 * NOTE(review): delta == 0 would divide by zero here; presumably a list0
 * reference never has the same POC as the current picture -- confirm. */
h->fdec->inv_ref_poc[field][i] = (256 + delta/2) / delta;
}
}
}
static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height )
{
int i8 = x264_scan8[0]+x+8*y;
......@@ -268,7 +250,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
else if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_BLIND )
i_refs = X264_MIN(16, i_refs + 1); //blind weights add one duplicate frame
for( int j = 0; j < i_refs; j++ )
for( int j = !i; j < i_refs; j++ )
CHECKED_MALLOC( h->mb.mvr[i][j], 2 * i_mb_count * sizeof(int16_t) );
}
......@@ -318,7 +300,7 @@ fail: return -1;
void x264_macroblock_cache_free( x264_t *h )
{
for( int i = 0; i < 2; i++ )
for( int j = 0; j < 32; j++ )
for( int j = !i; j < 32; j++ )
x264_free( h->mb.mvr[i][j] );
for( int i = 0; i < 16; i++ )
x264_free( h->mb.p_weight_buf[i] );
......@@ -382,6 +364,7 @@ void x264_macroblock_slice_init( x264_t *h )
{
h->mb.mv[0] = h->fdec->mv[0];
h->mb.mv[1] = h->fdec->mv[1];
h->mb.mvr[0][0] = h->fdec->mv16x16;
h->mb.ref[0] = h->fdec->ref[0];
h->mb.ref[1] = h->fdec->ref[1];
h->mb.type = h->fdec->mb_type;
......@@ -416,7 +399,17 @@ void x264_macroblock_slice_init( x264_t *h )
/* init with not available (for top right idx=7,15) */
memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );
setup_inverse_delta_pocs( h );
if( h->i_ref0 > 0 )
for( int field = 0; field <= h->sh.b_mbaff; field++ )
{
int curpoc = h->fdec->i_poc + field*h->sh.i_delta_poc_bottom;
int refpoc = h->fref0[0]->i_poc;
if( h->sh.b_mbaff && field )
refpoc += h->sh.i_delta_poc_bottom;
int delta = curpoc - refpoc;
h->fdec->inv_ref_poc[field] = (256 + delta/2) / delta;
}
h->mb.i_neighbour4[6] =
h->mb.i_neighbour4[9] =
......
......@@ -445,17 +445,13 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
if( h->sh.b_mbaff && field^(i_ref&1) )
refpoc += h->sh.i_delta_poc_bottom;
#define SET_TMVP(dx, dy) { \
int i_b4 = h->mb.i_b4_xy + dx*4 + dy*4*h->mb.i_b4_stride; \
int i_b8 = h->mb.i_b8_xy + dx*2 + dy*2*h->mb.i_b8_stride; \
int ref_col = l0->ref[0][i_b8]; \
if( ref_col >= 0 ) \
{ \
int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field][ref_col];\
mvc[i][0] = (l0->mv[0][i_b4][0]*scale + 128) >> 8;\
mvc[i][1] = (l0->mv[0][i_b4][1]*scale + 128) >> 8;\
i++; \
} \
#define SET_TMVP( dx, dy )\
{ \
int mb_index = h->mb.i_mb_xy + dx + dy*h->mb.i_mb_stride; \
int scale = (curpoc - refpoc) * l0->inv_ref_poc[h->mb.b_interlaced&field];\
mvc[i][0] = (l0->mv16x16[mb_index][0]*scale + 128) >> 8;\
mvc[i][1] = (l0->mv16x16[mb_index][1]*scale + 128) >> 8;\
i++;\
}
SET_TMVP(0,0);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment