Commit 199ff740 authored by Loren Merritt's avatar Loren Merritt

implement macroblock types B_SKIP, B_DIRECT, B_8x8


git-svn-id: svn://svn.videolan.org/x264/trunk@68 df754926-b1dd-0310-bc7b-ec298dee348c
parent b6954ba2
......@@ -79,7 +79,7 @@ void x264_param_default( x264_param_t *param )
param->rc.i_qp_max = 51;
param->rc.i_qp_step = 4;
param->rc.f_ip_factor = 1.4;
param->rc.f_pb_factor = 1.4;
param->rc.f_pb_factor = 1.3;
param->rc.b_stat_write = 0;
param->rc.psz_stat_out = "x264_2pass.log";
......@@ -97,7 +97,8 @@ void x264_param_default( x264_param_t *param )
/* */
param->analyse.intra = X264_ANALYSE_I4x4;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_subpel_refine = 1;
param->analyse.b_psnr = 1;
}
......
......@@ -250,6 +250,8 @@ struct x264_t
/* MB table and cache for current frame/mb */
struct
{
int i_mb_count; /* number of mbs in a frame */
/* Strides */
int i_mb_stride;
......@@ -270,7 +272,15 @@ struct x264_t
int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */
int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* mb mv for each possible ref */
int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
/* for B_SKIP and B_DIRECT motion prediction */
struct
{
int16_t (*mv)[2]; /* keep only L0 */
int8_t *ref;
} list1ref0;
/* current value */
int i_type;
......@@ -313,6 +323,12 @@ struct x264_t
/* 0 if not available */
int16_t mv[2][X264_SCAN8_SIZE][2];
int16_t mvd[2][X264_SCAN8_SIZE][2];
/* 1 if SKIP or DIRECT. set only for B-frames + CABAC */
int8_t skip[X264_SCAN8_SIZE];
int16_t direct_mv[2][X264_SCAN8_SIZE][2];
int8_t direct_ref[2][X264_SCAN8_SIZE];
} cache;
/* */
......
......@@ -349,6 +349,183 @@ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] )
}
}
/* Temporal direct prediction for B_SKIP / B_DIRECT macroblocks:
 * derive L0/L1 refs and mvs by scaling the co-located L1-ref0 frame's
 * list0 motion according to POC distances (H.264 temporal direct mode).
 * Always returns 1: temporal direct prediction is always available. */
static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
{
    /* offsets of this mb into the saved co-located 4x4 mv / 8x8 ref arrays */
    int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
    int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
    int i;

    /* in temporal direct mode the L1 reference is always list1 ref 0 */
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );

    for( i = 0; i < 4; i++ )
    {
        const int x8 = 2*(i%2);
        const int y8 = 2*(i/2);
        /* TODO: MapColToList0 */
        const int i_ref = h->mb.list1ref0.ref[ i_mb_8x8 + x8/2 + y8 * h->mb.i_mb_stride ];

        if( i_ref == -1 )
        {
            /* co-located 8x8 block is intra: ref 0 with zero mvs */
            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, 0 );
            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 0, 0, 0 );
            x264_macroblock_cache_mv( h, x8, y8, 2, 2, 1, 0, 0 );
        }
        else
        {
            int td = x264_clip3( h->fref1[0]->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
            int dist_scale_factor = 0;
            int x4, y4;

            /* Compute the mv scale factor only when td != 0: the original
             * code divided by td unconditionally, which is a division by
             * zero (undefined behavior) whenever both references have the
             * same POC.  When td == 0 the factor is unused (see below). */
            if( td != 0 )
            {
                int tb = x264_clip3( h->fdec->i_poc - h->fref0[i_ref]->i_poc, -128, 127 );
                int tx = (16384 + (abs(td) >> 1)) / td;
                dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
            }

            x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref );

            for( y4 = y8; y4 < y8+2; y4++ )
                for( x4 = x8; x4 < x8+2; x4++ )
                {
                    const int16_t *mv_col = h->mb.list1ref0.mv[ i_mb_4x4 + x4 + y4 * 4 * h->mb.i_mb_stride ];
                    if( td == 0 /* || pic0 is a long-term ref */ )
                    {
                        /* same POC distance: copy the co-located mv for L0,
                         * zero mv for L1 (per the spec's long-term/zero-td case) */
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_col[0], mv_col[1] );
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
                    }
                    else
                    {
                        /* scale the co-located mv; L1 mv is the remainder so
                         * that mv_l0 - mv_l1 == mv_col */
                        int mv_l0[2];
                        mv_l0[0] = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
                        mv_l0[1] = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, mv_l0[0], mv_l0[1] );
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1] );
                    }
                }
        }
    }

    return 1;
}
/* Spatial direct prediction for B_SKIP / B_DIRECT macroblocks:
 * per list, pick the minimum non-negative ref used by the left/top/top-right
 * neighbors, give the whole mb the median-predicted 16x16 mv for that ref,
 * then zero the mv of any 4x4 whose co-located list1 block uses ref 0 with a
 * near-zero mv (col_zero_flag).  Always returns 1. */
static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
{
    int ref[2];
    int mv[2][2];
    int i_list;
    int i8, i4;
    /* row strides of the saved co-located data, in 8x8 resp. 4x4 block units */
    const int s8x8 = 2 * h->mb.i_mb_stride;
    const int s4x4 = 4 * h->mb.i_mb_stride;
    /* co-located refs/mvs of this mb inside list1 ref0's saved motion field */
    const int8_t *l1ref = &h->mb.list1ref0.ref[ 2*h->mb.i_mb_x + 2*s8x8*h->mb.i_mb_y ];
    const int16_t (*l1mv)[2] = (const int16_t (*)[2])
        &h->mb.list1ref0.mv[ 4*h->mb.i_mb_x + 4*s4x4*h->mb.i_mb_y ];

    for( i_list=0; i_list<2; i_list++ )
    {
        /* neighbors A (left), B (top), C (top-right); the cache stores -2
         * for "outside the picture", in which case C falls back to the
         * top-left neighbor */
        int i_refa = h->mb.cache.ref[i_list][X264_SCAN8_0 - 1];
        int i_refb = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8];
        int i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 + 4];
        if( i_refc == -2 )
            i_refc = h->mb.cache.ref[i_list][X264_SCAN8_0 - 8 - 1];

        /* minimum non-negative ref among A, B, C; -1 if none is inter */
        ref[i_list] = i_refa;
        if( ref[i_list] < 0 || ( i_refb < ref[i_list] && i_refb >= 0 ))
            ref[i_list] = i_refb;
        if( ref[i_list] < 0 || ( i_refc < ref[i_list] && i_refc >= 0 ))
            ref[i_list] = i_refc;
        if( ref[i_list] < 0 )
            ref[i_list] = -1;
    }

    if( ref[0] < 0 && ref[1] < 0 )
    {
        /* no inter neighbor in either list: both lists use ref 0, zero mv */
        ref[0] =
        ref[1] = 0;
        mv[0][0] =
        mv[0][1] =
        mv[1][0] =
        mv[1][1] = 0;
    }
    else
    {
        for( i_list=0; i_list<2; i_list++ )
        {
            if( ref[i_list] >= 0 )
                x264_mb_predict_mv_16x16( h, i_list, ref[i_list], mv[i_list] );
            else
                mv[i_list][0] = mv[i_list][1] = 0;
        }
    }

    /* FIXME: clip mv ? */
    /* apply the 16x16 prediction to the whole mb cache */
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, ref[0] );
    x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, ref[1] );
    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, mv[0][0], mv[0][1] );
    x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, mv[1][0], mv[1][1] );

    /* col_zero_flag: in each 8x8 whose co-located ref is 0, zero out the
     * mv of every 4x4 whose co-located mv is within +/-1 in both components */
    for( i8=0; i8<4; i8++ )
    {
        const int x8 = i8%2;
        const int y8 = i8/2;
        if( l1ref[ x8 + y8*s8x8 ] == 0 )
        {
            for( i4=0; i4<4; i4++ )
            {
                const int x4 = i4%2 + 2*x8;
                const int y4 = i4/2 + 2*y8;
                const int16_t *mvcol = l1mv[x4 + y4*s4x4];
                if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
                {
                    if( ref[0] == 0 )
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0, 0 );
                    if( ref[1] == 0 )
                        x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0, 0 );
                }
            }
        }
    }

    return 1;
}
/* Fill h->mb.cache.mv / cache.ref for a B_SKIP or B_DIRECT mb, using the
 * slice's configured direct prediction mode (spatial or temporal).
 * Returns 1 when direct prediction is available, 0 otherwise. */
int x264_mb_predict_mv_direct16x16( x264_t *h )
{
    int b_available;

    if( h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_NONE )
        return 0;

    b_available = h->sh.b_direct_spatial_mv_pred
                ? x264_mb_predict_mv_direct16x16_spatial( h )
                : x264_mb_predict_mv_direct16x16_temporal( h );

    /* snapshot the predicted refs and mvs so that 8x8 sub-partitions can
     * reload them later via x264_mb_load_mv_direct8x8 */
    if( b_available )
    {
        int i_list, i;
        for( i_list = 0; i_list < 2; i_list++ )
            for( i = 0; i < 4; i++ )
                h->mb.cache.direct_ref[i_list][i] = h->mb.cache.ref[i_list][x264_scan8[i*4]];
        memcpy( h->mb.cache.direct_mv, h->mb.cache.mv, sizeof(h->mb.cache.mv) );
    }

    return b_available;
}
/* Restore the direct-mode prediction for 8x8 partition idx (0..3) from the
 * snapshot taken by x264_mb_predict_mv_direct16x16: refs for both lists plus
 * the four 4x4 mvs (two per cache row) of that 8x8 block. */
void x264_mb_load_mv_direct8x8( x264_t *h, int idx )
{
    const int x = 2*(idx%2);
    const int y = 2*(idx/2);
    int l;

    x264_macroblock_cache_ref( h, x, y, 2, 2, 0, h->mb.cache.direct_ref[0][idx] );
    x264_macroblock_cache_ref( h, x, y, 2, 2, 1, h->mb.cache.direct_ref[1][idx] );
    for( l = 0; l < 2; l++ )
    {
        /* Copy two adjacent int16_t[2] mvs (8 bytes) per cache row.
         * memcpy replaces the original *(uint64_t*) stores, which violated
         * strict aliasing and assumed 8-byte alignment of int16_t arrays;
         * the compiler emits the same 8-byte moves. */
        memcpy( h->mb.cache.mv[l][x264_scan8[idx*4]],
                h->mb.cache.direct_mv[l][x264_scan8[idx*4]],
                2 * sizeof( h->mb.cache.mv[0][0] ) );
        memcpy( h->mb.cache.mv[l][x264_scan8[idx*4]+8],
                h->mb.cache.direct_mv[l][x264_scan8[idx*4]+8],
                2 * sizeof( h->mb.cache.mv[0][0] ) );
    }
}
/* This just improves encoder performance, it's not part of the spec */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc )
{
int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref];
......@@ -469,6 +646,49 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
}
/* Motion-compensate one direct-predicted 8x8 block at 4x4-block position
 * (x,y), choosing L0/L1/bi interpolation from the cached refs.  With
 * direct_8x8_inference the whole 8x8 is interpolated at once; otherwise
 * each of the four 4x4 sub-blocks is interpolated separately. */
static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
{
    const int i8 = x264_scan8[0] + x + 8*y;
    const int b_list0 = h->mb.cache.ref[0][i8] >= 0;
    const int b_list1 = h->mb.cache.ref[1][i8] >= 0;

    /* FIXME: optimize based on current block size, not global settings? */
    if( h->sps->b_direct8x8_inference )
    {
        if( b_list0 && b_list1 )
            x264_mb_mc_01xywh( h, x, y, 2, 2 );
        else if( b_list0 )
            x264_mb_mc_0xywh( h, x, y, 2, 2 );
        else
            x264_mb_mc_1xywh( h, x, y, 2, 2 );
    }
    else
    {
        int dx, dy;
        for( dy = 0; dy < 2; dy++ )
            for( dx = 0; dx < 2; dx++ )
            {
                if( b_list0 && b_list1 )
                    x264_mb_mc_01xywh( h, x+dx, y+dy, 1, 1 );
                else if( b_list0 )
                    x264_mb_mc_0xywh( h, x+dx, y+dy, 1, 1 );
                else
                    x264_mb_mc_1xywh( h, x+dx, y+dy, 1, 1 );
            }
    }
}
void x264_mb_mc( x264_t *h )
{
......@@ -489,7 +709,7 @@ void x264_mb_mc( x264_t *h )
x264_mb_mc_0xywh( h, 2, 0, 2, 4 );
}
}
else if( h->mb.i_type == P_8x8 )
else if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
{
int i;
for( i = 0; i < 4; i++ )
......@@ -515,13 +735,55 @@ void x264_mb_mc( x264_t *h )
x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
break;
case D_L1_8x8:
x264_mb_mc_1xywh( h, x, y, 2, 2 );
break;
case D_L1_8x4:
x264_mb_mc_1xywh( h, x, y+0, 2, 1 );
x264_mb_mc_1xywh( h, x, y+1, 2, 1 );
break;
case D_L1_4x8:
x264_mb_mc_1xywh( h, x+0, y, 1, 2 );
x264_mb_mc_1xywh( h, x+1, y, 1, 2 );
break;
case D_L1_4x4:
x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
break;
case D_BI_8x8:
x264_mb_mc_01xywh( h, x, y, 2, 2 );
break;
case D_BI_8x4:
x264_mb_mc_01xywh( h, x, y+0, 2, 1 );
x264_mb_mc_01xywh( h, x, y+1, 2, 1 );
break;
case D_BI_4x8:
x264_mb_mc_01xywh( h, x+0, y, 1, 2 );
x264_mb_mc_01xywh( h, x+1, y, 1, 2 );
break;
case D_BI_4x4:
x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
break;
case D_DIRECT_8x8:
x264_mb_mc_direct8x8( h, x, y );
break;
}
}
}
else if( h->mb.i_type == B_8x8 || h->mb.i_type == B_DIRECT )
else if( h->mb.i_type == B_SKIP || h->mb.i_type == B_DIRECT )
{
x264_log( h, X264_LOG_ERROR, "mc_luma with unsupported mb\n" );
return;
int i;
for( i = 0; i < 4; i++ )
{
const int x = 2*(i%2);
const int y = 2*(i/2);
x264_mb_mc_direct8x8( h, x, y );
}
}
else /* B_*x* */
{
......@@ -568,13 +830,14 @@ void x264_mb_mc( x264_t *h )
void x264_macroblock_cache_init( x264_t *h )
{
int i, j;
int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
int i_mb_count = h->mb.i_mb_count;
h->mb.i_mb_stride = h->sps->i_mb_width;
h->mb.type= x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.qp = x264_malloc( i_mb_count * sizeof( int8_t) );
h->mb.cbp = x264_malloc( i_mb_count * sizeof( int16_t) );
h->mb.skipbp = x264_malloc( i_mb_count * sizeof( int8_t) );
/* 0 -> 3 top(4), 4 -> 6 : left(3) */
h->mb.intra4x4_pred_mode = x264_malloc( i_mb_count * 7 * sizeof( int8_t ) );
......@@ -598,6 +861,14 @@ void x264_macroblock_cache_init( x264_t *h )
for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */
h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) );
h->mb.list1ref0.ref = NULL;
h->mb.list1ref0.mv = NULL;
if( h->param.i_bframe )
{
h->mb.list1ref0.ref = x264_malloc( 4 * i_mb_count * sizeof( int8_t ) );
h->mb.list1ref0.mv = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) );
}
/* init with not available (for top right idx=7,15) */
memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) );
......@@ -614,12 +885,18 @@ void x264_macroblock_cache_end( x264_t *h )
x264_free( h->mb.mvd[0] );
x264_free( h->mb.mvd[1] );
}
if( h->param.i_bframe )
{
x264_free( h->mb.list1ref0.ref );
x264_free( h->mb.list1ref0.mv );
}
x264_free( h->mb.mv[0] );
x264_free( h->mb.mv[1] );
x264_free( h->mb.ref[0] );
x264_free( h->mb.ref[1] );
x264_free( h->mb.intra4x4_pred_mode );
x264_free( h->mb.non_zero_count );
x264_free( h->mb.skipbp );
x264_free( h->mb.cbp );
x264_free( h->mb.qp );
x264_free( h->mb.type );
......@@ -923,6 +1200,29 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
}
}
/* load skip */
if( h->param.b_cabac )
{
if( h->sh.i_type == SLICE_TYPE_B )
{
memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
if( i_left_xy >= 0 )
{
h->mb.cache.skip[x264_scan8[0] - 1] = h->mb.skipbp[i_left_xy] & 0x2;
h->mb.cache.skip[x264_scan8[8] - 1] = h->mb.skipbp[i_left_xy] & 0x8;
}
if( i_top_xy >= 0 )
{
h->mb.cache.skip[x264_scan8[0] - 8] = h->mb.skipbp[i_top_xy] & 0x4;
h->mb.cache.skip[x264_scan8[4] - 8] = h->mb.skipbp[i_top_xy] & 0x8;
}
}
else if( h->mb.i_mb_xy == 0 && h->sh.i_type == SLICE_TYPE_P )
{
memset( h->mb.cache.skip, 0, X264_SCAN8_SIZE * sizeof( int8_t ) );
}
}
}
}
......@@ -1036,7 +1336,7 @@ void x264_macroblock_cache_save( x264_t *h )
else
h->mb.chroma_pred_mode[i_mb_xy] = I_PRED_CHROMA_DC;
if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) )
if( !IS_INTRA( i_mb_type ) && !IS_SKIP( i_mb_type ) && !IS_DIRECT( i_mb_type ) )
{
int i_list;
for( i_list = 0; i_list < 2; i_list++ )
......@@ -1070,6 +1370,27 @@ void x264_macroblock_cache_save( x264_t *h )
}
}
}
if( h->sh.i_type == SLICE_TYPE_B )
{
if( i_mb_type == B_SKIP || i_mb_type == B_DIRECT )
h->mb.skipbp[i_mb_xy] = 0xf;
else if( i_mb_type == B_8x8 )
{
int skipbp = 0;
for( i = 0; i < 4; i++ )
skipbp |= ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 ) << i;
h->mb.skipbp[i_mb_xy] = skipbp;
}
else
h->mb.skipbp[i_mb_xy] = 0;
}
}
}
/* Save the whole frame's list0 refs and mvs into mb.list1ref0 so that the
 * next B-frame can read them as the co-located (list1 ref0) motion field
 * for direct mode prediction.  Sizes match the allocations in
 * x264_macroblock_cache_init: 4 int8_t refs and 16 int16_t[2] mvs per mb. */
void x264_macroblock_direct_ref_save( x264_t *h )
{
    /* Manipulation of ref numbers is unnecessary unless we allow
     * ref list reordering, multiple B-frame delay, or B-frames as refs. */
    memcpy( h->mb.list1ref0.ref, h->mb.ref[0], 4 * h->mb.i_mb_count * sizeof( int8_t ) );
    memcpy( h->mb.list1ref0.mv, h->mb.mv[0], 2*16 * h->mb.i_mb_count * sizeof( int16_t ) );
}
......@@ -37,6 +37,7 @@ enum macroblock_position_e
/* XXX mb_type isn't the one written in the bitstream -> only internal usage */
#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_16x16 )
#define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP )
#define IS_DIRECT(type) ( (type) == B_DIRECT )
enum mb_class_e
{
I_4x4 = 0,
......@@ -118,6 +119,21 @@ enum mb_partition_e
D_16x16 = 16,
};
static const int x264_mb_partition_listX_table[2][17] =
{{
1, 1, 1, 1, /* D_L0_* */
0, 0, 0, 0, /* D_L1_* */
1, 1, 1, 1, /* D_BI_* */
0, /* D_DIRECT_8x8 */
0, 0, 0, 0 /* 8x8 .. 16x16 */
},
{
0, 0, 0, 0, /* D_L0_* */
1, 1, 1, 1, /* D_L1_* */
1, 1, 1, 1, /* D_BI_* */
0, /* D_DIRECT_8x8 */
0, 0, 0, 0 /* 8x8 .. 16x16 */
}};
static const int x264_mb_partition_count_table[17] =
{
/* sub L0 */
......@@ -137,6 +153,8 @@ void x264_macroblock_cache_load( x264_t *h, int, int );
void x264_macroblock_cache_save( x264_t *h );
void x264_macroblock_cache_end( x264_t *h );
void x264_macroblock_direct_ref_save( x264_t *h );
void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int i_qscale );
void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int i_qscale );
void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale );
......@@ -150,14 +168,23 @@ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] );
* h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] );
/* x264_mb_predict_mv:
* set mvp with predicted mv for all blocks except P_SKIP
* set mvp with predicted mv for all blocks except SKIP and DIRECT
* h->mb. need valid ref/partition/sub of current block to be valid
* and valid mv/ref from other blocks . */
void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] );
/* x264_mb_predict_mv_direct16x16:
* set h->mb.cache.mv and h->mb.cache.ref for B_SKIP or B_DIRECT
* h->mb. need only valid values from other blocks
* return 1 on success, 0 on failure */
int x264_mb_predict_mv_direct16x16( x264_t *h );
/* x264_mb_load_mv_direct8x8:
* set h->mb.cache.mv and h->mb.cache.ref for B_DIRECT
* must be called only after x264_mb_predict_mv_direct16x16 */
void x264_mb_load_mv_direct8x8( x264_t *h, int idx );
/* x264_mb_predict_mv_ref16x16:
* set mvc with D_16x16 prediction.
* uses all neighbors, even those that didn't end up using this ref.
* need only valid values from other blocks */
* h->mb. need only valid values from other blocks */
void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc );
......@@ -204,6 +231,17 @@ static inline void x264_macroblock_cache_mvd( x264_t *h, int x, int y, int width
}
}
}
/* Mark a width x height rectangle of 4x4 blocks (origin (x,y), in 4x4-block
 * units) as skipped (b_skip=1) or not (b_skip=0) in the scan8 skip cache. */
static inline void x264_macroblock_cache_skip( x264_t *h, int x, int y, int width, int height, int b_skip )
{
    int i, j;
    for( j = y; j < y + height; j++ )
        for( i = x; i < x + width; i++ )
            h->mb.cache.skip[X264_SCAN8_0 + i + 8*j] = b_skip;
}
#endif
......@@ -91,6 +91,11 @@ typedef struct
x264_mb_analysis_list_t l1;
int i_cost16x16bi; /* used the same ref and mv as l0 and l1 (at least for now) */
int i_cost16x16direct;
int i_cost8x8bi;
int i_cost8x8direct[4];
int b_direct_available;
} x264_mb_analysis_t;
......@@ -163,12 +168,15 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
a->l1.i_cost4x4[i] = -1;
a->l1.i_cost8x4[i] = -1;
a->l1.i_cost4x8[i] = -1;
a->i_cost8x8direct[i] = -1;
}
a->l1.i_cost16x8 = -1;
a->l1.i_cost8x16 = -1;
a->i_cost16x16bi = -1;
a->i_cost16x16direct = -1;
a->i_cost8x8bi = -1;
}
}
}
......@@ -719,6 +727,27 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8
a->l0.i_cost4x8[i8x8] = a->l0.me4x8[i8x8][0].cost + a->l0.me4x8[i8x8][1].cost;
}
/* Cost the DIRECT prediction by SATD between the source mb and the direct-MC
 * reconstruction already present in fdec: fills a->i_cost8x8direct[0..3]
 * (one SATD per 8x8 quadrant) and their sum a->i_cost16x16direct.
 * Note: mv/ref bit costs are not included, only distortion. */
static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
{
    /* Assumes that fdec still contains the results of
     * x264_mb_predict_mv_direct16x16 and x264_mb_mc */
    uint8_t *p_fenc = h->mb.pic.p_fenc[0];
    uint8_t *p_fdec = h->mb.pic.p_fdec[0];
    int i_stride= h->mb.pic.i_stride[0];
    int i;

    a->i_cost16x16direct = 0;
    for( i = 0; i < 4; i++ )
    {
        /* (x8,y8) in 8x8 units; off is the pixel offset of the quadrant */
        const int x8 = i%2;
        const int y8 = i/2;
        const int off = 8 * x8 + 8 * i_stride * y8;
        a->i_cost16x16direct +=
        a->i_cost8x8direct[i] =
            h->pixf.satd[PIXEL_8x8]( &p_fenc[off], i_stride, &p_fdec[off], i_stride );
    }
}
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
......@@ -796,6 +825,121 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
bs_size_se( a->l1.me16x16.mv[1] - a->l1.me16x16.mvp[1] ) );
}
static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t pix[2][8*8];
uint8_t *p_fref[2] = { h->mb.pic.p_fref[0][a->l0.i_ref][0],
h->mb.pic.p_fref[1][a->l1.i_ref][0] };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
int mvc[2][5][2], i_mvc[2];
int i, j;
/* XXX Needed for x264_mb_predict_mv */
h->mb.i_partition = D_8x8;
a->i_cost8x8bi = 0;
i_mvc[0] = i_mvc[1] = 1;
mvc[0][0][0] = a->l0.me16x16.mv[0];
mvc[0][0][1] = a->l0.me16x16.mv[1];
mvc[1][0][0] = a->l1.me16x16.mv[0];
mvc[1][0][1] = a->l1.me16x16.mv[1];
for( i = 0; i < 4; i++ )
{
const int x8 = i%2;
const int y8 = i/2;
uint8_t *p_fenc_i = &p_fenc[8*(y8*h->mb.pic.i_stride[0]+x8)];
int i_part_cost;
int i_part_cost_bi = 0;
for( j = 0; j < 2; j++ )
{
x264_mb_analysis_list_t *l = j ? &a->l1 : &a->l0;
x264_me_t *m = &l->me8x8[i];
m->i_pixel = PIXEL_8x8;
m->lm = a->i_lambda;
m->p_fenc = p_fenc_i;
m->p_fref = &p_fref[j][8*(y8*h->mb.pic.i_stride[0]+x8)];
m->i_stride = h->mb.pic.i_stride[0];
m->i_mv_range = a->i_mv_range;
x264_mb_predict_mv( h, j, 4*i, 2, m->mvp );
x264_me_search( h, m, mvc[j], i_mvc[j] );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, j, m->mv[0], m->mv[1] );
l->i_cost8x8 += m->cost;
/* BI mode */
h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[j], 8,
m->mv[0], m->mv[1], 8, 8 );
/* FIXME: add ref cost */
i_part_cost_bi += a->i_lambda * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
bs_size_se( m->mv[1] - m->mvp[1] ) );
}
h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
i_part_cost = a->l0.me8x8[i].cost;
h->mb.i_sub_partition[i] = D_L0_8x8;
if( a->l1.me8x8[i].cost < i_part_cost )
{
i_part_cost = a->l1.me8x8[i].cost;
h->mb.i_sub_partition[i] = D_L1_8x8;
}
if( i_part_cost_bi < i_part_cost )
{
i_part_cost = i_part_cost_bi;
h->mb.i_sub_partition[i] = D_BI_8x8;
}
if( a->i_cost8x8direct[i] < i_part_cost && a->i_cost8x8direct[i] >= 0)
{
i_part_cost = a->i_cost8x8direct[i];
h->mb.i_sub_partition[i] = D_DIRECT_8x8;
}
a->i_cost8x8bi += i_part_cost;
/* XXX Needed for x264_mb_predict_mv */
if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
{
x264_mb_load_mv_direct8x8( h, i );
x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 0, 0, 0 );
x264_macroblock_cache_mvd( h, 2*x8, 2*y8, 2, 2, 1, 0, 0 );
x264_macroblock_cache_skip( h, 2*x8, 2*y8, 2, 2, 1 );
}
else
{
if( h->mb.i_sub_partition[i] == D_L1_8x8 )