Commit 1f0e78d8 authored by Fiona Glaser

Eliminate support for direct_8x8_inference=0

The benefit in the most extreme contrived situation was at most 0.001 dB PSNR, at the cost of slower decoding.
As this option was basically useless, it was a waste of code and prevented some other useful optimizations.
Remove some unused mc code related to sub-8x8 partitions.
Small deblocking speedup when p4x4 is used.
Also remove unused x264_nal_decode prototype from x264.h.
parent 71e87fae
......@@ -123,7 +123,6 @@ void x264_param_default( x264_param_t *param )
param->analyse.b_chroma_me = 1;
param->analyse.i_mv_range_thread = -1;
param->analyse.i_mv_range = -1; // set from level_idc
param->analyse.i_direct_8x8_inference = 1;
param->analyse.i_chroma_qp_offset = 0;
param->analyse.b_fast_pskip = 1;
param->analyse.b_dct_decimate = 1;
......@@ -458,8 +457,6 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->analyse.b_weighted_bipred = atobool(value);
OPT2("direct", "direct-pred")
b_error |= parse_enum( value, x264_direct_pred_names, &p->analyse.i_direct_mv_pred );
OPT("direct-8x8")
p->analyse.i_direct_8x8_inference = atoi(value);
OPT("chroma-qp-offset")
p->analyse.i_chroma_qp_offset = atoi(value);
OPT("me")
......
......@@ -440,7 +440,7 @@ struct x264_t
/* current value */
int i_type;
int i_partition;
int i_sub_partition[4];
DECLARE_ALIGNED_4( uint8_t i_sub_partition[4] );
int b_transform_8x8;
int i_cbp_luma;
......
......@@ -623,7 +623,6 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
const int b_interlaced = h->sh.b_mbaff;
const int mvy_limit = 4 >> b_interlaced;
const int qp_thresh = 15 - X264_MIN(h->sh.i_alpha_c0_offset, h->sh.i_beta_offset) - X264_MAX(0, h->param.analyse.i_chroma_qp_offset);
const int no_sub8x8 = !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
int mb_x;
int stridey = h->fdec->i_stride[0];
int stride2y = stridey << b_interlaced;
......@@ -641,6 +640,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
const int i_qp = h->mb.qp[mb_xy];
int i_edge_end = (h->mb.type[mb_xy] == P_SKIP) ? 1 : 4;
int no_sub8x8 = h->mb.type[mb_xy] != P_8x8 || !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
uint8_t *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
uint8_t *pixu = h->fdec->plane[1] + 8*mb_y*strideuv + 8*mb_x;
uint8_t *pixv = h->fdec->plane[2] + 8*mb_y*strideuv + 8*mb_x;
......
......@@ -157,7 +157,6 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
int i_mb_4x4 = 16 * h->mb.i_mb_stride * h->mb.i_mb_y + 4 * h->mb.i_mb_x;
int i_mb_8x8 = 4 * h->mb.i_mb_stride * h->mb.i_mb_y + 2 * h->mb.i_mb_x;
int i8, i4;
int b8x8;
const int type_col = h->fref1[0]->mb_type[ h->mb.i_mb_xy ];
x264_macroblock_cache_ref( h, 0, 0, 4, 4, 1, 0 );
......@@ -169,8 +168,6 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, 0 );
return 1;
}
b8x8 = h->sps->b_direct8x8_inference ||
(type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8);
for( i8 = 0; i8 < 4; i8++ )
{
......@@ -182,30 +179,12 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
if( i_ref >= 0 )
{
const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
x264_macroblock_cache_ref( h, 2*x8, 2*y8, 2, 2, 0, i_ref );
if( b8x8 )
{
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + 3*x8 + 3*y8 * h->mb.i_b4_stride];
const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
else
{
for( i4 = 0; i4 < 4; i4++ )
{
const int x4 = i4%2 + 2*x8;
const int y4 = i4/2 + 2*y8;
const int16_t *mv_col = h->fref1[0]->mv[0][ i_mb_4x4 + x4 + y4 * h->mb.i_b4_stride ];
const int l0x = ( dist_scale_factor * mv_col[0] + 128 ) >> 8;
const int l0y = ( dist_scale_factor * mv_col[1] + 128 ) >> 8;
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, pack16to32_mask(l0x, l0y) );
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
}
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, pack16to32_mask(l0x, l0y) );
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, pack16to32_mask(l0x-mv_col[0], l0y-mv_col[1]) );
}
else
{
......@@ -220,8 +199,7 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
if( h->param.i_threads > 1 )
{
int di = b8x8 ? 4 : 1;
for( i4=0; i4<16; i4+=di )
for( i4=0; i4<16; i4+=4 )
{
if( h->mb.cache.mv[0][x264_scan8[i4]][1] > h->mb.mv_max_spel[1]
|| h->mb.cache.mv[1][x264_scan8[i4]][1] > h->mb.mv_max_spel[1] )
......@@ -247,8 +225,7 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
int ref[2];
DECLARE_ALIGNED_8( int16_t mv[2][2] );
int i_list;
int i8, i4;
int b8x8;
int i8;
const int8_t *l1ref0 = &h->fref1[0]->ref[0][ h->mb.i_b8_xy ];
const int8_t *l1ref1 = &h->fref1[0]->ref[1][ h->mb.i_b8_xy ];
const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->fref1[0]->mv[0][ h->mb.i_b4_xy ];
......@@ -310,9 +287,6 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
if( IS_INTRA( type_col ) || (ref[0]&&ref[1]) )
return 1;
b8x8 = h->sps->b_direct8x8_inference ||
(type_col != P_8x8 && type_col != B_SKIP && type_col != B_DIRECT && type_col != B_8x8);
/* col_zero_flag */
for( i8=0; i8<4; i8++ )
{
......@@ -322,32 +296,13 @@ static int x264_mb_predict_mv_direct16x16_spatial( x264_t *h )
if( l1ref0[o8] == 0 || ( l1ref0[o8] < 0 && l1ref1[o8] == 0 ) )
{
const int16_t (*l1mv)[2] = (l1ref0[o8] == 0) ? l1mv0 : l1mv1;
if( b8x8 )
const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride];
if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
{
const int16_t *mvcol = l1mv[3*x8 + 3*y8 * h->mb.i_b4_stride];
if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
{
if( ref[0] == 0 )
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
if( ref[1] == 0 )
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
}
}
else
{
for( i4=0; i4<4; i4++ )
{
const int x4 = i4%2 + 2*x8;
const int y4 = i4/2 + 2*y8;
const int16_t *mvcol = l1mv[x4 + y4 * h->mb.i_b4_stride];
if( abs( mvcol[0] ) <= 1 && abs( mvcol[1] ) <= 1 )
{
if( ref[0] == 0 )
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, 0 );
if( ref[1] == 0 )
x264_macroblock_cache_mv( h, x4, y4, 1, 1, 1, 0 );
}
}
if( ref[0] == 0 )
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, 0 );
if( ref[1] == 0 )
x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 1, 0 );
}
}
}
......@@ -594,44 +549,13 @@ static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
{
const int i8 = x264_scan8[0] + x + 8*y;
/* FIXME: optimize based on current block size, not global settings? */
if( h->sps->b_direct8x8_inference )
{
if( h->mb.cache.ref[0][i8] >= 0 )
if( h->mb.cache.ref[1][i8] >= 0 )
x264_mb_mc_01xywh( h, x, y, 2, 2 );
else
x264_mb_mc_0xywh( h, x, y, 2, 2 );
if( h->mb.cache.ref[0][i8] >= 0 )
if( h->mb.cache.ref[1][i8] >= 0 )
x264_mb_mc_01xywh( h, x, y, 2, 2 );
else
x264_mb_mc_1xywh( h, x, y, 2, 2 );
}
x264_mb_mc_0xywh( h, x, y, 2, 2 );
else
{
if( h->mb.cache.ref[0][i8] >= 0 )
{
if( h->mb.cache.ref[1][i8] >= 0 )
{
x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
}
else
{
x264_mb_mc_0xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_0xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_0xywh( h, x+1, y+1, 1, 1 );
}
}
else
{
x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
}
}
x264_mb_mc_1xywh( h, x, y, 2, 2 );
}
void x264_mb_mc_8x8( x264_t *h, int i8 )
......@@ -660,37 +584,9 @@ void x264_mb_mc_8x8( x264_t *h, int i8 )
case D_L1_8x8:
x264_mb_mc_1xywh( h, x, y, 2, 2 );
break;
case D_L1_8x4:
x264_mb_mc_1xywh( h, x, y+0, 2, 1 );
x264_mb_mc_1xywh( h, x, y+1, 2, 1 );
break;
case D_L1_4x8:
x264_mb_mc_1xywh( h, x+0, y, 1, 2 );
x264_mb_mc_1xywh( h, x+1, y, 1, 2 );
break;
case D_L1_4x4:
x264_mb_mc_1xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_1xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_1xywh( h, x+1, y+1, 1, 1 );
break;
case D_BI_8x8:
x264_mb_mc_01xywh( h, x, y, 2, 2 );
break;
case D_BI_8x4:
x264_mb_mc_01xywh( h, x, y+0, 2, 1 );
x264_mb_mc_01xywh( h, x, y+1, 2, 1 );
break;
case D_BI_4x8:
x264_mb_mc_01xywh( h, x+0, y, 1, 2 );
x264_mb_mc_01xywh( h, x+1, y, 1, 2 );
break;
case D_BI_4x4:
x264_mb_mc_01xywh( h, x+0, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+0, 1, 1 );
x264_mb_mc_01xywh( h, x+0, y+1, 1, 1 );
x264_mb_mc_01xywh( h, x+1, y+1, 1, 1 );
break;
case D_DIRECT_8x8:
x264_mb_mc_direct8x8( h, x, y );
break;
......
......@@ -455,36 +455,14 @@ static inline int x264_mb_transform_8x8_allowed( x264_t *h )
// large partitions are allowed
// direct and 8x8 are conditional
static const uint8_t partition_tab[X264_MBTYPE_MAX] = {
0,0,0,0,1,2,0,2,1,1,1,1,1,1,1,1,1,2,0,
0,0,0,0,1,2,0,1,1,1,1,1,1,1,1,1,1,1,0,
};
int p, i;
if( !h->pps->b_transform_8x8_mode )
return 0;
p = partition_tab[h->mb.i_type];
if( p < 2 )
return p;
else if( h->mb.i_type == B_DIRECT )
return h->sps->b_direct8x8_inference;
else if( h->mb.i_type == P_8x8 )
{
if( !(h->param.analyse.inter & X264_ANALYSE_PSUB8x8) )
return 1;
for( i=0; i<4; i++ )
if( h->mb.i_sub_partition[i] != D_L0_8x8 )
return 0;
return 1;
}
else // B_8x8
{
// x264 currently doesn't use sub-8x8 B partitions, so don't check for them
if( h->sps->b_direct8x8_inference )
return 1;
for( i=0; i<4; i++ )
if( h->mb.i_sub_partition[i] == D_DIRECT_8x8 )
return 0;
return 1;
}
if( h->mb.i_type != P_8x8 )
return partition_tab[h->mb.i_type];
return *(uint32_t*)h->mb.i_sub_partition == D_L0_8x8*0x01010101;
}
#endif
......
......@@ -549,8 +549,6 @@ static int x264_validate_parameters( x264_t *h )
h->param.analyse.i_mv_range = l->mv_range >> h->param.b_interlaced;
else
h->param.analyse.i_mv_range = x264_clip3(h->param.analyse.i_mv_range, 32, 512 >> h->param.b_interlaced);
if( h->param.analyse.i_direct_8x8_inference < 0 )
h->param.analyse.i_direct_8x8_inference = l->direct8x8;
}
if( h->param.i_threads > 1 )
......@@ -595,7 +593,6 @@ static int x264_validate_parameters( x264_t *h )
BOOLIFY( b_deblocking_filter );
BOOLIFY( b_interlaced );
BOOLIFY( analyse.b_transform_8x8 );
BOOLIFY( analyse.i_direct_8x8_inference );
BOOLIFY( analyse.b_chroma_me );
BOOLIFY( analyse.b_fast_pskip );
BOOLIFY( rc.b_stat_write );
......
......@@ -133,9 +133,7 @@ void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1;
sps->b_frame_mbs_only = ! param->b_interlaced;
sps->b_mb_adaptive_frame_field = param->b_interlaced;
sps->b_direct8x8_inference = param->analyse.i_direct_8x8_inference
|| ! sps->b_frame_mbs_only
|| !(param->analyse.inter & X264_ANALYSE_PSUB8x8);
sps->b_direct8x8_inference = 1;
sps->crop.i_left = 0;
sps->crop.i_top = 0;
......@@ -565,8 +563,6 @@ int x264_validate_levels( x264_t *h, int verbose )
if( h->param.i_fps_den > 0 )
CHECK( "MB rate", l->mbps, (int64_t)mbs * h->param.i_fps_num / h->param.i_fps_den );
if( h->sps->b_direct8x8_inference < l->direct8x8 )
ERROR( "direct 8x8 inference (0) < level requirement (1)\n" );
/* TODO check the rest of the limits */
return ret;
......
......@@ -233,11 +233,6 @@ static void Help( x264_param_t *defaults, int b_longhelp )
H0( " --direct <string> Direct MV prediction mode [\"%s\"]\n"
" - none, spatial, temporal, auto\n",
strtable_lookup( x264_direct_pred_names, defaults->analyse.i_direct_mv_pred ) );
H1( " --direct-8x8 <-1|0|1> Direct prediction size [%d]\n"
" - 0: 4x4\n"
" - 1: 8x8\n"
" - -1: smallest possible according to level\n",
defaults->analyse.i_direct_8x8_inference );
H0( " -w, --weightb Weighted prediction for B-frames\n" );
H0( " --me <string> Integer pixel motion estimation method [\"%s\"]\n",
strtable_lookup( x264_motion_est_names, defaults->analyse.i_me_method ) );
......@@ -425,7 +420,6 @@ static int Parse( int argc, char **argv,
{ "analyse", required_argument, NULL, 0 },
{ "partitions", required_argument, NULL, 'A' },
{ "direct", required_argument, NULL, 0 },
{ "direct-8x8", required_argument, NULL, 0 },
{ "weightb", no_argument, NULL, 'w' },
{ "me", required_argument, NULL, 0 },
{ "merange", required_argument, NULL, 0 },
......
......@@ -35,7 +35,7 @@
#include <stdarg.h>
#define X264_BUILD 65
#define X264_BUILD 66
/* x264_t:
* opaque handler for encoder */
......@@ -228,7 +228,6 @@ typedef struct x264_param_t
int b_transform_8x8;
int b_weighted_bipred; /* implicit weighting for B-frames */
int i_direct_mv_pred; /* spatial vs temporal mv prediction */
int i_direct_8x8_inference; /* forbid 4x4 direct partitions. -1 = auto, based on level */
int i_chroma_qp_offset;
int i_me_method; /* motion estimation algorithm to use (X264_ME_*) */
......@@ -409,10 +408,6 @@ typedef struct
* XXX: it currently doesn't check for overflow */
int x264_nal_encode( void *, int *, int b_annexeb, x264_nal_t *nal );
/* x264_nal_decode:
* decode a buffer nal into a x264_nal_t */
int x264_nal_decode( x264_nal_t *nal, void *, int );
/****************************************************************************
* Encoder functions:
****************************************************************************/
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment