Commit 835ccc3c authored by Fiona Glaser

Macroblock-tree ratecontrol

On by default; can be turned off with --no-mbtree.
Uses a large lookahead to track temporal propagation of data and weight quality accordingly.
Requires a very large separate statsfile (2 bytes per macroblock) in multi-pass mode.
Doesn't work with b-pyramid yet.
Note that MB-tree inherently measures quality differently from the standard qcomp method, so bitrates produced by CRF may change somewhat.
This makes the "medium" preset a bit slower.  Accordingly, make "fast" slower as well, and introduce a new preset "faster" between "fast" and "veryfast".
All presets "fast" and above will have MB-tree on.
Add a new option, --rc-lookahead, to control the distance MB tree looks ahead to perform propagation analysis.
Default is 40; larger values will be slower and require more memory but give more accurate results.
This value will be used in the future to control ratecontrol lookahead (VBV).
Add a new option, --no-psy, to disable all psy optimizations that don't improve PSNR or SSIM.
This disables psy-RD/trellis, but also other more subtle internal psy optimizations that can't be controlled directly via external parameters.
Quality improvement from MB-tree is about 2-70% depending on content.
Strength of MB-tree adjustments can be tweaked using qcompress; higher values mean lower MB-tree strength.
Note that MB-tree may perform slightly suboptimally on fades; this will be fixed by weighted prediction, which is coming soon.
parent 93cc2893
......@@ -95,6 +95,7 @@ void x264_param_default( x264_param_t *param )
param->rc.f_pb_factor = 1.3;
param->rc.i_aq_mode = X264_AQ_VARIANCE;
param->rc.f_aq_strength = 1.0;
param->rc.i_lookahead = 40;
param->rc.b_stat_write = 0;
param->rc.psz_stat_out = "x264_2pass.log";
......@@ -104,6 +105,7 @@ void x264_param_default( x264_param_t *param )
param->rc.f_qblur = 0.5;
param->rc.f_complexity_blur = 20;
param->rc.i_zones = 0;
param->rc.b_mb_tree = 1;
/* Log */
param->pf_log = x264_log_default;
......@@ -117,6 +119,7 @@ void x264_param_default( x264_param_t *param )
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
param->analyse.i_me_method = X264_ME_HEX;
param->analyse.f_psy_rd = 1.0;
param->analyse.b_psy = 1;
param->analyse.f_psy_trellis = 0;
param->analyse.i_me_range = 16;
param->analyse.i_subpel_refine = 7;
......@@ -493,6 +496,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->analyse.f_psy_trellis = 0;
}
}
OPT("psy")
p->analyse.b_psy = atobool(value);
OPT("chroma-me")
p->analyse.b_chroma_me = atobool(value);
OPT("mixed-refs")
......@@ -524,6 +529,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->rc.f_rf_constant = atof(value);
p->rc.i_rc_method = X264_RC_CRF;
}
OPT("rc-lookahead")
p->rc.i_lookahead = atoi(value);
OPT2("qpmin", "qp-min")
p->rc.i_qp_min = atoi(value);
OPT2("qpmax", "qp-max")
......@@ -559,6 +566,8 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
}
OPT("qcomp")
p->rc.f_qcompress = atof(value);
OPT("mbtree")
p->rc.b_mb_tree = atobool(value);
OPT("qblur")
p->rc.f_qblur = atof(value);
OPT2("cplxblur", "cplx-blur")
......@@ -843,7 +852,9 @@ char *x264_param2string( x264_param_t *p, int b_res )
s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
s += sprintf( s, " psy=%d", p->analyse.b_psy );
if( p->analyse.b_psy )
s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
s += sprintf( s, " chroma_me=%d", p->analyse.b_chroma_me );
......@@ -868,9 +879,12 @@ char *x264_param2string( x264_param_t *p, int b_res )
s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d",
p->i_keyint_max, p->i_keyint_min, p->i_scenecut_threshold );
s += sprintf( s, " rc=%s", p->rc.i_rc_method == X264_RC_ABR ?
if( p->rc.b_mb_tree )
s += sprintf( s, " rc_lookahead=%d", p->rc.i_lookahead );
s += sprintf( s, " rc=%s mbtree=%d", p->rc.i_rc_method == X264_RC_ABR ?
( p->rc.b_stat_read ? "2pass" : p->rc.i_vbv_buffer_size ? "cbr" : "abr" )
: p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp" );
: p->rc.i_rc_method == X264_RC_CRF ? "crf" : "cqp", p->rc.b_mb_tree );
if( p->rc.i_rc_method == X264_RC_ABR || p->rc.i_rc_method == X264_RC_CRF )
{
if( p->rc.i_rc_method == X264_RC_CRF )
......@@ -892,7 +906,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
if( !(p->rc.i_rc_method == X264_RC_CQP && p->rc.i_qp_constant == 0) )
{
s += sprintf( s, " ip_ratio=%.2f", p->rc.f_ip_factor );
if( p->i_bframe )
if( p->i_bframe && !p->rc.b_mb_tree )
s += sprintf( s, " pb_ratio=%.2f", p->rc.f_pb_factor );
s += sprintf( s, " aq=%d", p->rc.i_aq_mode );
if( p->rc.i_aq_mode )
......
......@@ -51,6 +51,7 @@
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
#define X264_PCM_COST (386*8)
/* Upper bound for rc.i_lookahead (the value is clipped to this during parameter
 * validation); also used to size the frame queues in x264_t. */
#define X264_LOOKAHEAD_MAX 250
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
......@@ -152,6 +153,49 @@ static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop )
return amvd0 + (amvd1<<16);
}
/* Fractional-part table for x264_exp2fix8(): 64 entries, indexed by the
 * fractional part of the exponent scaled by 64.  x264_exp2fix8() adds 256 to
 * the entry before shifting, so each value is the part of the 8-bit
 * fixed-point mantissa above 1.0.
 * NOTE(review): entries look like round(256 * (2^((i+0.5)/64) - 1)) --
 * confirm against whatever generated the table. */
static const uint8_t exp2_lut[64] = {
1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
};
/* Table-driven approximation of 2^x in fixed point with 8 fractional bits
 * (i.e. roughly 256 * 2^x).
 * Returns 0 when x <= -8 and saturates to 0xffff when x >= 8. */
static ALWAYS_INLINE int x264_exp2fix8( float x )
{
    int shift, lut_idx;
    /* Bias by 8 so the usable exponent range becomes (0,16). */
    float biased = x + 8;

    if( biased <= 0 )
        return 0;
    if( biased >= 16 )
        return 0xffff;

    /* Integer part selects the shift, fractional part selects the table entry. */
    shift = biased;
    lut_idx = (biased - shift) * 64;
    return (exp2_lut[lut_idx] + 256) << shift >> 8;
}
/* Mantissa table for x264_log2(): 128 entries, indexed by the 7 bits that
 * follow the leading one bit of the argument.
 * NOTE(review): entry i matches log2(1 + i/128) to five decimals -- confirm
 * against whatever generated the table. */
static const float log2_lut[128] = {
0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};
/* Table-driven approximation of log2(x).
 * The position of the leading one bit gives the integer part; the next 7 bits
 * index log2_lut for the fractional part.
 * NOTE(review): x is handed straight to x264_clz(), so behavior for x == 0
 * depends on how x264_clz treats zero -- confirm callers never pass 0. */
static ALWAYS_INLINE float x264_log2( uint32_t x )
{
    int leading_zeros = x264_clz( x );
    /* Normalize so the top set bit is bit 31, then keep the 7 bits below it. */
    uint32_t mantissa_idx = ( x << leading_zeros >> 24 ) & 0x7f;
    return log2_lut[mantissa_idx] + (31 - leading_zeros);
}
/****************************************************************************
*
****************************************************************************/
......@@ -327,11 +371,11 @@ struct x264_t
struct
{
/* Frames to be encoded (whose types have been decided) */
x264_frame_t *current[X264_BFRAME_MAX*4+3];
x264_frame_t *current[X264_LOOKAHEAD_MAX+3];
/* Temporary buffer (frames types not yet decided) */
x264_frame_t *next[X264_BFRAME_MAX*4+3];
x264_frame_t *next[X264_LOOKAHEAD_MAX+3];
/* Unused frames */
x264_frame_t *unused[X264_BFRAME_MAX*4 + X264_THREAD_MAX*2 + 16+4];
x264_frame_t *unused[X264_LOOKAHEAD_MAX + X264_THREAD_MAX*2 + 16+4];
/* For adaptive B decision */
x264_frame_t *last_nonb;
......
......@@ -96,6 +96,15 @@ x264_frame_t *x264_frame_new( x264_t *h )
memset( frame->lowres_mvs[j][i], 0, 2*h->mb.i_mb_count*sizeof(int16_t) );
CHECKED_MALLOC( frame->lowres_mv_costs[j][i], h->mb.i_mb_count*sizeof(int) );
}
CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
memset( frame->i_intra_cost, -1, i_mb_count * sizeof(uint16_t) );
CHECKED_MALLOC( frame->i_propagate_cost, i_mb_count * sizeof(uint32_t) );
for( j = 0; j <= h->param.i_bframe+1; j++ )
for( i = 0; i <= h->param.i_bframe+1; i++ )
{
CHECKED_MALLOC( frame->lowres_costs[j][i], i_mb_count * sizeof(uint16_t) );
CHECKED_MALLOC( frame->lowres_inter_types[j][i], i_mb_count * sizeof(uint8_t) );
}
}
if( h->param.analyse.i_me_method >= X264_ME_ESA )
......@@ -116,7 +125,6 @@ x264_frame_t *x264_frame_new( x264_t *h )
CHECKED_MALLOC( frame->mb_type, i_mb_count * sizeof(int8_t));
CHECKED_MALLOC( frame->mv[0], 2*16 * i_mb_count * sizeof(int16_t) );
CHECKED_MALLOC( frame->ref[0], 4 * i_mb_count * sizeof(int8_t) );
CHECKED_MALLOC( frame->i_intra_cost, i_mb_count * sizeof(uint16_t) );
if( h->param.i_bframe )
{
CHECKED_MALLOC( frame->mv[1], 2*16 * i_mb_count * sizeof(int16_t) );
......
......@@ -63,6 +63,8 @@ typedef struct
int8_t *mb_type;
int16_t (*mv[2])[2];
int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
uint8_t (*lowres_inter_types[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
int *lowres_mv_costs[2][X264_BFRAME_MAX+1];
int8_t *ref[2];
int i_ref[2];
......@@ -83,6 +85,7 @@ typedef struct
float *f_qp_offset;
int b_intra_calculated;
uint16_t *i_intra_cost;
uint32_t *i_propagate_cost;
uint16_t *i_inv_qscale_factor;
/* threading */
......
......@@ -147,7 +147,9 @@
#ifdef WORDS_BIGENDIAN
#define endian_fix(x) (x)
#define endian_fix32(x) (x)
#elif defined(__GNUC__) && defined(HAVE_MMX)
#define endian_fix16(x) (x)
#else
#if defined(__GNUC__) && defined(HAVE_MMX)
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
asm("bswap %0":"+r"(x));
......@@ -171,6 +173,11 @@ static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
return endian_fix32(x);
}
#endif
/* Swap the two bytes of a 16-bit value. */
static ALWAYS_INLINE uint16_t endian_fix16( uint16_t x )
{
    /* Each half is narrowed back to 16 bits, discarding the bits that the
     * integer promotion of x<<8 would otherwise carry above bit 15. */
    uint16_t low_moved_up = x << 8;
    uint16_t high_moved_down = x >> 8;
    return low_moved_up | high_moved_down;
}
#endif
#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 3)
#define x264_clz(x) __builtin_clz(x)
......
......@@ -276,8 +276,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
}
h->mb.i_psy_rd_lambda = a->i_lambda;
/* Adjusting chroma lambda based on QP offset hurts PSNR, so we'll leave it as part of psy-RD. */
h->mb.i_chroma_lambda2_offset = h->mb.i_psy_rd ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
/* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
h->mb.i_me_method = h->param.analyse.i_me_method;
h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine;
......
......@@ -42,7 +42,7 @@
#define bs_write_ue bs_write_ue_big
static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_out );
......@@ -441,6 +441,7 @@ static int x264_validate_parameters( x264_t *h )
h->param.rc.i_qp_min = x264_clip3( (int)(X264_MIN3( qp_p, qp_i, qp_b )), 0, 51 );
h->param.rc.i_qp_max = x264_clip3( (int)(X264_MAX3( qp_p, qp_i, qp_b ) + .999), 0, 51 );
h->param.rc.i_aq_mode = 0;
h->param.rc.b_mb_tree = 0;
}
h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, 51 );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
......@@ -473,6 +474,15 @@ static int x264_validate_parameters( x264_t *h )
if( !h->param.i_bframe )
h->param.i_bframe_adaptive = X264_B_ADAPT_NONE;
h->param.analyse.b_weighted_bipred = h->param.analyse.b_weighted_bipred && h->param.i_bframe > 0;
h->param.rc.i_lookahead = x264_clip3( h->param.rc.i_lookahead, 0, X264_LOOKAHEAD_MAX );
h->param.rc.i_lookahead = X264_MIN( h->param.rc.i_lookahead, h->param.i_keyint_max );
if( h->param.rc.b_stat_read )
h->param.rc.i_lookahead = 0;
else if( !h->param.rc.i_lookahead )
h->param.rc.b_mb_tree = 0;
if( h->param.rc.f_qcompress == 1 )
h->param.rc.b_mb_tree = 0;
h->mb.b_direct_auto_write = h->param.analyse.i_direct_mv_pred == X264_DIRECT_PRED_AUTO
&& h->param.i_bframe
&& ( h->param.rc.b_stat_write || !h->param.rc.b_stat_read );
......@@ -513,6 +523,11 @@ static int x264_validate_parameters( x264_t *h )
if( !h->param.b_cabac )
h->param.analyse.i_trellis = 0;
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
if( !h->param.analyse.b_psy )
{
h->param.analyse.f_psy_rd = 0;
h->param.analyse.f_psy_trellis = 0;
}
if( !h->param.analyse.i_trellis )
h->param.analyse.f_psy_trellis = 0;
h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
......@@ -537,6 +552,17 @@ static int x264_validate_parameters( x264_t *h )
h->param.rc.f_aq_strength = x264_clip3f( h->param.rc.f_aq_strength, 0, 3 );
if( h->param.rc.f_aq_strength == 0 )
h->param.rc.i_aq_mode = 0;
/* MB-tree requires AQ to be on, even if the strength is zero. */
if( !h->param.rc.i_aq_mode && h->param.rc.b_mb_tree )
{
h->param.rc.i_aq_mode = 1;
h->param.rc.f_aq_strength = 0;
}
if( h->param.rc.b_mb_tree && h->param.b_bframe_pyramid )
{
x264_log( h, X264_LOG_WARNING, "b-pyramid + mb-tree is not supported\n" );
h->param.b_bframe_pyramid = 0;
}
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) )
h->param.analyse.i_subpel_refine = 9;
......@@ -723,6 +749,9 @@ x264_t *x264_encoder_open ( x264_param_t *param )
h->frames.i_delay = X264_MAX(h->param.i_bframe,3)*4 + h->param.i_threads - 1;
else
h->frames.i_delay = h->param.i_bframe + h->param.i_threads - 1;
if( h->param.rc.b_mb_tree )
h->frames.i_delay = X264_MAX( h->frames.i_delay, h->param.rc.i_lookahead );
h->frames.i_max_ref0 = h->param.i_frame_reference;
h->frames.i_max_ref1 = h->sps->vui.i_num_reorder_frames;
h->frames.i_max_dpb = h->sps->vui.i_max_dec_frame_buffering;
......@@ -730,7 +759,8 @@ x264_t *x264_encoder_open ( x264_param_t *param )
&& ( h->param.rc.i_rc_method == X264_RC_ABR
|| h->param.rc.i_rc_method == X264_RC_CRF
|| h->param.i_bframe_adaptive
|| h->param.i_scenecut_threshold );
|| h->param.i_scenecut_threshold
|| h->param.rc.b_mb_tree );
h->frames.b_have_lowres |= (h->param.rc.b_stat_read && h->param.rc.i_vbv_buffer_size > 0);
h->frames.b_have_sub8x8_esa = !!(h->param.analyse.inter & X264_ANALYSE_PSUB8x8);
......@@ -1443,7 +1473,12 @@ int x264_encoder_encode( x264_t *h,
if( h->frames.b_have_lowres )
x264_frame_init_lowres( h, fenc );
if( h->param.rc.i_aq_mode )
if( h->param.rc.b_mb_tree && h->param.rc.b_stat_read )
{
if( x264_macroblock_tree_read( h, fenc ) )
return -1;
}
else if( h->param.rc.i_aq_mode )
x264_adaptive_quant_frame( h, fenc );
if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
......@@ -1461,7 +1496,8 @@ int x264_encoder_encode( x264_t *h,
/* 2: Select frame types */
if( h->frames.next[0] == NULL )
{
x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
return -1;
return 0;
}
......@@ -1621,11 +1657,12 @@ int x264_encoder_encode( x264_t *h,
else
x264_slices_write( h );
x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out );
if( x264_encoder_frame_end( thread_oldest, thread_current, pp_nal, pi_nal, pic_out ) < 0 )
return -1;
return 0;
}
static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_out )
{
......@@ -1640,7 +1677,7 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
if( !h->out.i_nal )
{
pic_out->i_type = X264_TYPE_AUTO;
return;
return 0;
}
x264_frame_push_unused( thread_current, h->fenc );
......@@ -1670,7 +1707,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
/* update rc */
x264_emms();
x264_ratecontrol_end( h, h->out.i_frame_size * 8 );
if( x264_ratecontrol_end( h, h->out.i_frame_size * 8 ) < 0 )
return -1;
/* restore CPU state (before using float again) */
x264_emms();
......@@ -1784,6 +1822,8 @@ static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
if( h->param.psz_dump_yuv )
x264_frame_dump( h );
return 0;
}
static void x264_print_intra( int64_t *i_mb_count, double i_count, int b_print_pcm, char *intra )
......
......@@ -71,6 +71,7 @@ struct x264_ratecontrol_t
double fps;
double bitrate;
double rate_tolerance;
double qcompress;
int nmb; /* number of macroblocks in a frame */
int qp_constant[5];
......@@ -106,6 +107,10 @@ struct x264_ratecontrol_t
/* 2pass stuff */
FILE *p_stat_file_out;
char *psz_stat_file_tmpname;
FILE *p_mbtree_stat_file_out;
char *psz_mbtree_stat_file_tmpname;
char *psz_mbtree_stat_file_name;
FILE *p_mbtree_stat_file_in;
int num_entries; /* number of ratecontrol_entry_ts */
ratecontrol_entry_t *entry; /* FIXME: copy needed data and free this once init is done */
......@@ -118,6 +123,7 @@ struct x264_ratecontrol_t
double lmin[5]; /* min qscale by frame type */
double lmax[5];
double lstep; /* max change (multiply) in qscale per frame */
uint16_t *qp_buffer; /* Global buffer for converting MB-tree quantizer data. */
/* MBRC stuff */
double frame_size_estimated;
......@@ -191,49 +197,6 @@ static NOINLINE uint32_t ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame
return var;
}
/* Mantissa table for x264_log2(): 128 entries, indexed by the 7 bits that
 * follow the leading one bit of the argument.
 * NOTE(review): entry i matches log2(1 + i/128) to five decimals -- confirm
 * against whatever generated the table. */
static const float log2_lut[128] = {
0.00000, 0.01123, 0.02237, 0.03342, 0.04439, 0.05528, 0.06609, 0.07682,
0.08746, 0.09803, 0.10852, 0.11894, 0.12928, 0.13955, 0.14975, 0.15987,
0.16993, 0.17991, 0.18982, 0.19967, 0.20945, 0.21917, 0.22882, 0.23840,
0.24793, 0.25739, 0.26679, 0.27612, 0.28540, 0.29462, 0.30378, 0.31288,
0.32193, 0.33092, 0.33985, 0.34873, 0.35755, 0.36632, 0.37504, 0.38370,
0.39232, 0.40088, 0.40939, 0.41785, 0.42626, 0.43463, 0.44294, 0.45121,
0.45943, 0.46761, 0.47573, 0.48382, 0.49185, 0.49985, 0.50779, 0.51570,
0.52356, 0.53138, 0.53916, 0.54689, 0.55459, 0.56224, 0.56986, 0.57743,
0.58496, 0.59246, 0.59991, 0.60733, 0.61471, 0.62205, 0.62936, 0.63662,
0.64386, 0.65105, 0.65821, 0.66534, 0.67243, 0.67948, 0.68650, 0.69349,
0.70044, 0.70736, 0.71425, 0.72110, 0.72792, 0.73471, 0.74147, 0.74819,
0.75489, 0.76155, 0.76818, 0.77479, 0.78136, 0.78790, 0.79442, 0.80090,
0.80735, 0.81378, 0.82018, 0.82655, 0.83289, 0.83920, 0.84549, 0.85175,
0.85798, 0.86419, 0.87036, 0.87652, 0.88264, 0.88874, 0.89482, 0.90087,
0.90689, 0.91289, 0.91886, 0.92481, 0.93074, 0.93664, 0.94251, 0.94837,
0.95420, 0.96000, 0.96578, 0.97154, 0.97728, 0.98299, 0.98868, 0.99435,
};
/* Fractional-part table for x264_exp2fix8(): 64 entries, indexed by the
 * fractional part of the exponent scaled by 64.  x264_exp2fix8() adds 256 to
 * the entry before shifting, so each value is the part of the 8-bit
 * fixed-point mantissa above 1.0.
 * NOTE(review): entries look like round(256 * (2^((i+0.5)/64) - 1)) --
 * confirm against whatever generated the table. */
static const uint8_t exp2_lut[64] = {
1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 44, 47,
50, 53, 57, 60, 64, 67, 71, 74, 78, 81, 85, 89, 93, 96, 100, 104,
108, 112, 116, 120, 124, 128, 132, 137, 141, 145, 150, 154, 159, 163, 168, 172,
177, 182, 186, 191, 196, 201, 206, 211, 216, 221, 226, 232, 237, 242, 248, 253,
};
/* Table-driven approximation of log2(x).
 * The position of the leading one bit gives the integer part; the next 7 bits
 * index log2_lut for the fractional part.
 * NOTE(review): x is handed straight to x264_clz(), so behavior for x == 0
 * depends on how x264_clz treats zero -- confirm callers never pass 0. */
static ALWAYS_INLINE float x264_log2( uint32_t x )
{
    int leading_zeros = x264_clz( x );
    /* Normalize so the top set bit is bit 31, then keep the 7 bits below it. */
    uint32_t mantissa_idx = ( x << leading_zeros >> 24 ) & 0x7f;
    return log2_lut[mantissa_idx] + (31 - leading_zeros);
}
/* Table-driven approximation of 2^x in fixed point with 8 fractional bits
 * (i.e. roughly 256 * 2^x).
 * Returns 0 when x <= -8 and saturates to 0xffff when x >= 8. */
static ALWAYS_INLINE int x264_exp2fix8( float x )
{
    int shift, lut_idx;
    /* Bias by 8 so the usable exponent range becomes (0,16). */
    float biased = x + 8;

    if( biased <= 0 )
        return 0;
    if( biased >= 16 )
        return 0xffff;

    /* Integer part selects the shift, fractional part selects the table entry. */
    shift = biased;
    lut_idx = (biased - shift) * 64;
    return (exp2_lut[lut_idx] + 256) << shift >> 8;
}
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
{
/* constants chosen to result in approximately the same overall bitrate as without AQ.
......@@ -241,6 +204,17 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
int mb_x, mb_y;
float strength;
float avg_adj = 0.f;
/* Need to init it anyways for MB tree. */
if( h->param.rc.f_aq_strength == 0 )
{
int mb_xy;
memset( frame->f_qp_offset, 0, h->mb.i_mb_count * sizeof(float) );
if( h->frames.b_have_lowres )
for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
frame->i_inv_qscale_factor[mb_xy] = 256;
return;
}
if( h->param.rc.i_aq_mode == X264_AQ_AUTOVARIANCE )
{
for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
......@@ -257,6 +231,7 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
}
else
strength = h->param.rc.f_aq_strength * 1.0397f;
for( mb_y = 0; mb_y < h->sps->i_mb_height; mb_y++ )
for( mb_x = 0; mb_x < h->sps->i_mb_width; mb_x++ )
{
......@@ -291,6 +266,47 @@ void x264_adaptive_quant( x264_t *h )
h->mb.i_qp = x264_clip3( h->rc->f_qpm + h->fenc->f_qp_offset[h->mb.i_mb_xy] + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
}
/* Fill frame->f_qp_offset for a frame when MB-tree data is read back from a
 * stats file.
 *
 * For non-B frames, one record is consumed from rc->p_mbtree_stat_file_in:
 * a single frame-type byte followed by h->mb.i_mb_count 16-bit values.  Each
 * value is passed through endian_fix16(), reinterpreted as signed and scaled
 * by 1/256 to give the per-macroblock QP offset.  B-frames have no record and
 * are handled by x264_adaptive_quant_frame() instead.
 *
 * Returns 0 on success, or -1 after logging an error if the stored frame type
 * disagrees with the one in rc->entry[] or the file ends early. */
int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame )
{
    x264_ratecontrol_t *rc = h->rc;
    uint8_t expected_type = rc->entry[frame->i_frame].pict_type;
    uint8_t stored_type;
    int mb_xy;

    if( expected_type == SLICE_TYPE_B )
    {
        x264_adaptive_quant_frame( h, frame );
        return 0;
    }

    /* Record header: the frame type this record was written for. */
    if( !fread( &stored_type, 1, 1, rc->p_mbtree_stat_file_in ) )
        goto fail;

    if( stored_type != expected_type )
    {
        x264_log(h, X264_LOG_ERROR, "MB-tree frametype %d doesn't match actual frametype %d.\n", stored_type,expected_type);
        return -1;
    }

    /* Record payload: one 16-bit value per macroblock. */
    if( fread( rc->qp_buffer, sizeof(uint16_t), h->mb.i_mb_count, rc->p_mbtree_stat_file_in ) != h->mb.i_mb_count )
        goto fail;

    for( mb_xy = 0; mb_xy < h->mb.i_mb_count; mb_xy++ )
        frame->f_qp_offset[mb_xy] = ((float)(int16_t)endian_fix16( rc->qp_buffer[mb_xy] )) * (1/256.0);

    return 0;

fail:
    x264_log(h, X264_LOG_ERROR, "Incomplete MB-tree stats file.\n");
    return -1;
}
/* Build a new string consisting of `input` followed by `suffix`.
 *
 * The result is allocated with x264_malloc(); the caller owns it and releases
 * it with x264_free().  Returns NULL if the allocation fails, so callers must
 * check the result before using it as a filename. */
static char *x264_strcat_filename( char *input, char *suffix )
{
    char *output = x264_malloc( strlen( input ) + strlen( suffix ) + 1 );
    /* Bail out instead of writing through a NULL pointer on allocation failure. */
    if( !output )
        return NULL;
    strcpy( output, input );
    strcat( output, suffix );
    return output;
}
int x264_ratecontrol_new( x264_t *h )
{
x264_ratecontrol_t *rc;
......@@ -310,6 +326,14 @@ int x264_ratecontrol_new( x264_t *h )
else
rc->fps = 25.0;
if( h->param.rc.b_mb_tree )
{
h->param.rc.f_pb_factor = 1;
rc->qcompress = 1;
}
else
rc->qcompress = h->param.rc.f_qcompress;
rc->bitrate = h->param.rc.i_bitrate * 1000.;
rc->rate_tolerance = h->param.rc.f_rate_tolerance;
rc->nmb = h->mb.i_mb_count;
......@@ -379,17 +403,19 @@ int x264_ratecontrol_new( x264_t *h )
rc->accum_p_norm = .01;
rc->accum_p_qp = ABR_INIT_QP * rc->accum_p_norm;
/* estimated ratio that produces a reasonable QP for the first I-frame */
rc->cplxr_sum = .01 * pow( 7.0e5, h->param.rc.f_qcompress ) * pow( h->mb.i_mb_count, 0.5 );
rc->cplxr_sum = .01 * pow( 7.0e5, rc->qcompress ) * pow( h->mb.i_mb_count, 0.5 );
rc->wanted_bits_window = 1.0 * rc->bitrate / rc->fps;
rc->last_non_b_pict_type = SLICE_TYPE_I;
}
if( h->param.rc.i_rc_method == X264_RC_CRF )
{
/* arbitrary rescaling to make CRF somewhat similar to QP */
/* Arbitrary rescaling to make CRF somewhat similar to QP.
* Try to compensate for MB-tree's effects as well. */
double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
rc->rate_factor_constant = pow( base_cplx, 1 - h->param.rc.f_qcompress )
/ qp2qscale( h->param.rc.f_rf_constant );
double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*13.5 : 0;
rc->rate_factor_constant = pow( base_cplx, 1 - rc->qcompress )
/ qp2qscale( h->param.rc.f_rf_constant + mbtree_offset );
}
rc->ip_offset = 6.0 * log(h->param.rc.f_ip_factor) / log(2.0);
......@@ -437,6 +463,17 @@ int x264_ratecontrol_new( x264_t *h )
x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open stats file\n");
return -1;
}
if( h->param.rc.b_mb_tree )
{
char *mbtree_stats_in = x264_strcat_filename( h->param.rc.psz_stat_in, ".mbtree" );
rc->p_mbtree_stat_file_in = fopen( mbtree_stats_in, "rb" );
x264_free( mbtree_stats_in );
if( !rc->p_mbtree_stat_file_in )
{
x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
return -1;
}
}
/* check whether 1st pass options were compatible with current options */
if( !strncmp( stats_buf, "#options:", 9 ) )
......@@ -483,6 +520,9 @@ int x264_ratecontrol_new( x264_t *h )
x264_log( h, X264_LOG_ERROR, "b_adapt method specified in stats file not valid\n" );
return -1;
}
if( h->param.rc.b_mb_tree && ( p = strstr( opts, "rc_lookahead=" ) ) && sscanf( p, "rc_lookahead=%d", &i ) )
h->param.rc.i_lookahead = i;
}
/* find number of pics */
......@@ -585,10 +625,7 @@ int x264_ratecontrol_new( x264_t *h )
if( h->param.rc.b_stat_write )
{
char *p;
rc->psz_stat_file_tmpname = x264_malloc( strlen(h->param.rc.psz_stat_out) + 6 );
strcpy( rc->psz_stat_file_tmpname, h->param.rc.psz_stat_out );
strcat( rc->psz_stat_file_tmpname, ".temp" );
rc->psz_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".temp" );
rc->p_stat_file_out = fopen( rc->psz_stat_file_tmpname, "wb" );
if( rc->p_stat_file_out == NULL )
......@@ -600,6 +637,25 @@ int x264_ratecontrol_new( x264_t *h )
p = x264_param2string( &h->param, 1 );
fprintf( rc->p_stat_file_out, "#options: %s\n", p );
x264_free( p );
if( h->param.rc.b_mb_tree && !h->param.rc.b_stat_read )
{
rc->psz_mbtree_stat_file_tmpname = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree.temp" );
rc->psz_mbtree_stat_file_name = x264_strcat_filename( h->param.rc.psz_stat_out, ".mbtree" );
rc->p_mbtree_stat_file_out = fopen( rc->psz_mbtree_stat_file_tmpname, "wb" );
if( rc->p_mbtree_stat_file_out == NULL )
{
x264_log(h, X264_LOG_ERROR, "ratecontrol_init: can't open mbtree stats file\n");
return -1;
}
}
}
if( h->param.rc.b_mb_tree && (h->param.rc.b_stat_read || h->param.rc.b_stat_write) )
{
rc->qp_buffer = x264_malloc( h->mb.i_mb_count * sizeof(uint16_t));
if( !rc->qp_buffer )
return -1;
}
for( i=0; i<h->param.i_threads; i++ )
......@@ -738,9 +794,10 @@ void x264_ratecontrol_summary( x264_t *h )
if( rc->b_abr && h->param.rc.i_rc_method == X264_RC_ABR && rc->cbr_decay > .9999 )
{
double base_cplx = h->mb.i_mb_count * (h->param.i_bframe ? 120 : 80);
double mbtree_offset = h->param.rc.b_mb_tree ? (1.0-h->param.rc.f_qcompress)*12.5 : 0;
x264_log( h, X264_LOG_INFO, "final ratefactor: %.2f\n",
qscale2qp( pow( base_cplx, 1 - h->param.rc.f_qcompress )
* rc->cplxr_sum / rc->wanted_bits_window ) );
qscale2qp( pow( base_cplx, 1 - rc->qcompress )
* rc->cplxr_sum / rc->wanted_bits_window ) - mbtree_offset );
}
}
......@@ -760,9 +817,22 @@ void x264_ratecontrol_delete( x264_t *h )
}
x264_free( rc->psz_stat_file_tmpname );
}
if( rc->p_mbtree_stat_file_out )
{
fclose( rc->p_mbtree_stat_file_out );
if( h->i_frame >= rc->num_entries )
if( rename( rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name ) != 0 )
{
x264_log( h, X264_LOG_ERROR, "failed to rename \"%s\" to \"%s\"\n",
rc->psz_mbtree_stat_file_tmpname, rc->psz_mbtree_stat_file_name );
}
x264_free( rc->psz_mbtree_stat_file_tmpname );
x264_free( rc->psz_mbtree_stat_file_name );
}
x264_free( rc->pred );
x264_free( rc->pred_b_from_p );
x264_free( rc->entry );
x264_free( rc->qp_buffer );
if( rc->zones )
{
x264_free( rc->zones[0].param );
......@@ -1086,7 +1156,7 @@ int x264_ratecontrol_slice_type( x264_t *h, int frame_num )
}
/* After encoding one frame, save stats and update ratecontrol state */
void x264_ratecontrol_end( x264_t *h, int bits )
int x264_ratecontrol_end( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int *mbs = h->stat.frame.i_mb_count;
......@@ -1114,7 +1184,7 @@ void x264_ratecontrol_end( x264_t *h, int bits )
( dir_frame>0 ? 's' : dir_frame<0 ? 't' :
dir_avg>0 ? 's' : dir_avg<0 ? 't' : '-' )
: '-';
fprintf( rc->p_stat_file_out,
if( fprintf( rc->p_stat_file_out,
"in:%d out:%d type:%c q:%.2f tex:%d mv:%d misc:%d imb:%d pmb:%d smb:%d d:%c;\n",
h->fenc->i_frame, h->i_frame,
c_type, rc->qpa_rc,
......@@ -1124,7 +1194,22 @@ void x264_ratecontrol_end( x264_t *h, int bits )
h->stat.frame.i_mb_count_i,
h->stat.frame.i_mb_count_p,
h->stat.frame.i_mb_count_skip,
c_direct);
c_direct) < 0 )
goto fail;
/* Don't re-write the data in multi-pass mode. */
if( h->param.rc.b_mb_tree && h->fenc->b_kept_as_ref && !h->param.rc.b_stat_read )
{
uint8_t i_type = h->sh.i_type;
int i;
/* Values are stored as big-endian FIX8.8 */
for( i = 0; i < h->mb.i_mb_count; i++ )