Commit 80458ffc authored by Fiona Glaser's avatar Fiona Glaser

Move adaptive quantization to before ratecontrol, eliminate qcomp bias

This change improves VBV accuracy and improves bit distribution in CRF and 2pass.
Instead of being applied after ratecontrol, AQ becomes part of the complexity measure that ratecontrol uses.
This allows for modularity for changes to AQ; a new AQ algorithm can be introduced simply by introducing a new aq_mode and a corresponding if in adaptive_quant_frame.
This also allows quantizer field smoothing, since quantizers are calculated beforehand rather than during encoding.
Since there is no more reason for it, aq_mode 1 is removed.  The new mode 1 is in a sense a merger of the old modes 1 and 2.
WARNING: This change redefines CRF when using AQ, so output bitrate for a given CRF may be significantly different from before this change!
parent f89e0d06
......@@ -93,7 +93,7 @@ void x264_param_default( x264_param_t *param )
param->rc.i_qp_step = 4;
param->rc.f_ip_factor = 1.4;
param->rc.f_pb_factor = 1.3;
param->rc.i_aq_mode = X264_AQ_GLOBAL;
param->rc.i_aq_mode = X264_AQ_VARIANCE;
param->rc.f_aq_strength = 1.0;
param->rc.b_stat_write = 0;
......
......@@ -114,6 +114,9 @@ x264_frame_t *x264_frame_new( x264_t *h )
for( j = 0; j < h->param.i_bframe + 2; j++ )
CHECKED_MALLOC( frame->i_row_satds[i][j], i_lines/16 * sizeof(int) );
if( h->param.rc.i_aq_mode )
CHECKED_MALLOC( frame->f_qp_offset, h->mb.i_mb_count * sizeof(float) );
x264_pthread_mutex_init( &frame->mutex, NULL );
x264_pthread_cond_init( &frame->cv, NULL );
......
......@@ -71,12 +71,14 @@ typedef struct
* contains the SATD cost of the lowres frame encoded in various modes
* FIXME: how big an array do we need? */
int i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int i_cost_est_aq[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int i_satd; // the i_cost_est of the selected frametype
int i_intra_mbs[X264_BFRAME_MAX+2];
int *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int *i_row_satd;
int *i_row_bits;
int *i_row_qp;
float *f_qp_offset;
/* threading */
int i_lines_completed; /* in pixels */
......
......@@ -487,12 +487,9 @@ static int x264_validate_parameters( x264_t *h )
if( !h->param.b_cabac )
h->param.analyse.i_trellis = 0;
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 2 );
h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
if( h->param.rc.f_aq_strength <= 0 )
h->param.rc.i_aq_mode = 0;
/* VAQ effectively replaces qcomp, so qcomp is raised towards 1 to compensate. */
if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
h->param.rc.f_qcompress = x264_clip3f(h->param.rc.f_qcompress + h->param.rc.f_aq_strength / 0.7, 0, 1);
h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 );
{
......@@ -1362,6 +1359,9 @@ int x264_encoder_encode( x264_t *h,
if( h->frames.b_have_lowres )
x264_frame_init_lowres( h, fenc );
if( h->param.rc.i_aq_mode )
x264_adaptive_quant_frame( h, fenc );
if( h->frames.i_input <= h->frames.i_delay + 1 - h->param.i_threads )
{
/* Nothing yet to encode */
......
......@@ -127,10 +127,6 @@ struct x264_ratecontrol_t
int bframes; /* # consecutive B-frames before this P-frame */
int bframe_bits; /* total cost of those frames */
/* AQ stuff */
float aq_threshold;
int *ac_energy;
int i_zones;
x264_zone_t *zones;
x264_zone_t *prev_zone;
......@@ -172,64 +168,40 @@ static inline double qscale2bits(ratecontrol_entry_t *rce, double qscale)
}
// Find the total AC energy of the block in all planes.
static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, int *satd )
static NOINLINE int ac_energy_mb( x264_t *h, int mb_x, int mb_y, x264_frame_t *frame )
{
/* This function contains annoying hacks because GCC has a habit of reordering emms
* and putting it after floating point ops. As a result, we put the emms at the end of the
* function and make sure that its always called before the float math. Noinline makes
* sure no reordering goes on. */
/* FIXME: This array is larger than necessary because a bug in GCC causes an all-zero
* array to be placed in .bss despite .bss not being correctly aligned on some platforms (win32?) */
DECLARE_ALIGNED_16( static uint8_t zero[17] ) = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
unsigned int var=0, sad, i;
if( satd || h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
for( i=0; i<3; i++ )
{
for( i=0; i<3; i++ )
{
int w = i ? 8 : 16;
int stride = h->fenc->i_stride[i];
int offset = h->mb.b_interlaced
? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
: w * (mb_x + mb_y * stride);
int pix = i ? PIXEL_8x8 : PIXEL_16x16;
stride <<= h->mb.b_interlaced;
var += h->pixf.var[pix]( h->fenc->plane[i]+offset, stride, &sad );
// SATD to represent the block's overall complexity (bit cost) for intra encoding.
// exclude the DC coef, because nothing short of an actual intra prediction will estimate DC cost.
if( var && satd )
*satd += h->pixf.satd[pix]( zero, 0, h->fenc->plane[i]+offset, stride ) - sad/2;
}
var = X264_MAX(var,1);
int w = i ? 8 : 16;
int stride = frame->i_stride[i];
int offset = h->mb.b_interlaced
? w * (mb_x + (mb_y&~1) * stride) + (mb_y&1) * stride
: w * (mb_x + mb_y * stride);
int pix = i ? PIXEL_8x8 : PIXEL_16x16;
stride <<= h->mb.b_interlaced;
var += h->pixf.var[pix]( frame->plane[i]+offset, stride, &sad );
}
else var = h->rc->ac_energy[h->mb.i_mb_xy];
var = X264_MAX(var,1);
x264_emms();
return var;
}
static void x264_autosense_aq( x264_t *h )
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame )
{
double total = 0;
double n = 0;
int mb_x, mb_y;
// FIXME: Some of the SATDs might be already calculated elsewhere (ratecontrol?). Can we reuse them?
// FIXME: Is chroma SATD necessary?
for( mb_y=0; mb_y<h->sps->i_mb_height; mb_y++ )
for( mb_x=0; mb_x<h->sps->i_mb_width; mb_x++ )
{
int satd=0;
int energy = ac_energy_mb( h, mb_x, mb_y, &satd );
h->rc->ac_energy[mb_x + mb_y * h->sps->i_mb_width] = energy;
/* Weight the energy value by the SATD value of the MB.
* This represents the fact that the more complex blocks in a frame should
* be weighted more when calculating the optimal threshold. This also helps
* diminish the negative effect of large numbers of simple blocks in a frame,
* such as in the case of a letterboxed film. */
total += logf(energy) * satd;
n += satd;
int energy = ac_energy_mb( h, mb_x, mb_y, frame );
/* 10 constant chosen to result in approximately the same overall bitrate as without AQ. */
float qp_adj = h->param.rc.f_aq_strength * 1.5 * (logf(energy) - 10.0);
frame->f_qp_offset[mb_x + mb_y*h->mb.i_mb_stride] = qp_adj;
}
x264_emms();
/* Calculate and store the threshold. */
h->rc->aq_threshold = n ? total/n : 15;
}
/*****************************************************************************
......@@ -241,13 +213,11 @@ static void x264_autosense_aq( x264_t *h )
*****************************************************************************/
void x264_adaptive_quant( x264_t *h )
{
int energy = ac_energy_mb( h, h->mb.i_mb_x, h->mb.i_mb_y, NULL );
/* Adjust the QP based on the AC energy of the macroblock. */
float qp = h->rc->f_qpm;
float qp_adj = 1.5 * (logf(energy) - h->rc->aq_threshold);
if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
qp_adj = x264_clip3f( qp_adj, -5, 5 );
h->mb.i_qp = x264_clip3( qp + qp_adj * h->param.rc.f_aq_strength + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
float qp, qp_adj;
x264_emms();
qp = h->rc->f_qpm;
qp_adj = h->fenc->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride];
h->mb.i_qp = x264_clip3( qp + qp_adj + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
/* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB,
* to lower the bit cost of the qp_delta. */
if( abs(h->mb.i_qp - h->mb.i_last_qp) == 1 )
......@@ -554,8 +524,6 @@ int x264_ratecontrol_new( x264_t *h )
h->thread[i]->rc = rc+i;
if( i )
rc[i] = rc[0];
if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
rc[i].ac_energy = x264_malloc( h->mb.i_mb_count * sizeof(int) );
}
return 0;
......@@ -717,8 +685,6 @@ void x264_ratecontrol_delete( x264_t *h )
x264_free( rc->zones[i].param );
x264_free( rc->zones );
}
for( i=0; i<h->param.i_threads; i++ )
x264_free( rc[i].ac_energy );
x264_free( rc );
}
......@@ -842,14 +808,6 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp )
if( h->sh.i_type != SLICE_TYPE_B )
rc->last_non_b_pict_type = h->sh.i_type;
/* Adaptive AQ thresholding algorithm. */
if( h->param.rc.i_aq_mode == X264_AQ_GLOBAL )
/* Arbitrary value for "center" of the AQ curve.
* Chosen so that any given value of CRF has on average similar bitrate with and without AQ. */
h->rc->aq_threshold = logf(5000);
else if( h->param.rc.i_aq_mode == X264_AQ_LOCAL )
x264_autosense_aq(h);
}
static double predict_row_size( x264_t *h, int y, int qp )
......
......@@ -27,6 +27,8 @@
int x264_ratecontrol_new ( x264_t * );
void x264_ratecontrol_delete( x264_t * );
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame );
void x264_adaptive_quant( x264_t * );
void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
void x264_ratecontrol_start( x264_t *, int i_force_qp );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
......@@ -34,7 +36,6 @@ void x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_qp( x264_t * );
void x264_ratecontrol_end( x264_t *, int bits );
void x264_ratecontrol_summary( x264_t * );
void x264_adaptive_quant( x264_t * );
void x264_ratecontrol_set_estimated_size( x264_t *, int bits );
int x264_ratecontrol_get_estimated_size( x264_t const *);
int x264_rc_analyse_slice( x264_t *h );
......
......@@ -248,6 +248,8 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
int b_intra_penalty )
{
int i_score = 0;
/* Don't use the AQ'd scores for slicetype decision. */
int i_score_aq = 0;
/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
......@@ -276,9 +278,15 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
if( p1 != p0 )
dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
if( h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
{
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
}
/* the edge mbs seem to reduce the predictive quality of the
* whole frame's score, but are needed for a spatial distribution. */
if( h->param.rc.i_vbv_buffer_size )
else if( h->param.rc.i_vbv_buffer_size )
{
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
{
......@@ -286,33 +294,45 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
{
int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
row_satd[ h->mb.i_mb_y ] += i_mb_cost;
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
{
x264_emms();
i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
}
row_satd[ h->mb.i_mb_y ] += i_mb_cost_aq;
if( h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
{
/* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
i_score += i_mb_cost;
i_score_aq += i_mb_cost_aq;
}
}
}
}
else if( h->sps->i_mb_width > 2 && h->sps->i_mb_height > 2 )
else
{
for( h->mb.i_mb_y = 1; h->mb.i_mb_y < h->sps->i_mb_height - 1; h->mb.i_mb_y++ )
for( h->mb.i_mb_x = 1; h->mb.i_mb_x < h->sps->i_mb_width - 1; h->mb.i_mb_x++ )
i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
}
else
{
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++ )
i_score += x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
{
int i_mb_cost = x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor );
int i_mb_cost_aq = i_mb_cost;
if( h->param.rc.i_aq_mode )
{
x264_emms();
i_mb_cost_aq *= pow(2.0,-(frames[b]->f_qp_offset[h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride])/6.0);
}
i_score += i_mb_cost;
i_score_aq += i_mb_cost_aq;
}
}
if( b != p1 )
i_score = i_score * 100 / (120 + h->param.i_bframe_bias);
frames[b]->i_cost_est[b-p0][p1-b] = i_score;
frames[b]->i_cost_est_aq[b-p0][p1-b] = i_score_aq;
// fprintf( stderr, "frm %d %c(%d,%d): %6d %6d imb:%d \n", frames[b]->i_frame,
// (p1==0?'I':b<p1?'B':'P'), b-p0, p1-b, i_score, frames[b]->i_cost_est[0][0], frames[b]->i_intra_mbs[b-p0] );
x264_emms();
......@@ -538,6 +558,11 @@ int x264_rc_analyse_slice( x264_t *h )
frames[b] = h->fenc;
cost = x264_slicetype_frame_cost( h, &a, frames, p0, p1, b, 0 );
/* In AQ, use the weighted score instead.
 * x264_slicetype_frame_cost() stores the plain score in i_cost_est (used for
 * slicetype decisions) and the AQ-weighted score in i_cost_est_aq; ratecontrol
 * needs the latter so its complexity measure matches the quantizers AQ will
 * actually apply. */
if( h->param.rc.i_aq_mode )
    cost = frames[b]->i_cost_est_aq[b-p0][p1-b];
h->fenc->i_row_satd = h->fenc->i_row_satds[b-p0][p1-b];
h->fdec->i_row_satd = h->fdec->i_row_satds[b-p0][p1-b];
h->fdec->i_satd = cost;
......
......@@ -194,10 +194,9 @@ static void Help( x264_param_t *defaults, int b_longhelp )
H0( " --ipratio <float> QP factor between I and P [%.2f]\n", defaults->rc.f_ip_factor );
H0( " --pbratio <float> QP factor between P and B [%.2f]\n", defaults->rc.f_pb_factor );
H1( " --chroma-qp-offset <integer> QP difference between chroma and luma [%d]\n", defaults->analyse.i_chroma_qp_offset );
H0( " --aq-mode <integer> How AQ distributes bits [%d]\n"
H1( " --aq-mode <integer> AQ method [%d]\n"
" - 0: Disabled\n"
" - 1: Avoid moving bits between frames\n"
" - 2: Move bits between frames\n", defaults->rc.i_aq_mode );
" - 1: Variance AQ (complexity mask)\n", defaults->rc.i_aq_mode );
H0( " --aq-strength <float> Reduces blocking and blurring in flat and\n"
" textured areas. [%.1f]\n"
" - 0.5: weak AQ\n"
......
......@@ -35,7 +35,7 @@
#include <stdarg.h>
#define X264_BUILD 61
#define X264_BUILD 62
/* x264_t:
* opaque handler for encoder */
......@@ -85,8 +85,7 @@ typedef struct x264_t x264_t;
#define X264_RC_CRF 1
#define X264_RC_ABR 2
#define X264_AQ_NONE 0
#define X264_AQ_LOCAL 1
#define X264_AQ_GLOBAL 2
#define X264_AQ_VARIANCE 1
static const char * const x264_direct_pred_names[] = { "none", "spatial", "temporal", "auto", 0 };
static const char * const x264_motion_est_names[] = { "dia", "hex", "umh", "esa", "tesa", 0 };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment