Commit 2535ba17 authored by Fiona Glaser

Add row-reencoding support to VBV for improved accuracy

Extremely accurate, possibly 100% so (I can't get it to fail even with difficult VBVs).
Does not yet support rows split on slice boundaries (occurs often with slice-max-size/mbs).
Still inaccurate with sliced threads, but better than before.
parent bc473ddf
......@@ -360,13 +360,11 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
{
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
{
CHECKED_MALLOC( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
if( !PARAM_INTERLACED )
h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
}
for( int i = 0; i <= PARAM_INTERLACED; i++ )
{
......@@ -404,7 +402,7 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
{
for( int i = 0; i <= PARAM_INTERLACED; i++ )
x264_free( h->deblock_strength[i] );
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int i = 0; i < (PARAM_INTERLACED ? 5 : 2); i++ )
for( int j = 0; j < (CHROMA444 ? 3 : 2); j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
}
......@@ -563,7 +561,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
? 16 * mb_x + height * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + height * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
int fdec_idx = b_mbaff ? (mb_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : !(mb_y&1);
pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
/* ref_pix_offset[0] references the current field and [1] the opposite field. */
......@@ -576,20 +574,16 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
h->mc.load_deinterleave_chroma_fenc( h->mb.pic.p_fenc[1], h->mb.pic.p_fenc_plane[1], i_stride2, height );
memcpy( h->mb.pic.p_fdec[1]-FDEC_STRIDE, intra_fdec, 8*sizeof(pixel) );
memcpy( h->mb.pic.p_fdec[2]-FDEC_STRIDE, intra_fdec+8, 8*sizeof(pixel) );
if( b_mbaff )
{
h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
}
h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = intra_fdec[-1-8];
h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = intra_fdec[-1];
}
else
{
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mb.pic.p_fenc_plane[i], i_stride2, 16 );
memcpy( h->mb.pic.p_fdec[i]-FDEC_STRIDE, intra_fdec, 24*sizeof(pixel) );
if( b_mbaff )
h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
h->mb.pic.p_fdec[i][-FDEC_STRIDE-1] = intra_fdec[-1];
}
if( b_mbaff )
if( b_mbaff || h->mb.b_reencode_mb )
{
for( int j = 0; j < height; j++ )
if( b_chroma )
......@@ -1638,7 +1632,7 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int
* For progressive mbs this is the bottom two rows, and for interlaced the
* bottom row of each field. We also store samples needed for the next
* mbpair in intra_border_backup[2]. */
int backup_dst = !b_mbaff ? 0 : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
int backup_dst = !b_mbaff ? (mb_y&1) : (mb_y&1) ? 1 : MB_INTERLACED ? 0 : 2;
memcpy( &h->intra_border_backup[backup_dst][0][mb_x*16 ], h->mb.pic.p_fdec[0]+FDEC_STRIDE*15, 16*sizeof(pixel) );
if( CHROMA444 )
{
......@@ -1672,14 +1666,6 @@ static void ALWAYS_INLINE x264_macroblock_backup_intra( x264_t *h, int mb_x, int
}
}
}
else
{
/* In progressive we update intra_border_backup in-place, so the topleft neighbor will
* no longer exist there when load_pic_pointers wants it. Move it within p_fdec instead. */
h->mb.pic.p_fdec[0][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[0][-FDEC_STRIDE+15];
h->mb.pic.p_fdec[1][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[1][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
h->mb.pic.p_fdec[2][-FDEC_STRIDE-1] = h->mb.pic.p_fdec[2][-FDEC_STRIDE+(15>>CHROMA_H_SHIFT)];
}
}
void x264_macroblock_cache_save( x264_t *h )
......
......@@ -334,17 +334,17 @@ static void x264_slice_header_write( bs_t *s, x264_slice_header_t *sh, int i_nal
}
/* If we are within a reasonable distance of the end of the memory allocated for the bitstream, */
/* reallocate, adding an arbitrary amount of space (100 kilobytes). */
/* reallocate, adding an arbitrary amount of space. */
static int x264_bitstream_check_buffer( x264_t *h )
{
uint8_t *bs_bak = h->out.p_bitstream;
int max_mb_size = 2500 << SLICE_MBAFF;
if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_mb_size)) ||
(h->out.bs.p_end - h->out.bs.p < max_mb_size) )
int max_row_size = (2500 << SLICE_MBAFF) * h->mb.i_mb_width;
if( (h->param.b_cabac && (h->cabac.p_end - h->cabac.p < max_row_size)) ||
(h->out.bs.p_end - h->out.bs.p < max_row_size) )
{
h->out.i_bitstream += 100000;
h->out.i_bitstream += max_row_size;
CHECKED_MALLOC( h->out.p_bitstream, h->out.i_bitstream );
h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - 100000) & ~15 );
h->mc.memcpy_aligned( h->out.p_bitstream, bs_bak, (h->out.i_bitstream - max_row_size) & ~15 );
intptr_t delta = h->out.p_bitstream - bs_bak;
h->out.bs.p_start += delta;
......@@ -580,7 +580,7 @@ static int x264_validate_parameters( x264_t *h, int b_open )
}
h->param.rc.i_qp_max = x264_clip3( h->param.rc.i_qp_max, 0, QP_MAX );
h->param.rc.i_qp_min = x264_clip3( h->param.rc.i_qp_min, 0, h->param.rc.i_qp_max );
h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 0, QP_MAX );
h->param.rc.i_qp_step = x264_clip3( h->param.rc.i_qp_step, 2, QP_MAX );
h->param.rc.i_bitrate = x264_clip3( h->param.rc.i_bitrate, 0, 2000000 );
h->param.rc.i_vbv_buffer_size = x264_clip3( h->param.rc.i_vbv_buffer_size, 0, 2000000 );
h->param.rc.i_vbv_max_bitrate = x264_clip3( h->param.rc.i_vbv_max_bitrate, 0, 2000000 );
......@@ -2059,12 +2059,20 @@ typedef struct
bs_t bs;
x264_cabac_t cabac;
x264_frame_stat_t stat;
int last_qp;
int last_dqp;
int field_decoding_flag;
} x264_bs_bak_t;
static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak, int i_skip, int full )
{
if( full )
{
bak->stat = h->stat.frame;
bak->last_qp = h->mb.i_last_qp;
bak->last_dqp = h->mb.i_last_dqp;
bak->field_decoding_flag = h->mb.field_decoding_flag;
}
else
{
bak->stat.i_mv_bits = h->stat.frame.i_mv_bits;
......@@ -2093,7 +2101,12 @@ static ALWAYS_INLINE void x264_bitstream_backup( x264_t *h, x264_bs_bak_t *bak,
static ALWAYS_INLINE void x264_bitstream_restore( x264_t *h, x264_bs_bak_t *bak, int *skip, int full )
{
if( full )
{
h->stat.frame = bak->stat;
h->mb.i_last_qp = bak->last_qp;
h->mb.i_last_dqp = bak->last_dqp;
h->mb.field_decoding_flag = bak->field_decoding_flag;
}
else
{
h->stat.frame.i_mv_bits = bak->stat.i_mv_bits;
......@@ -2128,8 +2141,9 @@ static int x264_slice_write( x264_t *h )
int starting_bits = bs_pos(&h->out.bs);
int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1;
int b_hpel = h->fdec->b_kept_as_ref;
int orig_last_mb = h->sh.i_last_mb;
uint8_t *last_emu_check;
x264_bs_bak_t bs_bak[1];
x264_bs_bak_t bs_bak[2];
b_deblock &= b_hpel || h->param.psz_dump_yuv;
bs_realign( &h->out.bs );
......@@ -2175,17 +2189,18 @@ static int x264_slice_write( x264_t *h )
mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width;
int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
if( !(i_mb_y & SLICE_MBAFF) )
if( i_mb_x == 0 )
{
if( x264_bitstream_check_buffer( h ) )
return -1;
if( back_up_bitstream )
x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
if( !(i_mb_y & SLICE_MBAFF) && h->param.rc.i_vbv_buffer_size )
x264_bitstream_backup( h, &bs_bak[1], i_skip, 1 );
if( !h->mb.b_reencode_mb )
x264_fdec_filter_row( h, i_mb_y, 1 );
}
if( i_mb_x == 0 && !h->mb.b_reencode_mb )
x264_fdec_filter_row( h, i_mb_y, 1 );
if( !(i_mb_y & SLICE_MBAFF) && back_up_bitstream )
x264_bitstream_backup( h, &bs_bak[0], i_skip, 0 );
if( PARAM_INTERLACED )
{
......@@ -2290,14 +2305,10 @@ reencode:
break;
}
else
{
h->sh.i_last_mb = mb_xy;
h->mb.b_reencode_mb = 0;
}
}
else
h->mb.b_reencode_mb = 0;
}
h->mb.b_reencode_mb = 0;
#if HAVE_VISUALIZE
if( h->param.b_visualize )
......@@ -2307,6 +2318,17 @@ reencode:
/* save cache */
x264_macroblock_cache_save( h );
if( x264_ratecontrol_mb( h, mb_size ) < 0 )
{
x264_bitstream_restore( h, &bs_bak[1], &i_skip, 1 );
h->mb.b_reencode_mb = 1;
i_mb_x = 0;
i_mb_y = i_mb_y - SLICE_MBAFF;
h->mb.i_mb_prev_xy = i_mb_y * h->mb.i_mb_stride - 1;
h->sh.i_last_mb = orig_last_mb;
continue;
}
/* accumulate mb stats */
h->stat.frame.i_mb_count[h->mb.i_type]++;
......@@ -2381,8 +2403,6 @@ reencode:
if( b_deblock )
x264_macroblock_deblock_strength( h );
x264_ratecontrol_mb( h, mb_size );
if( mb_xy == h->sh.i_last_mb )
break;
......
......@@ -87,7 +87,9 @@ struct x264_ratecontrol_t
int qp; /* qp for current frame */
float qpm; /* qp for current macroblock: precise float for AQ */
float qpa_rc; /* average of macroblocks' qp before aq */
float qpa_rc_prev;
int qpa_aq; /* average of macroblocks' qp after aq */
int qpa_aq_prev;
float qp_novbv; /* QP for the current frame if 1-pass VBV was disabled. */
/* VBV stuff */
......@@ -1335,8 +1337,8 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
q = x264_clip3f( q, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
rc->qpa_rc =
rc->qpa_aq = 0;
rc->qpa_rc = rc->qpa_rc_prev =
rc->qpa_aq = rc->qpa_aq_prev = 0;
rc->qp = x264_clip3( q + 0.5f, 0, QP_MAX );
h->fdec->f_qp_avg_rc =
h->fdec->f_qp_avg_aq =
......@@ -1400,7 +1402,7 @@ static float predict_row_size_sum( x264_t *h, int y, float qp )
* eliminate all use of qp in row ratecontrol: make it entirely qscale-based.
* make this function stop being needlessly O(N^2)
* update more often than once per row? */
void x264_ratecontrol_mb( x264_t *h, int bits )
int x264_ratecontrol_mb( x264_t *h, int bits )
{
x264_ratecontrol_t *rc = h->rc;
const int y = h->mb.i_mb_y;
......@@ -1409,13 +1411,13 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
rc->qpa_aq += h->mb.i_qp;
if( h->mb.i_mb_x != h->mb.i_mb_width - 1 )
return;
return 0;
x264_emms();
rc->qpa_rc += rc->qpm * h->mb.i_mb_width;
if( !rc->b_vbv )
return;
return 0;
float qscale = qp2qscale( rc->qpm );
h->fdec->f_row_qp[y] = rc->qpm;
......@@ -1427,19 +1429,38 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
/* update ratecontrol per-mbpair in MBAFF */
if( SLICE_MBAFF && !(y&1) )
return;
return 0;
/* FIXME: We don't currently support the case where there's a slice
* boundary in between. */
int can_reencode_row = h->sh.i_first_mb <= ((h->mb.i_mb_y - SLICE_MBAFF) * h->mb.i_mb_stride);
/* tweak quality based on difference from predicted size */
float prev_row_qp = h->fdec->f_row_qp[y];
float qp_absolute_max = h->param.rc.i_qp_max;
if( rc->rate_factor_max_increment )
qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
float step_size = 0.5f;
float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
float size_of_other_slices = 0;
if( h->param.b_sliced_threads )
{
float size_of_other_slices_planned = 0;
for( int i = 0; i < h->param.i_threads; i++ )
if( h != h->thread[i] )
{
size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
}
float weight = rc->slice_size_planned / rc->frame_size_planned;
size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
}
if( y < h->i_threadslice_end-1 )
{
float prev_row_qp = h->fdec->f_row_qp[y];
float qp_min = X264_MAX( prev_row_qp - h->param.rc.i_qp_step, h->param.rc.i_qp_min );
float qp_absolute_max = h->param.rc.i_qp_max;
if( rc->rate_factor_max_increment )
qp_absolute_max = X264_MIN( qp_absolute_max, rc->qp_novbv + rc->rate_factor_max_increment );
float qp_max = X264_MIN( prev_row_qp + h->param.rc.i_qp_step, qp_absolute_max );
float step_size = 0.5f;
/* B-frames shouldn't use lower QP than their reference frames. */
if( h->sh.i_type == SLICE_TYPE_B )
{
......@@ -1447,31 +1468,14 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
rc->qpm = X264_MAX( rc->qpm, qp_min );
}
float buffer_left_planned = rc->buffer_fill - rc->frame_size_planned;
float slice_size_planned = h->param.b_sliced_threads ? rc->slice_size_planned : rc->frame_size_planned;
float max_frame_error = X264_MAX( 0.05f, 1.0f / h->mb.i_mb_height );
float size_of_other_slices = 0;
if( h->param.b_sliced_threads )
{
float size_of_other_slices_planned = 0;
for( int i = 0; i < h->param.i_threads; i++ )
if( h != h->thread[i] )
{
size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
size_of_other_slices_planned += h->thread[i]->rc->slice_size_planned;
}
float weight = rc->slice_size_planned / rc->frame_size_planned;
size_of_other_slices = (size_of_other_slices - size_of_other_slices_planned) * weight + size_of_other_slices_planned;
}
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
float b1 = predict_row_size_sum( h, y, rc->qpm ) + size_of_other_slices;
/* Don't modify the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
/* Don't increase the row QPs until a sufficient amount of the bits of the frame have been processed, in case a flat */
/* area at the top of the frame was measured inaccurately. */
if( row_bits_so_far( h, y ) < 0.05f * slice_size_planned )
return;
qp_max = qp_absolute_max = prev_row_qp;
if( h->sh.i_type != SLICE_TYPE_I )
rc_tol *= 0.5f;
......@@ -1507,9 +1511,39 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
}
h->rc->frame_size_estimated = b1 - size_of_other_slices;
/* If the current row was large enough to cause a large QP jump, try re-encoding it. */
if( rc->qpm > qp_max && prev_row_qp < qp_max && can_reencode_row )
{
/* Bump QP to halfway in between... close enough. */
rc->qpm = x264_clip3f( (prev_row_qp + rc->qpm)*0.5f, prev_row_qp + 1.0f, qp_max );
rc->qpa_rc = rc->qpa_rc_prev;
rc->qpa_aq = rc->qpa_aq_prev;
h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
return -1;
}
}
else
{
h->rc->frame_size_estimated = predict_row_size_sum( h, y, rc->qpm );
/* Last-ditch attempt: if the last row of the frame underflowed the VBV,
* try again. */
if( (h->rc->frame_size_estimated + size_of_other_slices) > (rc->buffer_fill - rc->buffer_rate * max_frame_error) &&
rc->qpm < qp_max && can_reencode_row )
{
rc->qpm = qp_max;
rc->qpa_rc = rc->qpa_rc_prev;
rc->qpa_aq = rc->qpa_aq_prev;
h->fdec->i_row_bits[y] = h->fdec->i_row_bits[y-SLICE_MBAFF] = 0;
return -1;
}
}
rc->qpa_rc_prev = rc->qpa_rc;
rc->qpa_aq_prev = rc->qpa_aq;
return 0;
}
int x264_ratecontrol_qp( x264_t *h )
......
......@@ -51,7 +51,7 @@ void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
void x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_mb( x264_t *, int bits );
int x264_ratecontrol_qp( x264_t * );
int x264_ratecontrol_mb_qp( x264_t *h );
int x264_ratecontrol_end( x264_t *, int bits, int *filler );
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment