Commit f92aa4ec authored by Fiona Glaser

Chroma weighted prediction

Like luma weighted prediction, dramatically improves compression in fades.
Up to 4-8 dB chroma PSNR gain in extreme cases (short, perfect fade-outs).
On actual videos, helps up to ~1% overall.
One example video with a decent number of fades (ef OP): 0.8% bitrate reduction overall, 7% bitrate reduction just counting chroma.
Fixes a lot of artifacts in fades at lower bitrates.

Original patch by Dylan Yudaken <dyudaken@gmail.com>.
parent fa28f5b9
......@@ -807,7 +807,7 @@ struct x264_t
int i_direct_score[2];
int i_direct_frames[2];
/* num p-frames weighted */
int i_wpred[3];
int i_wpred[2];
} stat;
......
......@@ -1477,49 +1477,67 @@ static void x264_weighted_pred_init( x264_t *h )
int i_padv = PADV << h->param.b_interlaced;
int denom = -1;
int weightluma = 0;
int weightplane[2] = { 0, 0 };
int buffer_next = 0;
//FIXME: when chroma support is added, move this into loop
h->sh.weight[0][1].weightfn = h->sh.weight[0][2].weightfn = NULL;
h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
for( int j = 0; j < h->i_ref0; j++ )
for( int i = 0; i < 3; i++ )
{
if( h->fenc->weight[j][0].weightfn )
for( int j = 0; j < h->i_ref0; j++ )
{
h->sh.weight[j][0] = h->fenc->weight[j][0];
// if weight is useless, don't write it to stream
if( h->sh.weight[j][0].i_scale == 1<<h->sh.weight[j][0].i_denom && h->sh.weight[j][0].i_offset == 0 )
h->sh.weight[j][0].weightfn = NULL;
else
if( h->fenc->weight[j][i].weightfn )
{
if( !weightluma )
h->sh.weight[j][i] = h->fenc->weight[j][i];
// if weight is useless, don't write it to stream
if( h->sh.weight[j][i].i_scale == 1<<h->sh.weight[j][i].i_denom && h->sh.weight[j][i].i_offset == 0 )
h->sh.weight[j][i].weightfn = NULL;
else
{
weightluma = 1;
h->sh.weight[0][0].i_denom = denom = h->sh.weight[j][0].i_denom;
assert( x264_clip3( denom, 0, 7 ) == denom );
if( !weightplane[!!i] )
{
weightplane[!!i] = 1;
h->sh.weight[0][!!i].i_denom = denom = h->sh.weight[j][i].i_denom;
assert( x264_clip3( denom, 0, 7 ) == denom );
}
assert( h->sh.weight[j][i].i_denom == denom );
if( !i )
{
h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] + h->fenc->i_stride[0] * i_padv + PADH;
//scale full resolution frame
if( h->param.i_threads == 1 )
{
pixel *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
int stride = h->fenc->i_stride[0];
int width = h->fenc->i_width[0] + PADH*2;
int height = h->fenc->i_lines[0] + i_padv*2;
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
h->fenc->i_lines_weighted = height;
}
}
}
assert( h->sh.weight[j][0].i_denom == denom );
assert( x264_clip3( h->sh.weight[j][0].i_scale, 0, 127 ) == h->sh.weight[j][0].i_scale );
assert( x264_clip3( h->sh.weight[j][0].i_offset, -128, 127 ) == h->sh.weight[j][0].i_offset );
h->fenc->weighted[j] = h->mb.p_weight_buf[buffer_next++] +
h->fenc->i_stride[0] * i_padv + PADH;
}
}
}
//scale full resolution frame
if( h->sh.weight[j][0].weightfn && h->param.i_threads == 1 )
if( weightplane[1] )
for( int i = 0; i < h->i_ref0; i++ )
{
pixel *src = h->fref0[j]->filtered[0] - h->fref0[j]->i_stride[0]*i_padv - PADH;
pixel *dst = h->fenc->weighted[j] - h->fenc->i_stride[0]*i_padv - PADH;
int stride = h->fenc->i_stride[0];
int width = h->fenc->i_width[0] + PADH*2;
int height = h->fenc->i_lines[0] + i_padv*2;
x264_weight_scale_plane( h, dst, stride, src, stride, width, height, &h->sh.weight[j][0] );
h->fenc->i_lines_weighted = height;
if( h->sh.weight[i][1].weightfn && !h->sh.weight[i][2].weightfn )
{
h->sh.weight[i][2].i_scale = 1 << h->sh.weight[0][1].i_denom;
h->sh.weight[i][2].i_offset = 0;
}
else if( h->sh.weight[i][2].weightfn && !h->sh.weight[i][1].weightfn )
{
h->sh.weight[i][1].i_scale = 1 << h->sh.weight[0][1].i_denom;
h->sh.weight[i][1].i_offset = 0;
}
}
}
if( !weightluma )
if( !weightplane[0] )
h->sh.weight[0][0].i_denom = 0;
if( !weightplane[1] )
h->sh.weight[0][1].i_denom = h->sh.weight[0][2].i_denom = 0;
}
static inline void x264_reference_build_list( x264_t *h, int i_poc )
......@@ -2858,13 +2876,10 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
{
h->stat.i_consecutive_bframes[h->fdec->i_frame - h->fref0[0]->i_frame - 1]++;
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
for( int i = 0; i < 3; i++ )
for( int j = 0; j < h->i_ref0; j++ )
if( h->sh.weight[0][i].i_denom != 0 )
{
h->stat.i_wpred[i]++;
break;
}
{
h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn;
h->stat.i_wpred[1] += !!h->sh.weight[0][1].weightfn || !!h->sh.weight[0][2].weightfn;
}
}
if( h->sh.i_type == SLICE_TYPE_B )
{
......@@ -3210,8 +3225,9 @@ void x264_encoder_close ( x264_t *h )
fixed_pred_modes[3][3] * 100.0 / sum_pred_modes[3] );
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->stat.i_frame_count[SLICE_TYPE_P] > 0 )
x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%%\n",
h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
x264_log( h, X264_LOG_INFO, "Weighted P-Frames: Y:%.1f%% UV:%.1f%%\n",
h->stat.i_wpred[0] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P],
h->stat.i_wpred[1] * 100.0 / h->stat.i_frame_count[SLICE_TYPE_P] );
for( int i_list = 0; i_list < 2; i_list++ )
for( int i_slice = 0; i_slice < 2; i_slice++ )
......
......@@ -1110,7 +1110,15 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
uint64_t cost; \
M32( cache_mv ) = pack16to32_mask(mx,my); \
if( m->i_pixel <= PIXEL_8x8 ) \
{ \
h->mc.mc_chroma( pixu, pixv, FDEC_STRIDE, m->p_fref[4], m->i_stride[1], mx, my + mvy_offset, bw>>1, bh>>1 ); \
if( m->weight[1].weightfn ) \
m->weight[1].weightfn[x264_pixel_size[i_pixel].w>>3]( pixu, FDEC_STRIDE, pixu, FDEC_STRIDE, \
&m->weight[1], x264_pixel_size[i_pixel].h>>1 ); \
if( m->weight[2].weightfn ) \
m->weight[2].weightfn[x264_pixel_size[i_pixel].w>>3]( pixv, FDEC_STRIDE, pixv, FDEC_STRIDE, \
&m->weight[2], x264_pixel_size[i_pixel].h>>1 ); \
} \
cost = x264_rd_cost_part( h, i_lambda2, i4, m->i_pixel ); \
COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
} \
......
......@@ -53,8 +53,8 @@ typedef struct
int s_count;
float blurred_complexity;
char direct_mode;
int16_t weight[2];
int16_t i_weight_denom;
int16_t weight[3][2];
int16_t i_weight_denom[2];
int refcount[16];
int refs;
int i_duration;
......@@ -227,11 +227,11 @@ static ALWAYS_INLINE uint32_t ac_energy_plane( x264_t *h, int mb_x, int mb_y, x2
{
ALIGNED_ARRAY_16( pixel, pix,[FENC_STRIDE*8] );
h->mc.load_deinterleave_8x8x2_fenc( pix, frame->plane[1] + offset, stride );
return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, i )
+ ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, i );
return ac_energy_var( h->pixf.var[PIXEL_8x8]( pix, FENC_STRIDE ), 6, frame, 1 )
+ ac_energy_var( h->pixf.var[PIXEL_8x8]( pix+FENC_STRIDE/2, FENC_STRIDE ), 6, frame, 2 );
}
else
return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, i );
return ac_energy_var( h->pixf.var[PIXEL_16x16]( frame->plane[0] + offset, stride ), 8, frame, 0 );
}
// Find the total AC energy of the block in all planes.
......@@ -854,11 +854,19 @@ int x264_ratecontrol_new( x264_t *h )
rce->refs = ref;
/* find weights */
rce->i_weight_denom = -1;
rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
char *w = strchr( p, 'w' );
if( w )
if( sscanf( w, "w:%hd,%hd,%hd", &rce->i_weight_denom, &rce->weight[0], &rce->weight[1] ) != 3 )
rce->i_weight_denom = -1;
{
int count = sscanf( w, "w:%hd,%hd,%hd,%hd,%hd,%hd,%hd,%hd",
&rce->i_weight_denom[0], &rce->weight[0][0], &rce->weight[0][1],
&rce->i_weight_denom[1], &rce->weight[1][0], &rce->weight[1][1],
&rce->weight[2][0], &rce->weight[2][1] );
if( count == 3 )
rce->i_weight_denom[1] = -1;
else if ( count != 8 )
rce->i_weight_denom[0] = rce->i_weight_denom[1] = -1;
}
if( pict_type != 'b' )
rce->kept_as_ref = 1;
......@@ -1485,8 +1493,15 @@ void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm )
ratecontrol_entry_t *rce = &h->rc->entry[frm->i_frame];
if( h->param.analyse.i_weighted_pred <= 0 )
return;
if( rce->i_weight_denom >= 0 )
SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0], rce->i_weight_denom, rce->weight[1] );
if( rce->i_weight_denom[0] >= 0 )
SET_WEIGHT( frm->weight[0][0], 1, rce->weight[0][0], rce->i_weight_denom[0], rce->weight[0][1] );
if( rce->i_weight_denom[1] >= 0 )
{
SET_WEIGHT( frm->weight[0][1], 1, rce->weight[1][0], rce->i_weight_denom[1], rce->weight[1][1] );
SET_WEIGHT( frm->weight[0][2], 1, rce->weight[2][0], rce->i_weight_denom[1], rce->weight[2][1] );
}
}
/* After encoding one frame, save stats and update ratecontrol state */
......@@ -1543,9 +1558,19 @@ int x264_ratecontrol_end( x264_t *h, int bits, int *filler )
goto fail;
}
if( h->sh.weight[0][0].weightfn )
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.weight[0][0].weightfn )
{
if( fprintf( rc->p_stat_file_out, "w:%"PRId32",%"PRId32",%"PRId32, h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
if( fprintf( rc->p_stat_file_out, "w:%d,%d,%d",
h->sh.weight[0][0].i_denom, h->sh.weight[0][0].i_scale, h->sh.weight[0][0].i_offset ) < 0 )
goto fail;
if( h->sh.weight[0][1].weightfn || h->sh.weight[0][2].weightfn )
{
if( fprintf( rc->p_stat_file_out, ",%d,%d,%d,%d,%d\n",
h->sh.weight[0][1].i_denom, h->sh.weight[0][1].i_scale, h->sh.weight[0][1].i_offset,
h->sh.weight[0][2].i_scale, h->sh.weight[0][2].i_offset ) < 0 )
goto fail;
}
else if( fprintf( rc->p_stat_file_out, "\n" ) < 0 )
goto fail;
}
......
......@@ -98,7 +98,61 @@ static NOINLINE pixel *x264_weight_cost_init_luma( x264_t *h, x264_frame_t *fenc
return ref->lowres[0];
}
static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
/* Fill the chroma weightp scratch buffers.
 *
 * Row layout of the two destination buffers:
 *     dstu: [U: ref] [U: fenc]
 *     dstv: [V: ref] [V: fenc]
 * Within a row, fenc = ref + i_stride/2.
 * v = u + stride * chroma height (arranged by the caller).
 * We'll need more room if we do 4:2:2 or 4:4:4.
 *
 * h     encoder context; supplies the mc function table and MB dimensions
 * fenc  frame being encoded (source of the "fenc" halves and of lowres_mvs)
 * ref   reference frame (source of the "ref" halves)
 * dstu  destination for the deinterleaved U data
 * dstv  destination for the deinterleaved V data */
static NOINLINE void x264_weight_cost_init_chroma( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, pixel *dstu, pixel *dstv )
{
    const int stride     = fenc->i_stride[1];
    const int fenc_shift = stride / 2;
    const int plane_rows = fenc->i_lines[1];
    const int plane_cols = fenc->i_width[1];
    const int copy_w     = h->mb.i_mb_width  << 3;
    const int copy_h     = h->mb.i_mb_height << 3;
    const int dist       = fenc->i_frame - ref->i_frame - 1;

    if( fenc->lowres_mvs[0][dist][0][0] == 0x7FFF )
    {
        /* First MV holds the 0x7FFF sentinel (presumably "no lowres MVs stored"
         * -- confirm against the lookahead): copy the reference chroma as-is. */
        h->mc.plane_copy_deinterleave( dstu, stride, dstv, stride, ref->plane[1], stride, copy_w, copy_h );
    }
    else
    {
        /* Motion-compensate the reference chroma one 8x8 block at a time,
         * walking the MV array in raster order. */
        int mb = 0;
        for( int row = 0; row < plane_rows; row += 8 )
        {
            const int row_off = row * stride;
            for( int col = 0; col < plane_cols; col += 8, mb++ )
            {
                const int pos = row_off + col;
                /* Source is interleaved (NV12), hence two samples per chroma column. */
                pixel *nv12 = ref->plane[1] + row_off + col*2;
                int mvx = fenc->lowres_mvs[0][dist][mb][0];
                int mvy = fenc->lowres_mvs[0][dist][mb][1];
                h->mc.mc_chroma( dstu + pos, dstv + pos, stride, nv12, stride, mvx, mvy, 8, 8 );
            }
        }
    }

    /* The frame being encoded always goes, unmodified, into the second half of each row. */
    h->mc.plane_copy_deinterleave( dstu + fenc_shift, stride, dstv + fenc_shift, stride,
                                   fenc->plane[1], stride, copy_w, copy_h );
    x264_emms();
}
/* Estimate the cost of signalling the weight *w in the slice header(s).
 *
 * h         encoder context (slice configuration and MB dimensions)
 * w         weight whose denom/scale/offset sizes are measured
 * b_chroma  nonzero for a chroma plane
 *
 * Returns lambda * slice count * estimated header bits. */
static int x264_weight_slice_header_cost( x264_t *h, x264_weight_t *w, int b_chroma )
{
    int lambda = x264_lambda_tab[X264_LOOKAHEAD_QP];

    /* FIXME: find a way to account for --slice-max-size? */
    int slices = 1;
    if( h->param.i_slice_count )
        slices = h->param.i_slice_count;
    else if( h->param.i_slice_max_mbs )
    {
        int total_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
        slices = (total_mbs + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
    }

    /* Everything is counted twice as there will be a duplicate; the denom is
     * counted only once for chroma since the two chroma planes share it. */
    int denom_bits = bs_size_ue( w->i_denom ) * (2 - b_chroma);
    int param_bits = 2 * (bs_size_se( w->i_scale ) + bs_size_se( w->i_offset ));

    /* 10 extra bits: if there is a weighted frame, an additional duplicate is used. */
    return lambda * slices * ( 10 + denom_bits + param_bits );
}
static NOINLINE unsigned int x264_weight_cost_luma( x264_t *h, x264_frame_t *fenc, pixel *src, x264_weight_t *w )
{
unsigned int cost = 0;
int i_stride = fenc->i_stride_lowres;
......@@ -117,18 +171,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pi
w->weightfn[8>>2]( buf, 8, &src[pixoff], i_stride, w, 8 );
cost += X264_MIN( h->pixf.mbcmp[PIXEL_8x8]( buf, 8, &fenc_plane[pixoff], i_stride ), fenc->i_intra_cost[i_mb] );
}
/* Add cost of weights in the slice header. */
int numslices;
if( h->param.i_slice_count )
numslices = h->param.i_slice_count;
else if( h->param.i_slice_max_mbs )
numslices = (h->mb.i_mb_width * h->mb.i_mb_height + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
else
numslices = 1;
/* FIXME: find a way to account for --slice-max-size?
* Multiply by 2 as there will be a duplicate. 10 bits added as if there is a weighted frame, then an additional duplicate is used.
* Since using lowres frames, assume lambda = 1. */
cost += numslices * ( 10 + 2 * ( bs_size_ue( w[0].i_denom ) + bs_size_se( w[0].i_scale ) + bs_size_se( w[0].i_offset ) ) );
cost += x264_weight_slice_header_cost( h, w, 0 );
}
else
for( int y = 0; y < i_lines; y += 8, pixoff = y*i_stride )
......@@ -138,6 +181,44 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pi
return cost;
}
/* Score one chroma plane's prediction from the reference, optionally weighted.
 *
 * h     encoder context (pixel function table)
 * fenc  frame being encoded; supplies the chroma stride and dimensions
 * ref   scratch row start for this plane: reference data, with the data to be
 *       matched located i_stride/2 pixels further along each row
 * w     weight to evaluate, or NULL to score the unweighted reference
 *
 * Returns the accumulated per-8x8-block cost; when w is non-NULL the slice
 * header cost of the weight is added. */
static NOINLINE unsigned int x264_weight_cost_chroma( x264_t *h, x264_frame_t *fenc, pixel *ref, x264_weight_t *w )
{
    const int stride = fenc->i_stride[1];
    const int rows   = fenc->i_lines[1];
    const int cols   = fenc->i_width[1];
    pixel *target = ref + stride / 2;
    ALIGNED_ARRAY_8( pixel, buf, [8*8] );
    ALIGNED_8( pixel flat[8] ) = {0};
    unsigned int total = 0;

    for( int y = 0; y < rows; y += 8 )
        for( int x = 0; x < cols; x += 8 )
        {
            const int off = y*stride + x;
            pixel *pred = &ref[off];
            int pred_stride = stride;
            if( w )
            {
                /* Weighted case: build the weighted block in buf and score that. */
                w->weightfn[8>>2]( buf, 8, &ref[off], stride, w, 8 );
                pred = buf;
                pred_stride = 8;
            }
            /* The naive and seemingly sensible algorithm is to use mbcmp as in luma.
             * But testing shows that for chroma the DC coefficient is by far the most
             * important part of the coding cost. Thus a more useful chroma weight is
             * obtained by comparing each block's DC coefficient instead of the actual
             * pixels. SAD against an all-zero row (stride 0) serves as the block sum.
             *
             * FIXME: add a (faster) asm sum function to replace sad. */
            total += abs( h->pixf.sad_aligned[PIXEL_8x8]( pred, pred_stride, flat, 0 ) -
                          h->pixf.sad_aligned[PIXEL_8x8]( &target[off], stride, flat, 0 ) );
        }

    if( w )
        total += x264_weight_slice_header_cost( h, w, 1 );

    x264_emms();
    return total;
}
void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead )
{
float fenc_mean, ref_mean, fenc_var, ref_var;
......@@ -150,66 +231,110 @@ void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int
float guess_scale;
int found;
x264_weight_t *weights = fenc->weight[0];
SET_WEIGHT( weights[1], 0, 1, 0, 0 );
SET_WEIGHT( weights[2], 0, 1, 0, 0 );
/* Don't check chroma in lookahead, or if there wasn't a luma weight. */
for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
{
fenc_var = round( sqrt( fenc->i_pixel_ssd[plane] ) );
ref_var = round( sqrt( ref->i_pixel_ssd[plane] ) );
fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
ref_mean = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]);
fenc_var = round( sqrt( fenc->i_pixel_ssd[0] ) );
ref_var = round( sqrt( ref->i_pixel_ssd[0] ) );
fenc_mean = (float)fenc->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
ref_mean = (float) ref->i_pixel_sum[0] / (fenc->i_lines[0] * fenc->i_width[0]);
//early termination
if( fabs( ref_mean - fenc_mean ) < 0.5 && fabsf( 1 - (float)fenc_var / ref_var ) < epsilon )
{
SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
continue;
}
//early termination
if( fabs( ref_mean - fenc_mean ) < 0.5 && fabs( 1 - fenc_var / ref_var ) < epsilon )
{
SET_WEIGHT( weights[0], 0, 1, 0, 0 );
return;
}
guess_scale = ref_var ? (float)fenc_var/ref_var : 0;
guess_scale = ref_var ? fenc_var/ref_var : 0;
x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[0] );
if( plane )
{
weights[plane].i_denom = 6;
weights[plane].i_scale = x264_clip3( round(guess_scale * 64.0), 0, 255 );
if( weights[plane].i_scale > 127 )
{
weights[1].weightfn = weights[2].weightfn = NULL;
break;
}
}
else
x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
found = 0;
mindenom = weights[0].i_denom;
minscale = weights[0].i_scale;
minoff = 0;
offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
found = 0;
mindenom = weights[plane].i_denom;
minscale = weights[plane].i_scale;
minoff = 0;
if( !fenc->b_intra_calculated )
{
x264_mb_analysis_t a;
x264_lowres_context_init( h, &a );
x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
}
pixel *mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
origscore = minscore = x264_weight_cost( h, fenc, mcbuf, 0 );
pixel *mcbuf;
if( !plane )
{
if( !fenc->b_intra_calculated )
{
x264_mb_analysis_t a;
x264_lowres_context_init( h, &a );
x264_slicetype_frame_cost( h, &a, &fenc, 0, 0, 0, 0 );
}
mcbuf = x264_weight_cost_init_luma( h, fenc, ref, h->mb.p_weight_buf[0] );
origscore = minscore = x264_weight_cost_luma( h, fenc, mcbuf, 0 );
}
else
{
pixel *dstu = h->mb.p_weight_buf[0];
pixel *dstv = h->mb.p_weight_buf[0]+fenc->i_stride[1]*fenc->i_lines[1];
/* Only initialize chroma data once. */
if( plane == 1 )
x264_weight_cost_init_chroma( h, fenc, ref, dstu, dstv );
mcbuf = plane == 1 ? dstu : dstv;
origscore = minscore = x264_weight_cost_chroma( h, fenc, mcbuf, 0 );
}
if( !minscore )
{
SET_WEIGHT( weights[0], 0, 1, 0, 0 );
return;
}
if( !minscore )
continue;
// This gives a slight improvement due to rounding errors but only tests
// one offset on lookahead.
// TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
{
SET_WEIGHT( weights[0], 1, minscale, mindenom, i_off );
unsigned int s = x264_weight_cost( h, fenc, mcbuf, &weights[0] );
COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
// This gives a slight improvement due to rounding errors but only tests
// one offset on lookahead.
// TODO: currently searches only offset +1. try other offsets/multipliers/combinations thereof?
offset_search = x264_clip3( floor( fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f*b_lookahead ), -128, 126 );
for( int i_off = offset_search; i_off <= offset_search+!b_lookahead; i_off++ )
{
SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
unsigned int s;
if( plane )
s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
else
s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
}
x264_emms();
/* FIXME: More analysis can be done here on SAD vs. SATD termination. */
/* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
{
SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
continue;
}
else
SET_WEIGHT( weights[plane], 1, minscale, mindenom, minoff );
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn && !plane )
fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
}
x264_emms();
/* FIXME: More analysis can be done here on SAD vs. SATD termination. */
/* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
if( !found || (minscale == 1<<mindenom && minoff == 0) || (float)minscore / origscore > 0.998 )
//FIXME, what is the correct way to deal with this?
if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
{
SET_WEIGHT( weights[0], 0, 1, 0, 0 );
return;
int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
int i;
for( i = 1; i <= 2; i++ )
{
weights[i].i_scale = x264_clip3( weights[i].i_scale >> ( weights[i].i_denom - denom ), 0, 255 );
weights[i].i_denom = denom;
}
}
else
SET_WEIGHT( weights[0], 1, minscale, mindenom, minoff );
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_FAKE && weights[0].weightfn )
fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
if( weights[0].weightfn && b_lookahead )
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment