Commit 2d0c47a5 authored by Fiona Glaser's avatar Fiona Glaser
Browse files

weightp: improve scale/offset search, chroma

Rescale the scale factor if the offset clips. This makes weightp more effective
in fades to/from white (and an other situation that requires big offsets).

Search more than 1 scale factor and more than 1 offset, depending on --subme.

Try to find the optimal chroma denominator instead of hardcoding it.

Overall improvement: a few percent in fade-heavy clips, such as a sample from
Avatar: TLA.
parent 732e4f7e
......@@ -60,7 +60,7 @@ static void x264_weight_get_h264( int weight_nonh264, int offset, x264_weight_t
w->i_offset = offset;
w->i_denom = 7;
w->i_scale = weight_nonh264;
while( w->i_denom > 0 && (w->i_scale > 127 || !(w->i_scale & 1)) )
while( w->i_denom > 0 && (w->i_scale > 127) )
{
w->i_denom--;
w->i_scale >>= 1;
......@@ -286,21 +286,40 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
SET_WEIGHT( weights[1], 0, 1, 0, 0 );
SET_WEIGHT( weights[2], 0, 1, 0, 0 );
int chroma_initted = 0;
float guess_scale[3];
float fenc_mean[3];
float ref_mean[3];
for( int plane = 0; plane <= 2*!b_lookahead; plane++ )
{
float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
float ref_var = ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
guess_scale[plane] = sqrtf( fenc_var / ref_var );
fenc_mean[plane] = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
ref_mean[plane] = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
}
int chroma_denom = 7;
if( !b_lookahead )
{
/* make sure both our scale factors fit */
while( chroma_denom > 0 )
{
float thresh = 127.f / (1<<chroma_denom);
if( guess_scale[1] < thresh && guess_scale[2] < thresh )
break;
chroma_denom--;
}
}
/* Don't check chroma in lookahead, or if there wasn't a luma weight. */
for( int plane = 0; plane <= 2 && !( plane && ( !weights[0].weightfn || b_lookahead ) ); plane++ )
{
int cur_offset, start_offset, end_offset;
int minoff, minscale, mindenom;
unsigned int minscore, origscore;
int found;
float fenc_var = fenc->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
float ref_var = ref->i_pixel_ssd[plane] + !ref->i_pixel_ssd[plane];
float guess_scale = sqrtf( fenc_var / ref_var );
float fenc_mean = (float)fenc->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
float ref_mean = (float) ref->i_pixel_sum[plane] / (fenc->i_lines[!!plane] * fenc->i_width[!!plane]) / (1 << (BIT_DEPTH - 8));
//early termination
if( fabsf( ref_mean - fenc_mean ) < 0.5f && fabsf( 1.f - guess_scale ) < epsilon )
if( fabsf( ref_mean[plane] - fenc_mean[plane] ) < 0.5f && fabsf( 1.f - guess_scale[plane] ) < epsilon )
{
SET_WEIGHT( weights[plane], 0, 1, 0, 0 );
continue;
......@@ -308,8 +327,8 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
if( plane )
{
weights[plane].i_denom = 6;
weights[plane].i_scale = x264_clip3( round( guess_scale * 64 ), 0, 255 );
weights[plane].i_denom = chroma_denom;
weights[plane].i_scale = x264_clip3( round( guess_scale[plane] * (1<<chroma_denom) ), 0, 255 );
if( weights[plane].i_scale > 127 )
{
weights[1].weightfn = weights[2].weightfn = NULL;
......@@ -317,7 +336,7 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
}
}
else
x264_weight_get_h264( round( guess_scale * 128 ), 0, &weights[plane] );
x264_weight_get_h264( round( guess_scale[plane] * 128 ), 0, &weights[plane] );
found = 0;
mindenom = weights[plane].i_denom;
......@@ -357,33 +376,65 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
if( !minscore )
continue;
// This gives a slight improvement due to rounding errors but only tests one offset in lookahead.
// Currently only searches within +/- 1 of the best offset found so far.
// TODO: Try other offsets/multipliers/combinations thereof?
cur_offset = fenc_mean - ref_mean * minscale / (1 << mindenom) + 0.5f * b_lookahead;
start_offset = x264_clip3( cur_offset - !b_lookahead, -128, 127 );
end_offset = x264_clip3( cur_offset + !b_lookahead, -128, 127 );
for( int i_off = start_offset; i_off <= end_offset; i_off++ )
/* Picked somewhat arbitrarily */
static const uint8_t weight_check_distance[][2] =
{
{0,0},{0,0},{0,1},{0,1},
{0,1},{0,1},{0,1},{1,1},
{1,1},{2,1},{2,1},{4,2}
};
int scale_dist = b_lookahead ? 0 : weight_check_distance[h->param.analyse.i_subpel_refine][0];
int offset_dist = b_lookahead ? 0 : weight_check_distance[h->param.analyse.i_subpel_refine][1];
int start_scale = x264_clip3( minscale - scale_dist, 0, 127 );
int end_scale = x264_clip3( minscale + scale_dist, 0, 127 );
for( int i_scale = start_scale; i_scale <= end_scale; i_scale++ )
{
SET_WEIGHT( weights[plane], 1, minscale, mindenom, i_off );
unsigned int s;
if( plane )
int cur_scale = i_scale;
int cur_offset = fenc_mean[plane] - ref_mean[plane] * cur_scale / (1 << mindenom) + 0.5f * b_lookahead;
if( cur_offset < - 128 || cur_offset > 127 )
{
if( CHROMA444 )
s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
else
s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
/* Rescale considering the constraints on cur_offset. We do it in this order
* because scale has a much wider range than offset (because of denom), so
* it should almost never need to be clamped. */
cur_offset = x264_clip3( cur_offset, -128, 127 );
cur_scale = (1 << mindenom) * (fenc_mean[plane] - cur_offset) / ref_mean[plane] + 0.5f;
cur_scale = x264_clip3( cur_scale, 0, 127 );
}
else
s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
COPY3_IF_LT( minscore, s, minoff, i_off, found, 1 );
int start_offset = x264_clip3( cur_offset - offset_dist, -128, 127 );
int end_offset = x264_clip3( cur_offset + offset_dist, -128, 127 );
for( int i_off = start_offset; i_off <= end_offset; i_off++ )
{
SET_WEIGHT( weights[plane], 1, cur_scale, mindenom, i_off );
unsigned int s;
if( plane )
{
if( CHROMA444 )
s = x264_weight_cost_chroma444( h, fenc, mcbuf, &weights[plane], plane );
else
s = x264_weight_cost_chroma( h, fenc, mcbuf, &weights[plane] );
}
else
s = x264_weight_cost_luma( h, fenc, mcbuf, &weights[plane] );
COPY4_IF_LT( minscore, s, minscale, cur_scale, minoff, i_off, found, 1 );
// Don't check any more offsets if the previous one had a lower cost than the current one
if( minoff == start_offset && i_off != start_offset )
break;
// Don't check any more offsets if the previous one had a lower cost than the current one
if( minoff == start_offset && i_off != start_offset )
break;
}
}
x264_emms();
/* Use a smaller denominator if possible */
if( !plane )
{
while( mindenom > 0 && !(minscale&1) )
{
mindenom--;
minscale >>= 1;
}
}
/* FIXME: More analysis can be done here on SAD vs. SATD termination. */
/* 0.2% termination derived experimentally to avoid weird weights in frames that are mostly intra. */
if( !found || (minscale == 1 << mindenom && minoff == 0) || (float)minscore / origscore > 0.998f )
......@@ -398,18 +449,29 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
fenc->f_weighted_cost_delta[i_delta_index] = (float)minscore / origscore;
}
//FIXME, what is the correct way to deal with this?
if( weights[1].weightfn && weights[2].weightfn && weights[1].i_denom != weights[2].i_denom )
/* Optimize and unify denominator */
if( weights[1].weightfn || weights[2].weightfn )
{
int denom = X264_MIN( weights[1].i_denom, weights[2].i_denom );
int i;
for( i = 1; i <= 2; i++ )
int denom = weights[1].weightfn ? weights[1].i_denom : weights[2].i_denom;
int both_weighted = weights[1].weightfn && weights[2].weightfn;
/* If only one plane is weighted, the other has an implicit scale of 1<<denom.
* With denom==7, this comes out to 128, which is invalid, so don't allow that. */
while( (!both_weighted && denom==7) ||
(denom > 0 && !(weights[1].weightfn && (weights[1].i_scale&1))
&& !(weights[2].weightfn && (weights[2].i_scale&1))) )
{
weights[i].i_scale = x264_clip3( weights[i].i_scale >> ( weights[i].i_denom - denom ), 0, 255 );
weights[i].i_denom = denom;
h->mc.weight_cache( h, &weights[i] );
denom--;
for( int i = 1; i <= 2; i++ )
if( weights[i].weightfn )
{
weights[i].i_scale >>= 1;
weights[i].i_denom = denom;
}
}
}
for( int i = 1; i <= 2; i++ )
if( weights[i].weightfn )
h->mc.weight_cache( h, &weights[i] );
if( weights[0].weightfn && b_lookahead )
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment