Commit e6025413 authored by Fiona Glaser

VBV emergency mode

Allow ratecontrol to select "quantizers" above the maximum.
These "quantizers" progressively decimate the source to avoid VBV underflow.
x264 is now VBV compliant even with input as evil as /dev/random.
parent 68cda11b
......@@ -59,12 +59,13 @@ do {\
#define X264_PCM_COST (384*BIT_DEPTH+16)
#define X264_LOOKAHEAD_MAX 250
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
#define QP_MAX (51+QP_BD_OFFSET)
#define QP_MAX_MAX (51+2*6)
#define LAMBDA_MAX (91 << (BIT_DEPTH-8))
#define QP_MAX_SPEC (51+QP_BD_OFFSET)
#define QP_MAX (QP_MAX_SPEC+18)
#define QP_MAX_MAX (51+2*6+18)
#define PIXEL_MAX ((1 << BIT_DEPTH)-1)
// arbitrary, but low because SATD scores are 1/4 normal
#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
#define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC)
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
......@@ -460,12 +461,11 @@ struct x264_t
udctcoef (*quant8_mf[2])[64]; /* [2][52][64] */
udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */
udctcoef (*quant8_bias[2])[64]; /* [2][52][64] */
udctcoef (*nr_offset_emergency)[3][64];
/* mv/ref cost arrays. Indexed by lambda instead of
* qp because, due to rounding, some quantizers share
* lambdas. This saves memory. */
uint16_t *cost_mv[LAMBDA_MAX+1];
uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
/* mv/ref cost arrays. */
uint16_t *cost_mv[QP_MAX+1];
uint16_t *cost_mv_fpel[QP_MAX+1][4];
const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
......@@ -812,9 +812,14 @@ struct x264_t
} stat;
ALIGNED_16( uint32_t nr_residual_sum[2][64] );
ALIGNED_16( udctcoef nr_offset[2][64] );
uint32_t nr_count[2];
/* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4 */
udctcoef (*nr_offset)[64];
uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
ALIGNED_16( udctcoef nr_offset_denoise[3][64] );
ALIGNED_16( uint32_t nr_residual_sum_buf[2][3][64] );
uint32_t nr_count_buf[2][3];
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
......
......@@ -143,7 +143,7 @@ static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
{
for( int i = 1; i < size; i++ )
for( int i = 0; i < size; i++ )
{
int level = dct[i];
int sign = level>>31;
......
......@@ -23,6 +23,8 @@
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
#define _ISOC99_SOURCE
#include <math.h>
#include "common.h"
#define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
......@@ -185,6 +187,48 @@ int x264_cqm_init( x264_t *h )
}
}
/* Emergency mode denoising.
 * Builds one table of denoise offsets per out-of-spec "quantizer" step
 * (QP_MAX_SPEC+1 .. QP_MAX), for each of the three coefficient categories
 * (0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4). The table is indexed as
 * nr_offset_emergency[q][cat][coefficient]. */
x264_emms();
CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
for( int cat = 0; cat <= 2; cat++ )
{
/* Only category 1 (luma 8x8) has 64 coefficients; the others have 16. */
int dct8x8 = cat == 1;
int size = dct8x8 ? 64 : 16;
udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
/* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC.
 * Each threshold is the first step q at which that class of coefficient gets a
 * nonzero offset.
 * NOTE(review): dc_threshold and luma_threshold are currently identical, so
 * DC and luma AC start being denoised at the same step. */
int dc_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int luma_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int chroma_threshold = 0;
for( int i = 0; i < size; i++ )
{
/* Largest offset ever stored: (1 << (7 + BIT_DEPTH)) - 1. */
int max = (1 << (7 + BIT_DEPTH)) - 1;
/* True "emergency mode": remove all DCT coefficients.
 * The last step (q == QP_MAX - QP_MAX_SPEC - 1) uses the maximum offset everywhere. */
if( q == QP_MAX - QP_MAX_SPEC - 1 )
{
nr_offset[i] = max;
continue;
}
/* Coefficient 0 uses the DC threshold; the rest use the chroma or luma threshold by category. */
int thresh = i == 0 ? dc_threshold : cat == 2 ? chroma_threshold : luma_threshold;
if( q < thresh )
{
nr_offset[i] = 0;
continue;
}
/* pos runs from just above 0 (at q == thresh) up to 1 (at the last step). */
double pos = (double)(q-thresh+1) / (QP_MAX - QP_MAX_SPEC - thresh);
/* XXX: this math is largely tuned for /dev/random input. */
/* The offset is scaled by the inter-luma (CQM_8PY / CQM_4PY) unquant table entry at QP_MAX_SPEC,
 * for every category including chroma. */
double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i]
: h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i];
/* Formula chosen as an exponential scale to vaguely mimic the effects
 * of a higher quantizer. */
double bias = (pow( 2, pos*(QP_MAX - QP_MAX_SPEC)/10. )*0.003-0.003) * start;
/* Round to nearest and clamp to max. */
nr_offset[i] = X264_MIN( bias + 0.5, max );
}
}
if( !h->mb.b_lossless )
{
while( h->chroma_qp_table[h->param.rc.i_qp_min] <= max_chroma_qp_err )
......@@ -229,6 +273,7 @@ void x264_cqm_delete( x264_t *h )
{
CQM_DELETE( 4, 4 );
CQM_DELETE( 8, 2 );
x264_free( h->nr_offset_emergency );
}
static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
......
......@@ -745,8 +745,7 @@ DEQUANT_DC sse2 , w
; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
;-----------------------------------------------------------------------------
%macro DENOISE_DCT 1-2 0
cglobal denoise_dct_%1, 4,5,%2
mov r4d, [r0] ; backup DC coefficient
cglobal denoise_dct_%1, 4,4,%2
pxor m6, m6
.loop:
sub r3, mmsize/2
......@@ -773,8 +772,7 @@ cglobal denoise_dct_%1, 4,5,%2
mova [r1+r3*4+0*mmsize], m4
mova [r1+r3*4+1*mmsize], m5
jg .loop
mov [r0], r4d ; restore DC coefficient
RET
REP_RET
%endmacro
%define PABSD PABSD_MMX
......@@ -795,8 +793,7 @@ DENOISE_DCT ssse3, 8
; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
;-----------------------------------------------------------------------------
%macro DENOISE_DCT 1-2 0
cglobal denoise_dct_%1, 4,5,%2
movzx r4d, word [r0]
cglobal denoise_dct_%1, 4,4,%2
pxor m6, m6
.loop:
sub r3, mmsize
......@@ -823,8 +820,7 @@ cglobal denoise_dct_%1, 4,5,%2
mova [r1+r3*4+2*mmsize], m3
mova [r1+r3*4+3*mmsize], m1
jg .loop
mov [r0], r4w
RET
REP_RET
%endmacro
%define PABSW PABSW_MMX
......
......@@ -147,18 +147,25 @@ const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
2048,2299, /* 80-81 */
};
/* lambda2 = pow(lambda,2) * .9 * 256 */
/* Capped to avoid overflow */
const int x264_lambda2_tab[QP_MAX_MAX+1] = {
14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */
14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */
};
const uint8_t x264_exp2_lut[64] = {
......@@ -196,32 +203,42 @@ const float x264_log2_lz_lut[32] = {
// I'm just matching the behaviour of deadzone quant.
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
// inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
{ 46, 58, 73, 92, 117, 147,
185, 233, 294, 370, 466, 587,
740, 932, 1174, 1480, 1864, 2349,
2959, 3728, 4697, 5918, 7457, 9395,
11837, 14914, 18790, 23674, 29828, 37581,
47349, 59656, 75163, 94699, 119313, 150326,
189399, 238627, 300652, 378798, 477255, 601304,
757596, 954511, 1202608, 1515192, 1909022, 2405217,
3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
12121539,15272182,19241743,24243077,30544363,38483486,
48486154,61088726,76966972,96972308 },
{
46, 58, 73, 92, 117, 147,
185, 233, 294, 370, 466, 587,
740, 932, 1174, 1480, 1864, 2349,
2959, 3728, 4697, 5918, 7457, 9395,
11837, 14914, 18790, 23674, 29828, 37581,
47349, 59656, 75163, 94699, 119313, 150326,
189399, 238627, 300652, 378798, 477255, 601304,
757596, 954511, 1202608, 1515192, 1909022, 2405217,
3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
48486154, 61088726, 76966972, 96972308,
122177453,134217727,134217727,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
},
// intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
{ 27, 34, 43, 54, 68, 86,
108, 136, 172, 216, 273, 343,
433, 545, 687, 865, 1090, 1374,
1731, 2180, 2747, 3461, 4361, 5494,
6922, 8721, 10988, 13844, 17442, 21976,
27688, 34885, 43953, 55377, 69771, 87906,
110755, 139543, 175813, 221511, 279087, 351627,
443023, 558174, 703255, 886046, 1116348, 1406511,
1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
7088374, 8930791,11252092,14176748,17861583,22504184,
28353495,35723165,45008368,56706990 }
{
27, 34, 43, 54, 68, 86,
108, 136, 172, 216, 273, 343,
433, 545, 687, 865, 1090, 1374,
1731, 2180, 2747, 3461, 4361, 5494,
6922, 8721, 10988, 13844, 17442, 21976,
27688, 34885, 43953, 55377, 69771, 87906,
110755, 139543, 175813, 221511, 279087, 351627,
443023, 558174, 703255, 886046, 1116348, 1406511,
1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
7088374, 8930791, 11252092, 14176748, 17861583, 22504184,
28353495, 35723165, 45008368, 56706990,
71446330, 90016736,113413980,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
}
};
static const uint16_t x264_chroma_lambda2_offset_tab[] = {
#define MAX_CHROMA_LAMBDA_OFFSET 36
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = {
16, 20, 25, 32, 40, 50,
64, 80, 101, 128, 161, 203,
256, 322, 406, 512, 645, 812,
......@@ -247,35 +264,35 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33];
static uint16_t x264_cost_ref[QP_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
int x264_analyse_init_costs( x264_t *h, int qp )
{
int lambda = x264_lambda_tab[qp];
if( h->cost_mv[lambda] )
if( h->cost_mv[qp] )
return 0;
/* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv[lambda] += 2*4*2048;
CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv[qp] += 2*4*2048;
for( int i = 0; i <= 2*4*2048; i++ )
{
h->cost_mv[lambda][-i] =
h->cost_mv[lambda][i] = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f;
h->cost_mv[qp][-i] =
h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
for( int j = 0; j < 33; j++ )
x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0;
x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
x264_pthread_mutex_unlock( &cost_ref_mutex );
if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] )
if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
{
for( int j = 0; j < 4; j++ )
{
CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv_fpel[lambda][j] += 2*2048;
CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv_fpel[qp][j] += 2*2048;
for( int i = -2*2048; i < 2*2048; i++ )
h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j];
h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
}
}
return 0;
......@@ -285,7 +302,7 @@ fail:
void x264_analyse_free_costs( x264_t *h )
{
for( int i = 0; i < LAMBDA_MAX+1; i++ )
for( int i = 0; i < QP_MAX+1; i++ )
{
if( h->cost_mv[i] )
x264_free( h->cost_mv[i] - 2*4*2048 );
......@@ -326,34 +343,51 @@ void x264_analyse_weight_frame( x264_t *h, int end )
/* initialize an array of lambda*nbits for all possible mvs */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{
a->p_cost_mv = h->cost_mv[a->i_lambda];
a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
a->p_cost_mv = h->cost_mv[a->i_qp];
a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
}
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp )
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{
/* conduct the analysis using this lambda and QP */
a->i_qp = h->mb.i_qp = i_qp;
h->mb.i_chroma_qp = h->chroma_qp_table[i_qp];
a->i_lambda = x264_lambda_tab[i_qp];
a->i_lambda2 = x264_lambda2_tab[i_qp];
int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
a->i_lambda = x264_lambda_tab[qp];
a->i_lambda2 = x264_lambda2_tab[qp];
h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
if( h->param.analyse.i_trellis )
{
h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp];
h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp];
h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp];
h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp];
h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp];
h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
}
h->mb.i_psy_rd_lambda = a->i_lambda;
/* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256;
int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET );
h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
if( qp > QP_MAX_SPEC )
{
h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
h->nr_residual_sum = h->nr_residual_sum_buf[1];
h->nr_count = h->nr_count_buf[1];
h->mb.b_noise_reduction = 1;
qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
}
else
{
h->nr_offset = h->nr_offset_denoise;
h->nr_residual_sum = h->nr_residual_sum_buf[0];
h->nr_count = h->nr_count_buf[0];
h->mb.b_noise_reduction = 0;
}
a->i_qp = h->mb.i_qp = qp;
h->mb.i_chroma_qp = h->chroma_qp_table[qp];
}
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{
int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
......@@ -363,10 +397,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
x264_mb_analyse_init_qp( h, a, i_qp );
x264_mb_analyse_init_qp( h, a, qp );
h->mb.b_transform_8x8 = 0;
h->mb.b_noise_reduction = 0;
/* I: Intra part */
a->i_satd_i16x16 =
......@@ -3477,7 +3510,8 @@ intra_analysis:
x264_mb_analyse_qp_rd( h, &analysis );
h->mb.b_trellis = h->param.analyse.i_trellis;
h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
x264_psy_trellis_init( h, 0 );
if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
......
......@@ -164,8 +164,8 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
sh->i_cabac_init_idc = param->i_cabac_init_idc;
sh->i_qp = i_qp;
sh->i_qp_delta = i_qp - pps->i_pic_init_qp;
sh->i_qp = SPEC_QP(i_qp);
sh->i_qp_delta = sh->i_qp - pps->i_pic_init_qp;
sh->b_sp_for_swidth = 0;
sh->i_qs_delta = 0;
......@@ -1065,7 +1065,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
p += sprintf( p, " none!" );
x264_log( h, X264_LOG_INFO, "%s\n", buf );
for( qp = h->param.rc.i_qp_min; qp <= h->param.rc.i_qp_max; qp++ )
int qp_max = h->param.rc.i_qp_max == QP_MAX_SPEC ? QP_MAX : h->param.rc.i_qp_max;
for( qp = h->param.rc.i_qp_min; qp <= qp_max; qp++ )
if( x264_analyse_init_costs( h, qp ) )
goto fail;
if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
......@@ -1073,7 +1074,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */
if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] )
if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
{
x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
goto fail;
......
......@@ -84,6 +84,8 @@ static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( h->mb.b_trellis )
return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
else
......@@ -93,6 +95,8 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, i
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
if( h->mb.b_trellis )
return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
else
......@@ -218,6 +222,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
for( int i = 0; i < 16; i++ )
{
/* copy dc coeff */
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[0], h->nr_offset[0], 16 );
dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0];
dct4x4[i][0] = 0;
......@@ -328,11 +334,12 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
int b_decimate = b_inter && h->mb.b_dct_decimate;
ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
h->mb.i_cbp_chroma = 0;
h->nr_count[2] += h->mb.b_noise_reduction * 4;
/* Early termination: check variance of chroma residual before encoding.
* Don't bother trying early termination at low QPs.
* Values are experimentally derived. */
if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) )
if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
{
int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
int ssd[2];
......@@ -401,6 +408,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
}
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
if( h->mb.b_noise_reduction )
for( int i = 0; i < 4; i++ )
h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct2x2dc( dct2x2, dct4x4 );
/* calculate dct coeffs */
for( int i = 0; i < 4; i++ )
......@@ -748,8 +758,6 @@ void x264_macroblock_encode( x264_t *h )
for( int idx = 0; idx < 4; idx++ )
{
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
if( nz )
......@@ -807,8 +815,6 @@ void x264_macroblock_encode( x264_t *h )
{
int idx = i8x8 * 4 + i4x4;
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
......@@ -942,6 +948,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
/* encode one 4x4 block */
for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
continue;
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
......@@ -984,7 +992,17 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
/* The vast majority of chroma checks will terminate during the DC check or the higher
* threshold check, so we can save time by doing a DC-only DCT. */
h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
if( h->mb.b_noise_reduction )
{
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{
h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct2x2[i4x4] = dct4x4[i4x4][0];
}
}
else
h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );
if( h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4PC][i_qp][0]>>1, h->quant4_bias[CQM_4PC][i_qp][0]<<1 ) )
return 0;
......@@ -993,12 +1011,15 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
if( ssd < thresh*4 )
continue;
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
if( !h->mb.b_noise_reduction )
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
/* calculate dct coeffs */
for( int i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
{
dct4x4[i4x4][0] = 0;
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PC][i_qp], h->quant4_bias[CQM_4PC][i_qp] ) )
continue;
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
......@@ -1019,12 +1040,16 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
void x264_noise_reduction_update( x264_t *h )
{
for( int cat = 0; cat < 2; cat++ )
h->nr_offset = h->nr_offset_denoise;
h->nr_residual_sum = h->nr_residual_sum_buf[0];
h->nr_count = h->nr_count_buf[0];
for( int cat = 0; cat < 3; cat++ )
{
int size = cat ? 64 : 16;
const uint16_t *weight = cat ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
int dct8x8 = cat == 1;
int size = dct8x8 ? 64 : 16;
const uint16_t *weight = dct8x8 ? x264_dct8_weight2_tab : x264_dct4_weight2_tab;
if( h->nr_count[cat] > (cat ? (1<<16) : (1<<18)) )
if( h->nr_count[cat] > (dct8x8 ? (1<<16) : (1<<18)) )
{
for( int i = 0; i < size; i++ )
h->nr_residual_sum[cat][i] >>= 1;
......@@ -1036,6 +1061,9 @@ void x264_noise_reduction_update( x264_t *h )
((uint64_t)h->param.analyse.i_noise_reduction * h->nr_count[cat]
+ h->nr_residual_sum[cat][i]/2)
/ ((uint64_t)h->nr_residual_sum[cat][i] * weight[i]/256 + 1);
/* Don't denoise DC coefficients */
h->nr_offset[cat][0] = 0;
}
}
......@@ -1146,8 +1174,9 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
ALIGNED_ARRAY_16( dctcoef, dct4x4,[16] );
p_fenc = h->mb.pic.p_fenc[1+ch] + 4*x + 4*y*FENC_STRIDE;
p_fdec = h->mb.pic.p_fdec[1+ch] + 4*x + 4*y*FDEC_STRIDE;
h->dctf.sub4x4_dct( dct4x4, p_fenc, p_fdec );
/* Denoise the chroma 4x4 residual (category 2) only when noise reduction is
 * active for this macroblock. The previous code had a stray ';' after the
 * condition, which turned the if into an empty statement and ran
 * denoise_dct unconditionally. */
if( h->mb.b_noise_reduction )
    h->quantf.denoise_dct( dct4x4, h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct4x4[0] = 0;
if( h->mb.b_trellis )
......
......@@ -595,7 +595,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
int delta = x264_pixel_size[sad_size].w;
int16_t *xs = h->scratch_buffer;
int xn;
uint16_t *cost_fpel_mvx = h->cost_mv_fpel[x264_lambda_tab[h->mb.i_qp]][-m->mvp[0]&3] + (-m->mvp[0]>>2);
uint16_t *cost_fpel_mvx = h->cost_mv_fpel[h->mb.i_qp][-m->mvp[0]&3] + (-m->mvp[0]>>2);
h->pixf.sad_x4[sad_size]( zero, p_fenc, p_fenc+delta,
p_fenc+delta*FENC_STRIDE, p_fenc+delta+delta*FENC_STRIDE,
......
......@@ -71,7 +71,7 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
extern uint16_t *x264_cost_mv_fpel[LAMBDA_MAX+1][4];
extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
#define COPY1_IF_LT(x,y)\
if((y)<(x))\
......
......@@ -1442,8 +1442,14 @@ int x264_ratecontrol_mb_qp( x264_t *h )
x264_emms();
float qp = h->rc->qpm;
if( h->param.rc.i_aq_mode )
/* MB-tree currently doesn't adjust quantizers in unreferenced frames. */
qp += h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
{
    /* MB-tree currently doesn't adjust quantizers in unreferenced frames:
     * f_qp_offset is used when the frame is kept as a reference,
     * f_qp_offset_aq otherwise. */
    float qp_offset = h->fdec->b_kept_as_ref ? h->fenc->f_qp_offset[h->mb.i_mb_xy] : h->fenc->f_qp_offset_aq[h->mb.i_mb_xy];
    /* Scale AQ's effect towards zero in emergency mode. The factor falls
     * linearly from 1 at QP_MAX_SPEC to 0 at QP_MAX. The divisor must be the
     * positive span (QP_MAX - QP_MAX_SPEC); dividing by (QP_MAX_SPEC - QP_MAX)
     * made the factor negative and flipped the sign of the offset instead of
     * attenuating it. qp is a float, so this is a floating-point division. */
    if( qp > QP_MAX_SPEC )
        qp_offset *= (QP_MAX - qp) / (QP_MAX - QP_MAX_SPEC);
    qp += qp_offset;
}
return x264_clip3( qp + .5, h->param.rc.i_qp_min, h->param.rc.i_qp_max );
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment