Commit e6025413 authored by Fiona Glaser

VBV emergency mode

Allow ratecontrol to select "quantizers" above the maximum.
These "quantizers" progressively decimate the source to avoid VBV underflow.
x264 is now VBV compliant even with input as evil as /dev/random.
parent 68cda11b
...@@ -59,12 +59,13 @@ do {\ ...@@ -59,12 +59,13 @@ do {\
#define X264_PCM_COST (384*BIT_DEPTH+16) #define X264_PCM_COST (384*BIT_DEPTH+16)
#define X264_LOOKAHEAD_MAX 250 #define X264_LOOKAHEAD_MAX 250
#define QP_BD_OFFSET (6*(BIT_DEPTH-8)) #define QP_BD_OFFSET (6*(BIT_DEPTH-8))
#define QP_MAX (51+QP_BD_OFFSET) #define QP_MAX_SPEC (51+QP_BD_OFFSET)
#define QP_MAX_MAX (51+2*6) #define QP_MAX (QP_MAX_SPEC+18)
#define LAMBDA_MAX (91 << (BIT_DEPTH-8)) #define QP_MAX_MAX (51+2*6+18)
#define PIXEL_MAX ((1 << BIT_DEPTH)-1) #define PIXEL_MAX ((1 << BIT_DEPTH)-1)
// arbitrary, but low because SATD scores are 1/4 normal // arbitrary, but low because SATD scores are 1/4 normal
#define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET) #define X264_LOOKAHEAD_QP (12+QP_BD_OFFSET)
#define SPEC_QP(x) X264_MIN((x), QP_MAX_SPEC)
// number of pixels (per thread) in progress at any given time. // number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety // 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
...@@ -460,12 +461,11 @@ struct x264_t ...@@ -460,12 +461,11 @@ struct x264_t
udctcoef (*quant8_mf[2])[64]; /* [2][52][64] */ udctcoef (*quant8_mf[2])[64]; /* [2][52][64] */
udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */ udctcoef (*quant4_bias[4])[16]; /* [4][52][16] */
udctcoef (*quant8_bias[2])[64]; /* [2][52][64] */ udctcoef (*quant8_bias[2])[64]; /* [2][52][64] */
udctcoef (*nr_offset_emergency)[3][64];
/* mv/ref cost arrays. Indexed by lambda instead of /* mv/ref cost arrays. */
* qp because, due to rounding, some quantizers share uint16_t *cost_mv[QP_MAX+1];
* lambdas. This saves memory. */ uint16_t *cost_mv_fpel[QP_MAX+1][4];
uint16_t *cost_mv[LAMBDA_MAX+1];
uint16_t *cost_mv_fpel[LAMBDA_MAX+1][4];
const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */ const uint8_t *chroma_qp_table; /* includes both the nonlinear luma->chroma mapping and chroma_qp_offset */
...@@ -812,9 +812,14 @@ struct x264_t ...@@ -812,9 +812,14 @@ struct x264_t
} stat; } stat;
ALIGNED_16( uint32_t nr_residual_sum[2][64] ); /* 0 = luma 4x4, 1 = luma 8x8, 2 = chroma 4x4 */
ALIGNED_16( udctcoef nr_offset[2][64] ); udctcoef (*nr_offset)[64];
uint32_t nr_count[2]; uint32_t (*nr_residual_sum)[64];
uint32_t *nr_count;
ALIGNED_16( udctcoef nr_offset_denoise[3][64] );
ALIGNED_16( uint32_t nr_residual_sum_buf[2][3][64] );
uint32_t nr_count_buf[2][3];
/* Buffers that are allocated per-thread even in sliced threads. */ /* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */ void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
......
...@@ -143,7 +143,7 @@ static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp ) ...@@ -143,7 +143,7 @@ static void dequant_4x4_dc( dctcoef dct[16], int dequant_mf[6][16], int i_qp )
static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size ) static void x264_denoise_dct( dctcoef *dct, uint32_t *sum, udctcoef *offset, int size )
{ {
for( int i = 1; i < size; i++ ) for( int i = 0; i < size; i++ )
{ {
int level = dct[i]; int level = dct[i];
int sign = level>>31; int sign = level>>31;
......
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
* For more information, contact us at licensing@x264.com. * For more information, contact us at licensing@x264.com.
*****************************************************************************/ *****************************************************************************/
#define _ISOC99_SOURCE
#include <math.h>
#include "common.h" #include "common.h"
#define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s)) #define SHIFT(x,s) ((s)<=0 ? (x)<<-(s) : ((x)+(1<<((s)-1)))>>(s))
...@@ -185,6 +187,48 @@ int x264_cqm_init( x264_t *h ) ...@@ -185,6 +187,48 @@ int x264_cqm_init( x264_t *h )
} }
} }
/* Emergency mode denoising. */
x264_emms();
CHECKED_MALLOC( h->nr_offset_emergency, sizeof(*h->nr_offset_emergency)*(QP_MAX-QP_MAX_SPEC) );
for( int q = 0; q < QP_MAX - QP_MAX_SPEC; q++ )
for( int cat = 0; cat <= 2; cat++ )
{
int dct8x8 = cat == 1;
int size = dct8x8 ? 64 : 16;
udctcoef *nr_offset = h->nr_offset_emergency[q][cat];
/* Denoise chroma first (due to h264's chroma QP offset), then luma, then DC. */
int dc_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int luma_threshold = (QP_MAX-QP_MAX_SPEC)*2/3;
int chroma_threshold = 0;
for( int i = 0; i < size; i++ )
{
int max = (1 << (7 + BIT_DEPTH)) - 1;
/* True "emergency mode": remove all DCT coefficients */
if( q == QP_MAX - QP_MAX_SPEC - 1 )
{
nr_offset[i] = max;
continue;
}
int thresh = i == 0 ? dc_threshold : cat == 2 ? chroma_threshold : luma_threshold;
if( q < thresh )
{
nr_offset[i] = 0;
continue;
}
double pos = (double)(q-thresh+1) / (QP_MAX - QP_MAX_SPEC - thresh);
/* XXX: this math is largely tuned for /dev/random input. */
double start = dct8x8 ? h->unquant8_mf[CQM_8PY][QP_MAX_SPEC][i]
: h->unquant4_mf[CQM_4PY][QP_MAX_SPEC][i];
/* Formula chosen as an exponential scale to vaguely mimic the effects
* of a higher quantizer. */
double bias = (pow( 2, pos*(QP_MAX - QP_MAX_SPEC)/10. )*0.003-0.003) * start;
nr_offset[i] = X264_MIN( bias + 0.5, max );
}
}
if( !h->mb.b_lossless ) if( !h->mb.b_lossless )
{ {
while( h->chroma_qp_table[h->param.rc.i_qp_min] <= max_chroma_qp_err ) while( h->chroma_qp_table[h->param.rc.i_qp_min] <= max_chroma_qp_err )
...@@ -229,6 +273,7 @@ void x264_cqm_delete( x264_t *h ) ...@@ -229,6 +273,7 @@ void x264_cqm_delete( x264_t *h )
{ {
CQM_DELETE( 4, 4 ); CQM_DELETE( 4, 4 );
CQM_DELETE( 8, 2 ); CQM_DELETE( 8, 2 );
x264_free( h->nr_offset_emergency );
} }
static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name, static int x264_cqm_parse_jmlist( x264_t *h, const char *buf, const char *name,
......
...@@ -745,8 +745,7 @@ DEQUANT_DC sse2 , w ...@@ -745,8 +745,7 @@ DEQUANT_DC sse2 , w
; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size ) ; void denoise_dct( int32_t *dct, uint32_t *sum, uint32_t *offset, int size )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro DENOISE_DCT 1-2 0 %macro DENOISE_DCT 1-2 0
cglobal denoise_dct_%1, 4,5,%2 cglobal denoise_dct_%1, 4,4,%2
mov r4d, [r0] ; backup DC coefficient
pxor m6, m6 pxor m6, m6
.loop: .loop:
sub r3, mmsize/2 sub r3, mmsize/2
...@@ -773,8 +772,7 @@ cglobal denoise_dct_%1, 4,5,%2 ...@@ -773,8 +772,7 @@ cglobal denoise_dct_%1, 4,5,%2
mova [r1+r3*4+0*mmsize], m4 mova [r1+r3*4+0*mmsize], m4
mova [r1+r3*4+1*mmsize], m5 mova [r1+r3*4+1*mmsize], m5
jg .loop jg .loop
mov [r0], r4d ; restore DC coefficient REP_RET
RET
%endmacro %endmacro
%define PABSD PABSD_MMX %define PABSD PABSD_MMX
...@@ -795,8 +793,7 @@ DENOISE_DCT ssse3, 8 ...@@ -795,8 +793,7 @@ DENOISE_DCT ssse3, 8
; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size ) ; void denoise_dct( int16_t *dct, uint32_t *sum, uint16_t *offset, int size )
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro DENOISE_DCT 1-2 0 %macro DENOISE_DCT 1-2 0
cglobal denoise_dct_%1, 4,5,%2 cglobal denoise_dct_%1, 4,4,%2
movzx r4d, word [r0]
pxor m6, m6 pxor m6, m6
.loop: .loop:
sub r3, mmsize sub r3, mmsize
...@@ -823,8 +820,7 @@ cglobal denoise_dct_%1, 4,5,%2 ...@@ -823,8 +820,7 @@ cglobal denoise_dct_%1, 4,5,%2
mova [r1+r3*4+2*mmsize], m3 mova [r1+r3*4+2*mmsize], m3
mova [r1+r3*4+3*mmsize], m1 mova [r1+r3*4+3*mmsize], m1
jg .loop jg .loop
mov [r0], r4w REP_RET
RET
%endmacro %endmacro
%define PABSW PABSW_MMX %define PABSW PABSW_MMX
......
...@@ -147,18 +147,25 @@ const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = { ...@@ -147,18 +147,25 @@ const uint16_t x264_lambda_tab[QP_MAX_MAX+1] = {
25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */ 25, 29, 32, 36, 40, 45, 51, 57, /* 40-47 */
64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */ 64, 72, 81, 91, 102, 114, 128, 144, /* 48-55 */
161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */ 161, 181, 203, 228, 256, 287, 323, 362, /* 56-63 */
406, 456, 512, 575, 645, 724, 813, 912, /* 64-71 */
1024,1149,1290,1448,1625,1825,2048,2299, /* 72-79 */
/* Values double every 6 QP steps (74 -> 1290, 75 -> 1448), so 80-81 must not repeat 78-79. */
2580,2896, /* 80-81 */
}; };
/* lambda2 = pow(lambda,2) * .9 * 256 */ /* lambda2 = pow(lambda,2) * .9 * 256 */
/* Capped to avoid overflow */
const int x264_lambda2_tab[QP_MAX_MAX+1] = { const int x264_lambda2_tab[QP_MAX_MAX+1] = {
14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */ 14, 18, 22, 28, 36, 45, 57, 72, /* 0- 7 */
91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */ 91, 115, 145, 182, 230, 290, 365, 460, /* 8-15 */
580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */ 580, 731, 921, 1161, 1462, 1843, 2322, 2925, /* 16-23 */
3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */ 3686, 4644, 5851, 7372, 9289, 11703, 14745, 18578, /* 24-31 */
23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */ 23407, 29491, 37156, 46814, 58982, 74313, 93628, 117964, /* 32-39 */
148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */ 148626, 187257, 235929, 297252, 374514, 471859, 594505, 749029, /* 40-47 */
943718,1189010,1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */ 943718, 1189010, 1498059, 1887436, 2378021, 2996119, 3774873, 4756042, /* 48-55 */
5992238,7549747,9512085,11984476,15099494,19024170,23968953,30198988, /* 56-63 */ 5992238, 7549747, 9512085, 11984476, 15099494, 19024170,23968953,30198988, /* 56-63 */
38048341, 47937906, 60397977, 76096683, 95875813,120795955, /* 64-69 */
134217727,134217727,134217727,134217727,134217727,134217727, /* 70-75 */
134217727,134217727,134217727,134217727,134217727,134217727, /* 76-81 */
}; };
const uint8_t x264_exp2_lut[64] = { const uint8_t x264_exp2_lut[64] = {
...@@ -196,32 +203,42 @@ const float x264_log2_lz_lut[32] = { ...@@ -196,32 +203,42 @@ const float x264_log2_lz_lut[32] = {
// I'm just matching the behaviour of deadzone quant. // I'm just matching the behaviour of deadzone quant.
static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = { static const int x264_trellis_lambda2_tab[2][QP_MAX_MAX+1] = {
// inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS) // inter lambda = .85 * .85 * 2**(qp/3. + 10 - LAMBDA_BITS)
{ 46, 58, 73, 92, 117, 147, {
185, 233, 294, 370, 466, 587, 46, 58, 73, 92, 117, 147,
740, 932, 1174, 1480, 1864, 2349, 185, 233, 294, 370, 466, 587,
2959, 3728, 4697, 5918, 7457, 9395, 740, 932, 1174, 1480, 1864, 2349,
11837, 14914, 18790, 23674, 29828, 37581, 2959, 3728, 4697, 5918, 7457, 9395,
47349, 59656, 75163, 94699, 119313, 150326, 11837, 14914, 18790, 23674, 29828, 37581,
189399, 238627, 300652, 378798, 477255, 601304, 47349, 59656, 75163, 94699, 119313, 150326,
757596, 954511, 1202608, 1515192, 1909022, 2405217, 189399, 238627, 300652, 378798, 477255, 601304,
3030384, 3818045, 4810435, 6060769, 7636091, 9620872, 757596, 954511, 1202608, 1515192, 1909022, 2405217,
12121539,15272182,19241743,24243077,30544363,38483486, 3030384, 3818045, 4810435, 6060769, 7636091, 9620872,
48486154,61088726,76966972,96972308 }, 12121539, 15272182, 19241743, 24243077, 30544363, 38483486,
48486154, 61088726, 76966972, 96972308,
122177453,134217727,134217727,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
/* Entries 76-81: the row is declared with QP_MAX_MAX+1 (82) elements, so without
 * these the top six indices would be implicitly zero instead of the capped value. */
134217727,134217727,134217727,134217727,134217727,134217727,
},
// intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS) // intra lambda = .65 * .65 * 2**(qp/3. + 10 - LAMBDA_BITS)
{ 27, 34, 43, 54, 68, 86, {
108, 136, 172, 216, 273, 343, 27, 34, 43, 54, 68, 86,
433, 545, 687, 865, 1090, 1374, 108, 136, 172, 216, 273, 343,
1731, 2180, 2747, 3461, 4361, 5494, 433, 545, 687, 865, 1090, 1374,
6922, 8721, 10988, 13844, 17442, 21976, 1731, 2180, 2747, 3461, 4361, 5494,
27688, 34885, 43953, 55377, 69771, 87906, 6922, 8721, 10988, 13844, 17442, 21976,
110755, 139543, 175813, 221511, 279087, 351627, 27688, 34885, 43953, 55377, 69771, 87906,
443023, 558174, 703255, 886046, 1116348, 1406511, 110755, 139543, 175813, 221511, 279087, 351627,
1772093, 2232697, 2813022, 3544186, 4465396, 5626046, 443023, 558174, 703255, 886046, 1116348, 1406511,
7088374, 8930791,11252092,14176748,17861583,22504184, 1772093, 2232697, 2813022, 3544186, 4465396, 5626046,
28353495,35723165,45008368,56706990 } 7088374, 8930791, 11252092, 14176748, 17861583, 22504184,
28353495, 35723165, 45008368, 56706990,
71446330, 90016736,113413980,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
134217727,134217727,134217727,134217727,134217727,134217727,
}
}; };
static const uint16_t x264_chroma_lambda2_offset_tab[] = { #define MAX_CHROMA_LAMBDA_OFFSET 36
static const uint16_t x264_chroma_lambda2_offset_tab[MAX_CHROMA_LAMBDA_OFFSET+1] = {
16, 20, 25, 32, 40, 50, 16, 20, 25, 32, 40, 50,
64, 80, 101, 128, 161, 203, 64, 80, 101, 128, 161, 203,
256, 322, 406, 512, 645, 812, 256, 322, 406, 512, 645, 812,
...@@ -247,35 +264,35 @@ static const uint8_t i_sub_mb_p_cost_table[4] = { ...@@ -247,35 +264,35 @@ static const uint8_t i_sub_mb_p_cost_table[4] = {
static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ); static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a );
static uint16_t x264_cost_ref[LAMBDA_MAX+1][3][33]; static uint16_t x264_cost_ref[QP_MAX+1][3][33];
static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER; static UNUSED x264_pthread_mutex_t cost_ref_mutex = X264_PTHREAD_MUTEX_INITIALIZER;
int x264_analyse_init_costs( x264_t *h, int qp ) int x264_analyse_init_costs( x264_t *h, int qp )
{ {
int lambda = x264_lambda_tab[qp]; int lambda = x264_lambda_tab[qp];
if( h->cost_mv[lambda] ) if( h->cost_mv[qp] )
return 0; return 0;
/* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */ /* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
CHECKED_MALLOC( h->cost_mv[lambda], (4*4*2048 + 1) * sizeof(uint16_t) ); CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv[lambda] += 2*4*2048; h->cost_mv[qp] += 2*4*2048;
for( int i = 0; i <= 2*4*2048; i++ ) for( int i = 0; i <= 2*4*2048; i++ )
{ {
h->cost_mv[lambda][-i] = h->cost_mv[qp][-i] =
h->cost_mv[lambda][i] = lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f; h->cost_mv[qp][i] = X264_MIN( lambda * (log2f(i+1)*2 + 0.718f + !!i) + .5f, (1<<16)-1 );
} }
x264_pthread_mutex_lock( &cost_ref_mutex ); x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ ) for( int i = 0; i < 3; i++ )
for( int j = 0; j < 33; j++ ) for( int j = 0; j < 33; j++ )
x264_cost_ref[lambda][i][j] = i ? lambda * bs_size_te( i, j ) : 0; x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
x264_pthread_mutex_unlock( &cost_ref_mutex ); x264_pthread_mutex_unlock( &cost_ref_mutex );
if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[lambda][0] ) if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
{ {
for( int j = 0; j < 4; j++ ) for( int j = 0; j < 4; j++ )
{ {
CHECKED_MALLOC( h->cost_mv_fpel[lambda][j], (4*2048 + 1) * sizeof(uint16_t) ); CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv_fpel[lambda][j] += 2*2048; h->cost_mv_fpel[qp][j] += 2*2048;
for( int i = -2*2048; i < 2*2048; i++ ) for( int i = -2*2048; i < 2*2048; i++ )
h->cost_mv_fpel[lambda][j][i] = h->cost_mv[lambda][i*4+j]; h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
} }
} }
return 0; return 0;
...@@ -285,7 +302,7 @@ fail: ...@@ -285,7 +302,7 @@ fail:
void x264_analyse_free_costs( x264_t *h ) void x264_analyse_free_costs( x264_t *h )
{ {
for( int i = 0; i < LAMBDA_MAX+1; i++ ) for( int i = 0; i < QP_MAX+1; i++ )
{ {
if( h->cost_mv[i] ) if( h->cost_mv[i] )
x264_free( h->cost_mv[i] - 2*4*2048 ); x264_free( h->cost_mv[i] - 2*4*2048 );
...@@ -326,34 +343,51 @@ void x264_analyse_weight_frame( x264_t *h, int end ) ...@@ -326,34 +343,51 @@ void x264_analyse_weight_frame( x264_t *h, int end )
/* initialize an array of lambda*nbits for all possible mvs */ /* initialize an array of lambda*nbits for all possible mvs */
static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a ) static void x264_mb_analyse_load_costs( x264_t *h, x264_mb_analysis_t *a )
{ {
a->p_cost_mv = h->cost_mv[a->i_lambda]; a->p_cost_mv = h->cost_mv[a->i_qp];
a->p_cost_ref[0] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)]; a->p_cost_ref[0] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l0_active-1,0,2)];
a->p_cost_ref[1] = x264_cost_ref[a->i_lambda][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)]; a->p_cost_ref[1] = x264_cost_ref[a->i_qp][x264_clip3(h->sh.i_num_ref_idx_l1_active-1,0,2)];
} }
static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int i_qp ) static void x264_mb_analyse_init_qp( x264_t *h, x264_mb_analysis_t *a, int qp )
{ {
/* conduct the analysis using this lamda and QP */ int effective_chroma_qp = h->chroma_qp_table[SPEC_QP(qp)] + X264_MAX( qp - QP_MAX_SPEC, 0 );
a->i_qp = h->mb.i_qp = i_qp; a->i_lambda = x264_lambda_tab[qp];
h->mb.i_chroma_qp = h->chroma_qp_table[i_qp]; a->i_lambda2 = x264_lambda2_tab[qp];
a->i_lambda = x264_lambda_tab[i_qp];
a->i_lambda2 = x264_lambda2_tab[i_qp];
h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd; h->mb.b_trellis = h->param.analyse.i_trellis > 1 && a->i_mbrd;
if( h->param.analyse.i_trellis ) if( h->param.analyse.i_trellis )
{ {
h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][h->mb.i_qp]; h->mb.i_trellis_lambda2[0][0] = x264_trellis_lambda2_tab[0][qp];
h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][h->mb.i_qp]; h->mb.i_trellis_lambda2[0][1] = x264_trellis_lambda2_tab[1][qp];
h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][h->mb.i_chroma_qp]; h->mb.i_trellis_lambda2[1][0] = x264_trellis_lambda2_tab[0][effective_chroma_qp];
h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][h->mb.i_chroma_qp]; h->mb.i_trellis_lambda2[1][1] = x264_trellis_lambda2_tab[1][effective_chroma_qp];
} }
h->mb.i_psy_rd_lambda = a->i_lambda; h->mb.i_psy_rd_lambda = a->i_lambda;
/* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */ /* Adjusting chroma lambda based on QP offset hurts PSNR but improves visual quality. */
h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[h->mb.i_qp-h->mb.i_chroma_qp+12] : 256; int chroma_offset_idx = X264_MIN( qp-effective_chroma_qp+12, MAX_CHROMA_LAMBDA_OFFSET );
h->mb.i_chroma_lambda2_offset = h->param.analyse.b_psy ? x264_chroma_lambda2_offset_tab[chroma_offset_idx] : 256;
if( qp > QP_MAX_SPEC )
{
h->nr_offset = h->nr_offset_emergency[qp-QP_MAX_SPEC-1];
h->nr_residual_sum = h->nr_residual_sum_buf[1];
h->nr_count = h->nr_count_buf[1];
h->mb.b_noise_reduction = 1;
qp = QP_MAX_SPEC; /* Out-of-spec QPs are just used for calculating lambda values. */
}
else
{
h->nr_offset = h->nr_offset_denoise;
h->nr_residual_sum = h->nr_residual_sum_buf[0];
h->nr_count = h->nr_count_buf[0];
h->mb.b_noise_reduction = 0;
}
a->i_qp = h->mb.i_qp = qp;
h->mb.i_chroma_qp = h->chroma_qp_table[qp];
} }
static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp )
{ {
int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B); int subme = h->param.analyse.i_subpel_refine - (h->sh.i_type == SLICE_TYPE_B);
...@@ -363,10 +397,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -363,10 +397,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10); a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10);
h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1; h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1;
x264_mb_analyse_init_qp( h, a, i_qp ); x264_mb_analyse_init_qp( h, a, qp );
h->mb.b_transform_8x8 = 0; h->mb.b_transform_8x8 = 0;
h->mb.b_noise_reduction = 0;
/* I: Intra part */ /* I: Intra part */
a->i_satd_i16x16 = a->i_satd_i16x16 =
...@@ -3477,7 +3510,8 @@ intra_analysis: ...@@ -3477,7 +3510,8 @@ intra_analysis:
x264_mb_analyse_qp_rd( h, &analysis ); x264_mb_analyse_qp_rd( h, &analysis );
h->mb.b_trellis = h->param.analyse.i_trellis; h->mb.b_trellis = h->param.analyse.i_trellis;
h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction; h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type ));
if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 ) if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
x264_psy_trellis_init( h, 0 ); x264_psy_trellis_init( h, 0 );
if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction ) if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
......
...@@ -164,8 +164,8 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh, ...@@ -164,8 +164,8 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
sh->i_cabac_init_idc = param->i_cabac_init_idc; sh->i_cabac_init_idc = param->i_cabac_init_idc;
sh->i_qp = i_qp; sh->i_qp = SPEC_QP(i_qp);
sh->i_qp_delta = i_qp - pps->i_pic_init_qp; sh->i_qp_delta = sh->i_qp - pps->i_pic_init_qp;
sh->b_sp_for_swidth = 0; sh->b_sp_for_swidth = 0;
sh->i_qs_delta = 0; sh->i_qs_delta = 0;
...@@ -1065,7 +1065,8 @@ x264_t *x264_encoder_open( x264_param_t *param ) ...@@ -1065,7 +1065,8 @@ x264_t *x264_encoder_open( x264_param_t *param )
p += sprintf( p, " none!" ); p += sprintf( p, " none!" );
x264_log( h, X264_LOG_INFO, "%s\n", buf ); x264_log( h, X264_LOG_INFO, "%s\n", buf );
for( qp = h->param.rc.i_qp_min; qp <= h->param.rc.i_qp_max; qp++ ) int qp_max = h->param.rc.i_qp_max == QP_MAX_SPEC ? QP_MAX : h->param.rc.i_qp_max;
for( qp = h->param.rc.i_qp_min; qp <= qp_max; qp++ )
if( x264_analyse_init_costs( h, qp ) ) if( x264_analyse_init_costs( h, qp ) )
goto fail; goto fail;
if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) ) if( x264_analyse_init_costs( h, X264_LOOKAHEAD_QP ) )
...@@ -1073,7 +1074,7 @@ x264_t *x264_encoder_open( x264_param_t *param ) ...@@ -1073,7 +1074,7 @@ x264_t *x264_encoder_open( x264_param_t *param )
static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 }; static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */ /* Checks for known miscompilation issues. */
if( h->cost_mv[x264_lambda_tab[X264_LOOKAHEAD_QP]][2013] != cost_mv_correct[BIT_DEPTH-8] ) if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
{ {
x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" ); x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
goto fail; goto fail;
......
...@@ -84,6 +84,8 @@ static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] ) ...@@ -84,6 +84,8 @@ static inline void dct2x2dc( dctcoef d[4], dctcoef dct4x4[4][16] )
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx ) static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int idx )
{ {
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY; int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
if( h->mb.b_noise_reduction && ctx_block_cat != DCT_LUMA_AC )
h->quantf.denoise_dct( dct, h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( h->mb.b_trellis ) if( h->mb.b_trellis )
return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx ); return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, 0, idx );
else else
...@@ -93,6 +95,8 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, i ...@@ -93,6 +95,8 @@ static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, i
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx ) static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int b_intra, int idx )
{ {
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY; int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct, h->nr_residual_sum[1], h->nr_offset[1], 64 );
if( h->mb.b_trellis ) if( h->mb.b_trellis )
return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx ); return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
else else
...@@ -218,6 +222,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp ) ...@@ -218,6 +222,8 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
for( int i = 0; i < 16; i++ ) for( int i = 0; i < 16; i++ )
{ {
/* copy dc coeff */ /* copy dc coeff */
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[0], h->nr_offset[0], 16 );
dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0]; dct_dc4x4[block_idx_xy_1d[i]] = dct4x4[i][0];
dct4x4[i][0] = 0; dct4x4[i][0] = 0;
...@@ -328,11 +334,12 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ) ...@@ -328,11 +334,12 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
int b_decimate = b_inter && h->mb.b_dct_decimate; int b_decimate = b_inter && h->mb.b_dct_decimate;
ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] ); ALIGNED_ARRAY_16( dctcoef, dct2x2,[4] );
h->mb.i_cbp_chroma = 0; h->mb.i_cbp_chroma = 0;
h->nr_count[2] += h->mb.b_noise_reduction * 4;
/* Early termination: check variance of chroma residual before encoding. /* Early termination: check variance of chroma residual before encoding.
* Don't bother trying early termination at low QPs. * Don't bother trying early termination at low QPs.
* Values are experimentally derived. */ * Values are experimentally derived. */
if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) ) if( b_decimate && i_qp >= (h->mb.b_trellis ? 12 : 18) && !h->mb.b_noise_reduction )
{ {
int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6; int thresh = (x264_lambda2_tab[i_qp] + 32) >> 6;
int ssd[2]; int ssd[2];
...@@ -401,6 +408,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp ) ...@@ -401,6 +408,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
} }
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst ); h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
if( h->mb.b_noise_reduction )
for( int i = 0; i < 4; i++ )
h->quantf.denoise_dct( dct4x4[i], h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct2x2dc( dct2x2, dct4x4 ); dct2x2dc( dct2x2, dct4x4 );
/* calculate dct coeffs */ /* calculate dct coeffs */
for( int i = 0; i < 4; i++ ) for( int i = 0; i < 4; i++ )
...@@ -748,8 +758,6 @@ void x264_macroblock_encode( x264_t *h ) ...@@ -748,8 +758,6 @@ void x264_macroblock_encode( x264_t *h )
for( int idx = 0; idx < 4; idx++ ) for( int idx = 0; idx < 4; idx++ )
{ {
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx ); nz = x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
if( nz ) if( nz )
...@@ -807,8 +815,6 @@ void x264_macroblock_encode( x264_t *h ) ...@@ -807,8 +815,6 @@ void x264_macroblock_encode( x264_t *h )
{ {
int idx = i8x8 * 4 + i4x4; int idx = i8x8 * 4 + i4x4;
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx ); nz = x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
h->mb.cache.non_zero_count[x264_scan8[idx]] = nz; h->mb.cache.non_zero_count[x264_scan8[idx]] = nz;
...@@ -942,6 +948,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir ) ...@@ -942,6 +948,8 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
/* encode one 4x4 block */ /* encode one 4x4 block */
for( int i4x4 = 0; i4x4 < 4; i4x4++ ) for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{ {
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) ) if( !h->quantf.quant_4x4( dct4x4[i4x4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ) )
continue; continue;
h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] ); h->zigzagf.scan_4x4( dctscan, dct4x4[i4x4] );
...@@ -984,7 +992,17 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir ) ...@@ -984,7 +992,17 @@ int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
/* The vast majority of chroma checks will terminate during the DC check or the higher /* The vast majority of chroma checks will terminate during the DC check or the higher
* threshold check, so we can save time by doing a DC-only DCT. */ * threshold check, so we can save time by doing a DC-only DCT. */
h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst ); if( h->mb.b_noise_reduction )
{
h->dctf.sub8x8_dct( dct4x4, p_src, p_dst );
for( int i4x4 = 0; i4x4 < 4; i4x4++ )
{
h->quantf.denoise_dct( dct4x4[i4x4], h->nr_residual_sum[2], h->nr_offset[2], 16 );
dct2x2[i4x4] = dct4x4[i4x4][0];
}
}
else
h->dctf.sub8x8_dct_dc( dct2x2, p_src, p_dst );