Commit ecc9bfab authored by Fiona Glaser

Psychovisually optimized rate-distortion optimization and trellis

The latter, psy-trellis, is disabled by default and is reserved as experimental; your mileage may vary.
Default subme is raised to 6 so that psy RD is on by default.
parent 95ed2720
......@@ -116,8 +116,10 @@ void x264_param_default( x264_param_t *param )
| X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_SPATIAL;
param->analyse.i_me_method = X264_ME_HEX;
param->analyse.f_psy_rd = 1.0;
param->analyse.f_psy_trellis = 0;
param->analyse.i_me_range = 16;
param->analyse.i_subpel_refine = 5;
param->analyse.i_subpel_refine = 6;
param->analyse.b_chroma_me = 1;
param->analyse.i_mv_range_thread = -1;
param->analyse.i_mv_range = -1; // set from level_idc
......@@ -470,6 +472,21 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
p->analyse.i_mv_range_thread = atoi(value);
OPT2("subme", "subq")
p->analyse.i_subpel_refine = atoi(value);
OPT("psy-rd")
{
    /* Accepted forms: "<rd>:<trellis>", "<rd>,<trellis>" or just "<rd>". */
    if( 2 == sscanf( value, "%f:%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) ||
        2 == sscanf( value, "%f,%f", &p->analyse.f_psy_rd, &p->analyse.f_psy_trellis ) )
    { }
    /* sscanf returns EOF (non-zero) when the input is empty, so require exactly
     * one successful conversion instead of testing the result for truthiness. */
    else if( 1 == sscanf( value, "%f", &p->analyse.f_psy_rd ) )
    {
        /* Only the RD strength was given: psy trellis stays off. */
        p->analyse.f_psy_trellis = 0;
    }
    else
    {
        /* Nothing parseable: disable both psy optimizations. */
        p->analyse.f_psy_rd = 0;
        p->analyse.f_psy_trellis = 0;
    }
}
OPT("bime")
p->analyse.b_bidir_me = atobool(value);
OPT("chroma-me")
......@@ -824,6 +841,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
s += sprintf( s, " analyse=%#x:%#x", p->analyse.intra, p->analyse.inter );
s += sprintf( s, " me=%s", x264_motion_est_names[ p->analyse.i_me_method ] );
s += sprintf( s, " subme=%d", p->analyse.i_subpel_refine );
s += sprintf( s, " psy_rd=%.1f:%.1f", p->analyse.f_psy_rd, p->analyse.f_psy_trellis );
s += sprintf( s, " brdo=%d", p->analyse.b_bframe_rdo );
s += sprintf( s, " mixed_ref=%d", p->analyse.b_mixed_references );
s += sprintf( s, " me_range=%d", p->analyse.i_me_range );
......
......@@ -381,6 +381,8 @@ struct x264_t
int b_chroma_me;
int b_trellis;
int b_noise_reduction;
int i_psy_rd; /* Psy RD strength--fixed point value*/
int i_psy_trellis; /* Psy trellis strength--fixed point value*/
int b_interlaced;
......@@ -462,6 +464,16 @@ struct x264_t
DECLARE_ALIGNED_16( int16_t i8x8_dct_buf[3][64] );
DECLARE_ALIGNED_16( int16_t i4x4_dct_buf[15][16] );
/* Psy trellis DCT data */
DECLARE_ALIGNED_16( int16_t fenc_dct8[4][64] );
DECLARE_ALIGNED_16( int16_t fenc_dct4[16][16] );
/* Psy RD SATD scores */
int fenc_satd[4][4];
int fenc_satd_sum;
int fenc_sa8d[2][2];
int fenc_sa8d_sum;
/* pointer over mb of the frame to be compressed */
uint8_t *p_fenc[3];
......
......@@ -41,6 +41,17 @@ static const uint16_t x264_dct8_weight_tab[64] = {
};
#undef W
/* Per-coefficient weights for a 4x4 block, stored through FIX8.
 * The weight class of a position is the number of odd coordinates it has
 * (0, 1 or 2), which gives the checkerboard-like layout below. */
#define W(i) ((i)==2 ? FIX8(0.70711) :\
              (i)==1 ? FIX8(1.11803) :\
              (i)==0 ? FIX8(1.76777) : 0)
static const uint16_t x264_dct4_weight_tab[16] = {
    /* row 0 */ W(0), W(1), W(0), W(1),
    /* row 1 */ W(1), W(2), W(1), W(2),
    /* row 2 */ W(0), W(1), W(0), W(1),
    /* row 3 */ W(1), W(2), W(1), W(2)
};
#undef W
/* inverse squared */
#define W(i) (i==0 ? FIX8(3.125) :\
i==1 ? FIX8(1.25) :\
......
......@@ -467,6 +467,58 @@ static void predict_4x4_mode_available( unsigned int i_neighbour,
}
}
/* Cache the zigzag-scanned DCT of the source (fenc) luma for psy trellis.
 *
 * do_both_dct: non-zero fills both the 8x8 cache (fenc_dct8) and the 4x4 cache
 *              (fenc_dct4); zero fills only the one matching h->mb.b_transform_8x8.
 *
 * For trellis=2 this is needed for both sizes of DCT; for trellis=1 it is only
 * needed for the chosen mode. */
static inline void x264_psy_trellis_init( x264_t *h, int do_both_dct )
{
    DECLARE_ALIGNED_16( int16_t dct8x8[4][8][8] );
    DECLARE_ALIGNED_16( int16_t dct4x4[16][4][4] );
    /* All-zero second operand for the "sub" DCT calls; presumably this makes them
     * return the transform of the source pixels themselves -- TODO confirm. */
    DECLARE_ALIGNED_16( uint8_t zero[16*FDEC_STRIDE] ) = {0};
    int i;

    if( do_both_dct || h->mb.b_transform_8x8 )
    {
        /* Four 8x8 luma blocks */
        h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[0], zero );
        for( i = 0; i < 4; i++ )
            h->zigzagf.scan_8x8( h->mb.pic.fenc_dct8[i], dct8x8[i] );
    }
    if( do_both_dct || !h->mb.b_transform_8x8 )
    {
        /* Sixteen 4x4 luma blocks */
        h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], zero );
        for( i = 0; i < 16; i++ )
            h->zigzagf.scan_4x4( h->mb.pic.fenc_dct4[i], dct4x4[i] );
    }
}
/* Cache the source (fenc) luma SATD and SA8D scores used by psy RD, with the DC
 * term subtracted. When trellis=2 and psy trellis is enabled, the psy-trellis
 * DCT cache is filled first. */
static inline void x264_mb_cache_fenc_satd( x264_t *h )
{
    DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};
    uint8_t *src;
    int blk, total_satd = 0, total_sa8d = 0;

    if( h->mb.i_psy_trellis && h->param.analyse.i_trellis == 2 )
        x264_psy_trellis_init( h, h->param.analyse.b_transform_8x8 );

    if( h->mb.i_psy_rd )
    {
        /* Sixteen 4x4 blocks, visited in raster order */
        for( blk = 0; blk < 16; blk++ )
        {
            int col = blk & 3;
            int row = blk >> 2;
            int score;
            src = h->mb.pic.p_fenc[0] + col*4 + row*4*FENC_STRIDE;
            score  = h->pixf.satd[PIXEL_4x4]( zero, 0, src, FENC_STRIDE );
            score -= h->pixf.sad[PIXEL_4x4]( zero, 0, src, FENC_STRIDE ) >> 1;
            h->mb.pic.fenc_satd[row][col] = score;
            total_satd += score;
        }

        /* Four 8x8 blocks, visited in raster order */
        for( blk = 0; blk < 4; blk++ )
        {
            int col = blk & 1;
            int row = blk >> 1;
            int score;
            src = h->mb.pic.p_fenc[0] + col*8 + row*8*FENC_STRIDE;
            score  = h->pixf.sa8d[PIXEL_8x8]( zero, 0, src, FENC_STRIDE );
            score -= h->pixf.sad[PIXEL_8x8]( zero, 0, src, FENC_STRIDE ) >> 2;
            h->mb.pic.fenc_sa8d[row][col] = score;
            total_sa8d += score;
        }

        h->mb.pic.fenc_satd_sum = total_satd;
        h->mb.pic.fenc_sa8d_sum = total_sa8d;
    }
}
static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
{
int i;
......@@ -1017,12 +1069,15 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a )
assert( a->l0.me16x16.mv[1] <= h->mb.mv_max_spel[1] || h->param.i_threads == 1 );
h->mb.i_type = P_L0;
if( a->b_mbrd && a->l0.me16x16.i_ref == 0
&& *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
if( a->b_mbrd )
{
h->mb.i_partition = D_16x16;
x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
x264_mb_cache_fenc_satd( h );
if( a->l0.me16x16.i_ref == 0 && *(uint32_t*)a->l0.me16x16.mv == *(uint32_t*)h->mb.cache.pskip_mv )
{
h->mb.i_partition = D_16x16;
x264_macroblock_cache_mv_ptr( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv );
a->l0.i_rd16x16 = x264_rd_cost_mb( h, a->i_lambda2 );
}
}
}
......@@ -1907,7 +1962,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd )
static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter )
{
int thresh = i_satd_inter * 17/16;
int thresh = i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16;
if( a->b_direct_available && a->i_rd16x16direct == COST_MAX )
{
......@@ -2066,6 +2121,8 @@ void x264_macroblock_analyse( x264_t *h )
/*--------------------------- Do the analysis ---------------------------*/
if( h->sh.i_type == SLICE_TYPE_I )
{
if( analysis.b_mbrd )
x264_mb_cache_fenc_satd( h );
x264_mb_analyse_intra( h, &analysis, COST_MAX );
if( analysis.b_mbrd )
x264_intra_rd( h, &analysis, COST_MAX );
......@@ -2344,6 +2401,9 @@ void x264_macroblock_analyse( x264_t *h )
int i_bskip_cost = COST_MAX;
int b_skip = 0;
if( analysis.b_mbrd )
x264_mb_cache_fenc_satd( h );
h->mb.i_type = B_SKIP;
if( h->mb.b_direct_auto_write )
{
......@@ -2589,6 +2649,8 @@ void x264_macroblock_analyse( x264_t *h )
h->mb.b_trellis = h->param.analyse.i_trellis;
h->mb.b_noise_reduction = !!h->param.analyse.i_noise_reduction;
if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 )
x264_psy_trellis_init( h, 0 );
if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction )
h->mb.i_skip_intra = 0;
}
......
......@@ -410,6 +410,7 @@ static int x264_validate_parameters( x264_t *h )
h->param.analyse.i_trellis = 0;
h->param.analyse.b_fast_pskip = 0;
h->param.analyse.i_noise_reduction = 0;
h->param.analyse.f_psy_rd = 0;
}
if( h->param.rc.i_rc_method == X264_RC_CQP )
{
......@@ -488,6 +489,26 @@ static int x264_validate_parameters( x264_t *h )
if( !h->param.b_cabac )
h->param.analyse.i_trellis = 0;
h->param.analyse.i_trellis = x264_clip3( h->param.analyse.i_trellis, 0, 2 );
if( !h->param.analyse.i_trellis )
h->param.analyse.f_psy_trellis = 0;
h->param.analyse.f_psy_rd = x264_clip3f( h->param.analyse.f_psy_rd, 0, 10 );
h->param.analyse.f_psy_trellis = x264_clip3f( h->param.analyse.f_psy_trellis, 0, 10 );
if( h->param.analyse.i_subpel_refine < 6 )
h->param.analyse.f_psy_rd = 0;
h->mb.i_psy_rd = FIX8( h->param.analyse.f_psy_rd );
/* Psy RDO increases overall quantizers to improve the quality of luma--this indirectly hurts chroma quality */
/* so we lower the chroma QP offset to compensate */
/* This can be triggered repeatedly on multiple calls to parameter_validate, but since encoding
* uses the pps chroma qp offset not the param chroma qp offset, this is not a problem. */
if( h->mb.i_psy_rd )
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_rd < 0.25 ? 1 : 2;
h->mb.i_psy_trellis = FIX8( h->param.analyse.f_psy_trellis / 4 );
/* Psy trellis has a similar effect. */
if( h->mb.i_psy_trellis )
h->param.analyse.i_chroma_qp_offset -= h->param.analyse.f_psy_trellis < 0.25 ? 1 : 2;
else
h->mb.i_psy_trellis = 0;
h->param.analyse.i_chroma_qp_offset = x264_clip3(h->param.analyse.i_chroma_qp_offset, -12, 12);
h->param.rc.i_aq_mode = x264_clip3( h->param.rc.i_aq_mode, 0, 1 );
if( h->param.rc.f_aq_strength <= 0 )
h->param.rc.i_aq_mode = 0;
......
......@@ -79,20 +79,20 @@ static int x264_mb_decimate_score( int16_t *dct, int i_max )
return i_score;
}
static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra )
static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
if( h->mb.b_trellis )
x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra );
x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra, idx );
else
h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra )
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra, int idx )
{
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
if( h->mb.b_trellis )
x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra );
x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra, idx );
else
h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}
......@@ -111,7 +111,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1 );
x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1, idx );
if( array_non_zero( dct4x4 ) )
{
......@@ -135,7 +135,7 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
x264_quant_8x8( h, dct8x8, i_qp, 1 );
x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
......@@ -174,7 +174,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
dct4x4[i][0][0] = 0;
/* quant/scan/dequant */
x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1 );
x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1, i );
h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
......@@ -453,7 +453,7 @@ void x264_macroblock_encode( x264_t *h )
{
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
x264_quant_8x8( h, dct8x8[idx], i_qp, 0 );
x264_quant_8x8( h, dct8x8[idx], i_qp, 0, idx );
h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
......@@ -498,7 +498,7 @@ void x264_macroblock_encode( x264_t *h )
if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0 );
x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0, idx );
h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
......@@ -777,7 +777,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{
DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
x264_quant_8x8( h, dct8x8, i_qp, 0 );
x264_quant_8x8( h, dct8x8, i_qp, 0, i8 );
h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
if( b_decimate && !h->mb.b_trellis )
......@@ -797,7 +797,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
for( i4 = 0; i4 < 4; i4++ )
x264_quant_4x4( h, dct4x4[i4], i_qp, DCT_LUMA_4x4, 0 );
x264_quant_4x4( h, dct4x4[i4], i_qp, DCT_LUMA_4x4, 0, i8*4+i4 );
for( i4 = 0; i4 < 4; i4++ )
h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
......
......@@ -50,9 +50,9 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
void x264_cabac_mb_skip( x264_t *h, int b_skip );
void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
int i_qp, int i_ctxBlockCat, int b_intra );
int i_qp, int i_ctxBlockCat, int b_intra, int idx );
void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
int i_qp, int b_intra );
int i_qp, int b_intra, int idx );
void x264_noise_reduction_update( x264_t *h );
......
......@@ -51,20 +51,78 @@ static uint16_t cabac_prefix_size[15][128];
#define COPY_CABAC h->mc.memcpy_aligned( &cabac_tmp.f8_bits_encoded, &h->cabac.f8_bits_encoded, \
sizeof(x264_cabac_t) - offsetof(x264_cabac_t,f8_bits_encoded) )
static int ssd_mb( x264_t *h )
/* Add to the caller's `satd` the absolute difference between the DC-reduced
 * score of the reconstructed block and the cached score of the source block.
 * satdtype selects both the h->pixf function table and the sum_<satdtype> helper.
 * Expands in the caller's scope and uses its locals h, zero, fdec, dc_coef, x, y
 * and satd. Wrapped in do/while(0) so it behaves as one statement. */
#define ADD_ABS_SATD(satdtype, pixel)\
do {\
    satd += abs( (h->pixf.satdtype[(pixel)]( zero, 0, fdec, FDEC_STRIDE ) - dc_coef)\
               - sum_##satdtype( h, (pixel), x, y ) );\
} while( 0 )
/* Return the sum of the cached fenc SATD scores of the 4x4 blocks covered by a
 * partition of size `pixel` whose top-left luma sample is at (x,y).
 * The whole-macroblock case returns the precomputed total directly. */
static inline int sum_satd( x264_t *h, int pixel, int x, int y )
{
    int first_col, first_row, end_col, end_row;
    int row, col;
    int total = 0;

    if( pixel == PIXEL_16x16 )
        return h->mb.pic.fenc_satd_sum;

    /* Convert pixel coordinates and dimensions to 4x4-block units */
    first_col = x >> 2;
    first_row = y >> 2;
    end_col = first_col + (x264_pixel_size[pixel].w >> 2);
    end_row = first_row + (x264_pixel_size[pixel].h >> 2);

    for( row = first_row; row < end_row; row++ )
        for( col = first_col; col < end_col; col++ )
            total += h->mb.pic.fenc_satd[row][col];
    return total;
}
/* Return the sum of the cached fenc SA8D scores of the 8x8 blocks covered by a
 * partition of size `pixel` whose top-left luma sample is at (x,y).
 * The whole-macroblock case returns the precomputed total directly. */
static inline int sum_sa8d( x264_t *h, int pixel, int x, int y )
{
    int first_col, first_row, end_col, end_row;
    int row, col;
    int total = 0;

    if( pixel == PIXEL_16x16 )
        return h->mb.pic.fenc_sa8d_sum;

    /* Convert pixel coordinates and dimensions to 8x8-block units */
    first_col = x >> 3;
    first_row = y >> 3;
    end_col = first_col + (x264_pixel_size[pixel].w >> 3);
    end_row = first_row + (x264_pixel_size[pixel].h >> 3);

    for( row = first_row; row < end_row; row++ )
        for( col = first_col; col < end_col; col++ )
            total += h->mb.pic.fenc_sa8d[row][col];
    return total;
}
/* Psy RD distortion metric: SSD plus "Absolute Difference of Complexities" */
/* SATD and SA8D are used to measure block complexity. */
/* Both the SATD and the SA8D complexity differences are used, to avoid bias from the DCT size. Using SATD */
/* only, for example, results in overusage of 8x8dct, while the opposite occurs when using SA8D. */
/* FIXME: Is there a better metric than averaged SATD/SA8D difference for complexity difference? */
/* Hadamard transform is recursive, so a SATD+SA8D can be done faster by taking advantage of this fact. */
/* This optimization can also be used in non-RD transform decision. */
static inline int ssd_plane( x264_t *h, int size, int p, int x, int y )
{
return h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], FENC_STRIDE,
h->mb.pic.p_fdec[0], FDEC_STRIDE )
+ h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], FENC_STRIDE,
h->mb.pic.p_fdec[1], FDEC_STRIDE )
+ h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], FENC_STRIDE,
h->mb.pic.p_fdec[2], FDEC_STRIDE );
DECLARE_ALIGNED_16(uint8_t zero[16]) = {0};
int satd = 0;
uint8_t *fdec = h->mb.pic.p_fdec[p] + x + y*FDEC_STRIDE;
uint8_t *fenc = h->mb.pic.p_fenc[p] + x + y*FENC_STRIDE;
if( p == 0 && h->mb.i_psy_rd )
{
int dc_coef = h->pixf.sad[size](zero, 0, fdec, FDEC_STRIDE) >> 1;
ADD_ABS_SATD(satd, size);
/* If the plane is smaller than 8x8, we can't do an SA8D; this probably isn't a big problem. */
if(size <= PIXEL_8x8)
{
dc_coef >>= 1;
ADD_ABS_SATD(sa8d, size);
satd >>= 1;
}
satd = (satd * h->mb.i_psy_rd * x264_lambda_tab[h->mb.i_qp] + 128) >> 8;
}
return h->pixf.ssd[size](fenc, FENC_STRIDE, fdec, FDEC_STRIDE) + satd;
}
static int ssd_plane( x264_t *h, int size, int p, int x, int y )
static inline int ssd_mb( x264_t *h )
{
return h->pixf.ssd[size]( h->mb.pic.p_fenc[p] + x+y*FENC_STRIDE, FENC_STRIDE,
h->mb.pic.p_fdec[p] + x+y*FDEC_STRIDE, FDEC_STRIDE );
return ssd_plane(h, PIXEL_16x16, 0, 0, 0)
+ ssd_plane(h, PIXEL_8x8, 1, 0, 0)
+ ssd_plane(h, PIXEL_8x8, 2, 0, 0);
}
static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
......@@ -269,7 +327,7 @@ static const int lambda2_tab[2][52] = {
};
typedef struct {
uint64_t score;
int64_t score;
int level_idx; // index into level_tree[]
uint8_t cabac_state[10]; //just the contexts relevant to coding abs_level_m1
} trellis_node_t;
......@@ -298,7 +356,7 @@ typedef struct {
static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
const uint16_t *quant_mf, const int *unquant_mf,
const int *coef_weight, const uint8_t *zigzag,
int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs )
int i_ctxBlockCat, int i_lambda2, int b_ac, int i_coefs, int idx )
{
int abs_coefs[64], signs[64];
trellis_node_t nodes[2][8];
......@@ -430,8 +488,20 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
// that are better left coded, especially at QP > 40.
for( abs_level = q; abs_level >= q-1; abs_level-- )
{
int d = i_coef - ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
uint64_t ssd = (int64_t)d*d * coef_weight[i];
int unquant_abs_level = ((unquant_mf[zigzag[i]] * abs_level + 128) >> 8);
int d = i_coef - unquant_abs_level;
int64_t ssd;
/* Psy trellis: bias in favor of higher AC coefficients in the reconstructed frame. */
if( h->mb.i_psy_trellis && i )
{
int orig_coef = (i_coefs == 64) ? h->mb.pic.fenc_dct8[idx][i] : h->mb.pic.fenc_dct4[idx][i];
int predicted_coef = orig_coef - i_coef * signs[i];
int psy_value = h->mb.i_psy_trellis * abs(predicted_coef + unquant_abs_level * signs[i]);
int psy_weight = (i_coefs == 64) ? x264_dct8_weight_tab[zigzag[i]] : x264_dct4_weight_tab[zigzag[i]];
ssd = (int64_t)d*d * coef_weight[i] - psy_weight * psy_value;
}
else
ssd = (int64_t)d*d * coef_weight[i];
for( j = 0; j < 8; j++ )
{
......@@ -495,24 +565,24 @@ static inline void quant_trellis_cabac( x264_t *h, int16_t *dct,
void x264_quant_4x4_trellis( x264_t *h, int16_t dct[4][4], int i_quant_cat,
int i_qp, int i_ctxBlockCat, int b_intra )
int i_qp, int i_ctxBlockCat, int b_intra, int idx )
{
int b_ac = (i_ctxBlockCat == DCT_LUMA_AC);
quant_trellis_cabac( h, (int16_t*)dct,
h->quant4_mf[i_quant_cat][i_qp], h->unquant4_mf[i_quant_cat][i_qp],
x264_dct4_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan4[h->mb.b_interlaced],
i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16 );
i_ctxBlockCat, lambda2_tab[b_intra][i_qp], b_ac, 16, idx );
}
void x264_quant_8x8_trellis( x264_t *h, int16_t dct[8][8], int i_quant_cat,
int i_qp, int b_intra )
int i_qp, int b_intra, int idx )
{
quant_trellis_cabac( h, (int16_t*)dct,
h->quant8_mf[i_quant_cat][i_qp], h->unquant8_mf[i_quant_cat][i_qp],
x264_dct8_weight2_zigzag[h->mb.b_interlaced],
x264_zigzag_scan8[h->mb.b_interlaced],
DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64 );
DCT_LUMA_8x8, lambda2_tab[b_intra][i_qp], 0, 64, idx );
}
......@@ -251,6 +251,10 @@ static void Help( x264_param_t *defaults, int b_longhelp )
H0( " -m, --subme <integer> Subpixel motion estimation and partition\n"
" decision quality: 1=fast, 7=best. [%d]\n", defaults->analyse.i_subpel_refine );
H0( " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" );
H0( " --psy-rd Strength of psychovisual optimization [\"%.1f:%.1f\"]\n"
" #1: RDO (requires subme>=6)\n"
" #2: Trellis (requires trellis, experimental)\n",
defaults->analyse.f_psy_rd,defaults->analyse.f_psy_trellis );
H0( " --mixed-refs Decide references on a per partition basis\n" );
H1( " --no-chroma-me Ignore chroma in motion estimation\n" );
H1( " --bime Jointly optimize both MVs in B-frames\n" );
......@@ -420,6 +424,7 @@ static int Parse( int argc, char **argv,
{ "mvrange", required_argument, NULL, 0 },
{ "mvrange-thread", required_argument, NULL, 0 },
{ "subme", required_argument, NULL, 'm' },
{ "psy-rd", required_argument, NULL, 0 },
{ "b-rdo", no_argument, NULL, 0 },
{ "mixed-refs", no_argument, NULL, 0 },
{ "no-chroma-me", no_argument, NULL, 0 },
......
......@@ -35,7 +35,7 @@
#include <stdarg.h>
#define X264_BUILD 63
#define X264_BUILD 64
/* x264_t:
* opaque handler for encoder */
......@@ -241,6 +241,8 @@ typedef struct x264_param_t
int b_fast_pskip; /* early SKIP detection on P-frames */
int b_dct_decimate; /* transform coefficient thresholding on P-frames */
int i_noise_reduction; /* adaptive pseudo-deadzone */
float f_psy_rd; /* Psy RD strength */
float f_psy_trellis; /* Psy trellis strength */
/* the deadzone size that will be used in luma quantization */
int i_luma_deadzone[2]; /* {inter, intra} */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment