Commit 8de7dbbe authored by Fiona Glaser's avatar Fiona Glaser

Activate trellis in p8x8 qpel RD

Also clean up macroblock.c with some refactoring
Note that this change significantly reduces subme7+trellis2 performance, but improves quality.
Issue originally reported by Alex_W.
parent 59de6938
...@@ -79,7 +79,25 @@ static int x264_mb_decimate_score( int16_t *dct, int i_max ) ...@@ -79,7 +79,25 @@ static int x264_mb_decimate_score( int16_t *dct, int i_max )
return i_score; return i_score;
} }
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) static ALWAYS_INLINE void x264_quant_4x4( x264_t *h, int16_t dct[4][4], int i_qp, int i_ctxBlockCat, int b_intra )
{
int i_quant_cat = b_intra ? CQM_4IY : CQM_4PY;
if( h->mb.b_trellis )
x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, i_ctxBlockCat, b_intra );
else
h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}
static ALWAYS_INLINE void x264_quant_8x8( x264_t *h, int16_t dct[8][8], int i_qp, int b_intra )
{
int i_quant_cat = b_intra ? CQM_8IY : CQM_8PY;
if( h->mb.b_trellis )
x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, b_intra );
else
h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp )
{ {
uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]]; uint8_t *p_src = &h->mb.pic.p_fenc[0][block_idx_xy_fenc[idx]];
uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]]; uint8_t *p_dst = &h->mb.pic.p_fdec[0][block_idx_xy_fdec[idx]];
...@@ -93,15 +111,12 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) ...@@ -93,15 +111,12 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
h->dctf.sub4x4_dct( dct4x4, p_src, p_dst ); h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );
if( h->mb.b_trellis ) x264_quant_4x4( h, dct4x4, i_qp, DCT_LUMA_4x4, 1 );
x264_quant_4x4_trellis( h, dct4x4, CQM_4IY, i_qscale, DCT_LUMA_4x4, 1 );
else
h->quantf.quant_4x4( dct4x4, h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
if( array_non_zero( dct4x4 ) ) if( array_non_zero( dct4x4 ) )
{ {
h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 ); h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4 );
h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale ); h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qp );
/* output samples to fdec */ /* output samples to fdec */
h->dctf.add4x4_idct( p_dst, dct4x4 ); h->dctf.add4x4_idct( p_dst, dct4x4 );
...@@ -110,7 +125,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ) ...@@ -110,7 +125,7 @@ void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx])); memset( h->dct.luma4x4[idx], 0, sizeof(h->dct.luma4x4[idx]));
} }
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ) void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
{ {
int x = 8 * (idx&1); int x = 8 * (idx&1);
int y = 8 * (idx>>1); int y = 8 * (idx>>1);
...@@ -120,17 +135,14 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ) ...@@ -120,17 +135,14 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst ); h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
if( h->mb.b_trellis ) x264_quant_8x8( h, dct8x8, i_qp, 1 );
x264_quant_8x8_trellis( h, dct8x8, CQM_8IY, i_qscale, 1 );
else
h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8IY][i_qscale], h->quant8_bias[CQM_8IY][i_qscale] );
h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 ); h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8 );
h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale ); h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qp );
h->dctf.add8x8_idct8( p_dst, dct8x8 ); h->dctf.add8x8_idct8( p_dst, dct8x8 );
} }
static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) static void x264_mb_encode_i16x16( x264_t *h, int i_qp )
{ {
uint8_t *p_src = h->mb.pic.p_fenc[0]; uint8_t *p_src = h->mb.pic.p_fenc[0];
uint8_t *p_dst = h->mb.pic.p_fdec[0]; uint8_t *p_dst = h->mb.pic.p_fdec[0];
...@@ -162,22 +174,19 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) ...@@ -162,22 +174,19 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
dct4x4[i][0][0] = 0; dct4x4[i][0][0] = 0;
/* quant/scan/dequant */ /* quant/scan/dequant */
if( h->mb.b_trellis ) x264_quant_4x4( h, dct4x4[i], i_qp, DCT_LUMA_AC, 1 );
x264_quant_4x4_trellis( h, dct4x4[i], CQM_4IY, i_qscale, DCT_LUMA_AC, 1 );
else
h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IY][i_qscale], h->quant4_bias[CQM_4IY][i_qscale] );
h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] ); h->zigzagf.scan_4x4( h->dct.luma4x4[i], dct4x4[i] );
h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qscale ); h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IY], i_qp );
} }
h->dctf.dct4x4dc( dct_dc4x4 ); h->dctf.dct4x4dc( dct_dc4x4 );
h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qscale][0]>>1, h->quant4_bias[CQM_4IY][i_qscale][0]<<1 ); h->quantf.quant_4x4_dc( dct_dc4x4, h->quant4_mf[CQM_4IY][i_qp][0]>>1, h->quant4_bias[CQM_4IY][i_qp][0]<<1 );
h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 ); h->zigzagf.scan_4x4( h->dct.luma16x16_dc, dct_dc4x4 );
/* output samples to fdec */ /* output samples to fdec */
h->dctf.idct4x4dc( dct_dc4x4 ); h->dctf.idct4x4dc( dct_dc4x4 );
x264_mb_dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qscale ); /* XXX not inversed */ x264_mb_dequant_4x4_dc( dct_dc4x4, h->dequant4_mf[CQM_4IY], i_qp ); /* XXX not inversed */
/* calculate dct coeffs */ /* calculate dct coeffs */
for( i = 0; i < 16; i++ ) for( i = 0; i < 16; i++ )
...@@ -189,7 +198,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale ) ...@@ -189,7 +198,7 @@ static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
h->dctf.add16x16_idct( p_dst, dct4x4 ); h->dctf.add16x16_idct( p_dst, dct4x4 );
} }
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
{ {
int i, ch; int i, ch;
int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate); int b_decimate = b_inter && (h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate);
...@@ -225,22 +234,20 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) ...@@ -225,22 +234,20 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
dct4x4[i][0][0] = 0; dct4x4[i][0][0] = 0;
/* no trellis; it doesn't seem to help chroma noticeably */ /* no trellis; it doesn't seem to help chroma noticeably */
h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qscale], h->quant4_bias[CQM_4IC+b_inter][i_qscale] ); h->quantf.quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC+b_inter][i_qp], h->quant4_bias[CQM_4IC+b_inter][i_qp] );
h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] ); h->zigzagf.scan_4x4( h->dct.luma4x4[16+i+ch*4], dct4x4[i] );
if( b_decimate ) if( b_decimate )
{
i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 ); i_decimate_score += x264_mb_decimate_score( h->dct.luma4x4[16+i+ch*4]+1, 15 );
}
} }
h->dctf.dct2x2dc( dct2x2 ); h->dctf.dct2x2dc( dct2x2 );
h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qscale][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qscale][0]<<1 ); h->quantf.quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC+b_inter][i_qp][0]>>1, h->quant4_bias[CQM_4IC+b_inter][i_qp][0]<<1 );
zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 ); zigzag_scan_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
/* output samples to fdec */ /* output samples to fdec */
h->dctf.idct2x2dc( dct2x2 ); h->dctf.idct2x2dc( dct2x2 );
x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); /* XXX not inversed */ x264_mb_dequant_2x2_dc( dct2x2, h->dequant4_mf[CQM_4IC + b_inter], i_qp ); /* XXX not inversed */
if( b_decimate && i_decimate_score < 7 ) if( b_decimate && i_decimate_score < 7 )
{ {
...@@ -253,7 +260,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ) ...@@ -253,7 +260,7 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
else else
{ {
for( i = 0; i < 4; i++ ) for( i = 0; i < 4; i++ )
h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale ); h->quantf.dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qp );
} }
dct4x4[0][0][0] = dct2x2[0][0]; dct4x4[0][0][0] = dct2x2[0][0];
dct4x4[1][0][0] = dct2x2[0][1]; dct4x4[1][0][0] = dct2x2[0][1];
...@@ -446,10 +453,7 @@ void x264_macroblock_encode( x264_t *h ) ...@@ -446,10 +453,7 @@ void x264_macroblock_encode( x264_t *h )
{ {
if( h->mb.b_noise_reduction ) if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 ); h->quantf.denoise_dct( *dct8x8[idx], h->nr_residual_sum[1], h->nr_offset[1], 64 );
if( h->mb.b_trellis ) x264_quant_8x8( h, dct8x8[idx], i_qp, 0 );
x264_quant_8x8_trellis( h, dct8x8[idx], CQM_8PY, i_qp, 0 );
else
h->quantf.quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] );
h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] ); h->zigzagf.scan_8x8( h->dct.luma8x8[idx], dct8x8[idx] );
...@@ -494,10 +498,7 @@ void x264_macroblock_encode( x264_t *h ) ...@@ -494,10 +498,7 @@ void x264_macroblock_encode( x264_t *h )
if( h->mb.b_noise_reduction ) if( h->mb.b_noise_reduction )
h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 ); h->quantf.denoise_dct( *dct4x4[idx], h->nr_residual_sum[0], h->nr_offset[0], 16 );
if( h->mb.b_trellis ) x264_quant_4x4( h, dct4x4[idx], i_qp, DCT_LUMA_4x4, 0 );
x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, DCT_LUMA_4x4, 0 );
else
h->quantf.quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] );
h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] ); h->zigzagf.scan_4x4( h->dct.luma4x4[idx], dct4x4[idx] );
...@@ -776,10 +777,10 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) ...@@ -776,10 +777,10 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
{ {
DECLARE_ALIGNED_16( int16_t dct8x8[8][8] ); DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec ); h->dctf.sub8x8_dct8( dct8x8, p_fenc, p_fdec );
h->quantf.quant_8x8( dct8x8, h->quant8_mf[CQM_8PY][i_qp], h->quant8_bias[CQM_8PY][i_qp] ); x264_quant_8x8( h, dct8x8, i_qp, 0 );
h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 ); h->zigzagf.scan_8x8( h->dct.luma8x8[i8], dct8x8 );
if( b_decimate ) if( b_decimate && !h->mb.b_trellis )
nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 ); nnz8x8 = 4 <= x264_mb_decimate_score( h->dct.luma8x8[i8], 64 );
else else
nnz8x8 = array_non_zero( dct8x8 ); nnz8x8 = array_non_zero( dct8x8 );
...@@ -796,7 +797,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 ) ...@@ -796,7 +797,8 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] ); DECLARE_ALIGNED_16( int16_t dct4x4[4][4][4] );
h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec ); h->dctf.sub8x8_dct( dct4x4, p_fenc, p_fdec );
for( i4 = 0; i4 < 4; i4++ ) for( i4 = 0; i4 < 4; i4++ )
h->quantf.quant_4x4( dct4x4[i4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); x264_quant_4x4( h, dct4x4[i4], i_qp, DCT_LUMA_4x4, 0 );
for( i4 = 0; i4 < 4; i4++ ) for( i4 = 0; i4 < 4; i4++ )
h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] ); h->zigzagf.scan_4x4( h->dct.luma4x4[i8*4+i4], dct4x4[i4] );
......
...@@ -43,9 +43,9 @@ void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb ); ...@@ -43,9 +43,9 @@ void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s ); void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
void x264_macroblock_encode_p8x8( x264_t *h, int i8 ); void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale ); void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qp );
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale ); void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp );
void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale ); void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp );
void x264_cabac_mb_skip( x264_t *h, int b_skip ); void x264_cabac_mb_skip( x264_t *h, int b_skip );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment