Commit f2a12915 authored by Fiona Glaser

Faster CABAC residual encoding

6% faster block_residual_write_cabac in RD mode.
parent a7831e46
......@@ -24,6 +24,10 @@
#include "common/common.h"
#include "macroblock.h"
#ifndef RDO_SKIP_BS
#define RDO_SKIP_BS 0
#endif
static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
int ctx0, int ctx1, int ctx2, int ctx3, int ctx4, int ctx5 )
{
......@@ -304,7 +308,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
/* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
if( h->mb.i_type == I_16x16 && !h->mb.cbp[h->mb.i_mb_xy] )
{
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
h->mb.i_qp = h->mb.i_last_qp;
#endif
i_dqp = 0;
......@@ -335,7 +339,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h, x264_cabac_t *cb )
x264_cabac_encode_decision_noup( cb, 60 + ctx, 0 );
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
void x264_cabac_mb_skip( x264_t *h, int b_skip )
{
int ctx = (h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ))
......@@ -454,7 +458,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
else if( i_abs < 9 )
{
x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
if( i_abs > 4 )
{
for( i = 1; i < 4; i++ )
......@@ -474,7 +478,7 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis
else
{
x264_cabac_encode_decision( cb, ctxbase + ctx, 1 );
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
for( i = 1; i < 4; i++ )
x264_cabac_encode_decision( cb, ctxbase + ctxes[i], 1 );
cb->f8_bits_encoded += cabac_size_5ones[cb->state[ctxbase+6]];
......@@ -557,68 +561,67 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
int i_nza = 0;
int i_nzb = 0;
if( i_cat == DCT_LUMA_DC )
{
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
i_nza = h->mb.cbp[i_mba_xy] & 0x100;
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
}
}
else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 )
{
if( i_idx & ~10 ) // block_idx_x > 0
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( i_idx & ~5 ) // block_idx_y > 0
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_top_xy;
/* no need to test for skip/pcm */
if( i_mba_xy >= 0 )
i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
if( i_mbb_xy >= 0 )
i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
}
else if( i_cat == DCT_CHROMA_DC )
{
/* no need to test skip/pcm */
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx);
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx);
}
}
else if( i_cat == DCT_CHROMA_AC )
switch( i_cat )
{
if( i_idx & 1 )
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( i_idx & 2 )
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_top_xy;
/* no need to test skip/pcm */
if( i_mba_xy >= 0 )
i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
if( i_mbb_xy >= 0 )
i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
case DCT_LUMA_DC:
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
i_nza = h->mb.cbp[i_mba_xy] & 0x100;
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
}
break;
case DCT_LUMA_AC:
case DCT_LUMA_4x4:
if( i_idx & ~10 ) // block_idx_x > 0
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( i_idx & ~5 ) // block_idx_y > 0
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_top_xy;
/* no need to test for skip/pcm */
if( i_mba_xy >= 0 )
i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
if( i_mbb_xy >= 0 )
i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
break;
case DCT_CHROMA_DC:
/* no need to test skip/pcm */
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
i_nza = h->mb.cbp[i_mba_xy] & (0x200 << i_idx);
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
i_nzb = h->mb.cbp[i_mbb_xy] & (0x200 << i_idx);
}
break;
case DCT_CHROMA_AC:
if( i_idx & 1 )
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( i_idx & 2 )
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_top_xy;
/* no need to test skip/pcm */
if( i_mba_xy >= 0 )
i_nza = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 1];
if( i_mbb_xy >= 0 )
i_nzb = h->mb.cache.non_zero_count[x264_scan8[i_idx] - 8];
}
if( IS_INTRA( h->mb.i_type ) )
......@@ -659,8 +662,6 @@ static const uint8_t last_coeff_flag_offset_8x8[63] = {
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
static const uint8_t identity[16] =
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
// node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
// 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
......@@ -680,6 +681,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
const int i_ctx_sig = significant_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_last = last_coeff_flag_offset[h->mb.b_interlaced][i_ctxBlockCat];
const int i_ctx_level = coeff_abs_level_m1_offset[i_ctxBlockCat];
const uint8_t *significant_coeff_flag_offset = significant_coeff_flag_offset_8x8[h->mb.b_interlaced];
int i_coeff_abs_m1[64];
int UNUSED i_coeff_sign[64];
......@@ -689,9 +691,6 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
int node_ctx = 0;
int i, j;
const uint8_t *significant_coeff_flag_offset;
const uint8_t *last_coeff_flag_offset;
/* yes this is always aligned, and l[-1] exists in the cases where it's used (ac) */
for( j = i_count - 4; j >= -1; j -= 4 )
if( *(uint64_t*)(l+j) )
......@@ -710,38 +709,37 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
}
}
significant_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8)
? significant_coeff_flag_offset_8x8[h->mb.b_interlaced]
: identity;
last_coeff_flag_offset = (i_ctxBlockCat == DCT_LUMA_8x8)
? last_coeff_flag_offset_8x8 : identity;
for( i = j; i < j+4; i++)
if( l[i] )
i_last = i;
i_sigmap_size = X264_MIN( i_last+1, i_count-1 );
for( i = 0; i < i_sigmap_size; i++ )
{
if( l[i] )
{
i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;
#ifndef RDO_SKIP_BS
i_coeff_sign[i_coeff] = l[i] < 0;
#endif
i_coeff++;
x264_cabac_encode_decision( cb, i_ctx_sig + significant_coeff_flag_offset[i], 1 );
x264_cabac_encode_decision( cb, i_ctx_last + last_coeff_flag_offset[i], i == i_last );
}
else
x264_cabac_encode_decision( cb, i_ctx_sig + significant_coeff_flag_offset[i], 0 );
#define WRITE_SIGMAP( l8x8 )\
for( i = 0; i < i_sigmap_size; i++ )\
{\
if( l[i] )\
{\
i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;\
if( !RDO_SKIP_BS )\
i_coeff_sign[i_coeff] = l[i] < 0;\
i_coeff++;\
x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 1 );\
x264_cabac_encode_decision( cb, i_ctx_last + (l8x8 ? last_coeff_flag_offset_8x8[i] : i), i == i_last );\
}\
else\
x264_cabac_encode_decision( cb, i_ctx_sig + (l8x8 ? significant_coeff_flag_offset[i] : i), 0 );\
}
if( i_ctxBlockCat == DCT_LUMA_8x8 )
WRITE_SIGMAP( 1 )
else
WRITE_SIGMAP( 0 )
if( i == i_last )
{
i_coeff_abs_m1[i_coeff] = abs(l[i]) - 1;
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
i_coeff_sign[i_coeff] = l[i] < 0;
#endif
i_coeff++;
......@@ -760,7 +758,7 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
{
x264_cabac_encode_decision( cb, ctx, 1 );
ctx = coeff_abs_levelgt1_ctx[node_ctx] + i_ctx_level;
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
cb->f8_bits_encoded += cabac_size_unary[i_prefix][cb->state[ctx]];
cb->state[ctx] = cabac_transition_unary[i_prefix][cb->state[ctx]];
#else
......@@ -778,12 +776,12 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
{
x264_cabac_encode_decision( cb, ctx, 0 );
node_ctx = coeff_abs_level_transition[0][node_ctx];
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
x264_cabac_encode_bypass( cb, 0 ); // sign
#endif
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
x264_cabac_encode_bypass( cb, i_coeff_sign[i_coeff] );
#endif
} while( i_coeff > 0 );
......@@ -797,7 +795,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
int i_list;
int i;
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
const int i_mb_pos_start = x264_cabac_pos( cb );
int i_mb_pos_tex;
#endif
......@@ -805,7 +803,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
/* Write the MB type */
x264_cabac_mb_type( h, cb );
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
if( i_mb_type == I_PCM )
{
i_mb_pos_tex = x264_cabac_pos( cb );
......@@ -980,7 +978,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
}
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
i_mb_pos_tex = x264_cabac_pos( cb );
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
#endif
......@@ -1036,12 +1034,12 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
}
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
h->stat.frame.i_tex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
#endif
}
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
/*****************************************************************************
* RD only; doesn't generate a valid bitstream
* doesn't write cbp or chroma dc (I don't know how much this matters)
......
......@@ -24,6 +24,10 @@
#include "common/common.h"
#include "macroblock.h"
#ifndef RDO_SKIP_BS
#define RDO_SKIP_BS 0
#endif
static const uint8_t intra4x4_cbp_to_golomb[48]=
{
3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
......@@ -168,7 +172,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
}
else
{
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
/* Weight highly against overflows. */
s->i_bits_encoded += 1000000;
#else
......@@ -212,7 +216,7 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
&& !array_non_zero(h->dct.luma16x16_dc) )
{
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
h->mb.i_qp = h->mb.i_last_qp;
#endif
i_dqp = 0;
......@@ -300,7 +304,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
int i_mb_i_offset;
int i;
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
const int i_mb_pos_start = bs_pos( s );
int i_mb_pos_tex;
#endif
......@@ -327,7 +331,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
bs_write1( s, h->mb.b_interlaced );
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
if( i_mb_type == I_PCM)
{
bs_write_ue( s, i_mb_i_offset + 25 );
......@@ -579,7 +583,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
return;
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
i_mb_pos_tex = bs_pos( s );
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
#endif
......@@ -628,12 +632,12 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
}
}
#ifndef RDO_SKIP_BS
#if !RDO_SKIP_BS
h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
#endif
}
#ifdef RDO_SKIP_BS
#if RDO_SKIP_BS
/*****************************************************************************
* RD only; doesn't generate a valid bitstream
* doesn't write cbp or chroma dc (I don't know how much this matters)
......
......@@ -22,7 +22,7 @@
* instead of writing the bitstream.
* TODO: use these for fast 1st pass too. */
#define RDO_SKIP_BS
#define RDO_SKIP_BS 1
/* Transition and size tables for abs<9 MVD and residual coding */
/* Consist of i_prefix-2 1s, one zero, and a bypass sign bit */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment