Commit a93903c6 authored by Fiona Glaser
Browse files

Hardcode the bs_t in cavlc.c; passing it around is a waste

Saves ~1.5kb of code size, very slight speed boost.
parent 91c0fd94
......@@ -61,8 +61,9 @@ static const uint8_t sub_mb_type_b_to_golomb[13]=
/****************************************************************************
* block_residual_write_cavlc:
****************************************************************************/
static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_suffix_length, int level )
static inline int block_residual_write_cavlc_escape( x264_t *h, int i_suffix_length, int level )
{
bs_t *s = &h->out.bs;
static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
int i_level_prefix = 15;
int mask = level >> 15;
......@@ -112,8 +113,9 @@ static inline int block_residual_write_cavlc_escape( x264_t *h, bs_t *s, int i_s
return i_suffix_length;
}
static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, int16_t *l, int nC )
static int block_residual_write_cavlc( x264_t *h, int i_ctxBlockCat, int16_t *l, int nC )
{
bs_t *s = &h->out.bs;
static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
static const int count_cat[5] = {16, 15, 16, 4, 15};
x264_run_level_t runlevel;
......@@ -157,7 +159,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
}
else
i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
for( i = i_trailing+1; i < i_total; i++ )
{
val = runlevel.level[i] + LEVEL_TABLE_SIZE/2;
......@@ -167,7 +169,7 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
}
else
i_suffix_length = block_residual_write_cavlc_escape( h, s, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
i_suffix_length = block_residual_write_cavlc_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
}
}
......@@ -191,18 +193,19 @@ static int block_residual_write_cavlc( x264_t *h, bs_t *s, int i_ctxBlockCat, in
static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
#define block_residual_write_cavlc(h,s,cat,idx,l)\
#define block_residual_write_cavlc(h,cat,idx,l)\
{\
int nC = cat == DCT_CHROMA_DC ? 4 : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? 0 : idx )];\
uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\
if( !*nnz )\
bs_write_vlc( s, x264_coeff0_token[nC] );\
bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
else\
*nnz = block_residual_write_cavlc(h,s,cat,l,nC);\
*nnz = block_residual_write_cavlc(h,cat,l,nC);\
}
static void cavlc_qp_delta( x264_t *h, bs_t *s )
static void cavlc_qp_delta( x264_t *h )
{
bs_t *s = &h->out.bs;
int i_dqp = h->mb.i_qp - h->mb.i_last_qp;
/* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely flat background area */
......@@ -225,39 +228,40 @@ static void cavlc_qp_delta( x264_t *h, bs_t *s )
bs_write_se( s, i_dqp );
}
static void cavlc_mb_mvd( x264_t *h, bs_t *s, int i_list, int idx, int width )
static void cavlc_mb_mvd( x264_t *h, int i_list, int idx, int width )
{
bs_t *s = &h->out.bs;
ALIGNED_4( int16_t mvp[2] );
x264_mb_predict_mv( h, i_list, idx, width, mvp );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
}
static inline void cavlc_mb8x8_mvd( x264_t *h, bs_t *s, int i )
static inline void cavlc_mb8x8_mvd( x264_t *h, int i )
{
switch( h->mb.i_sub_partition[i] )
{
case D_L0_8x8:
cavlc_mb_mvd( h, s, 0, 4*i, 2 );
cavlc_mb_mvd( h, 0, 4*i, 2 );
break;
case D_L0_8x4:
cavlc_mb_mvd( h, s, 0, 4*i+0, 2 );
cavlc_mb_mvd( h, s, 0, 4*i+2, 2 );
cavlc_mb_mvd( h, 0, 4*i+0, 2 );
cavlc_mb_mvd( h, 0, 4*i+2, 2 );
break;
case D_L0_4x8:
cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
cavlc_mb_mvd( h, 0, 4*i+0, 1 );
cavlc_mb_mvd( h, 0, 4*i+1, 1 );
break;
case D_L0_4x4:
cavlc_mb_mvd( h, s, 0, 4*i+0, 1 );
cavlc_mb_mvd( h, s, 0, 4*i+1, 1 );
cavlc_mb_mvd( h, s, 0, 4*i+2, 1 );
cavlc_mb_mvd( h, s, 0, 4*i+3, 1 );
cavlc_mb_mvd( h, 0, 4*i+0, 1 );
cavlc_mb_mvd( h, 0, 4*i+1, 1 );
cavlc_mb_mvd( h, 0, 4*i+2, 1 );
cavlc_mb_mvd( h, 0, 4*i+3, 1 );
break;
}
}
static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8start, int i8end )
static inline void x264_macroblock_luma_write_cavlc( x264_t *h, int i8start, int i8end )
{
int i8, i4;
if( h->mb.b_transform_8x8 )
......@@ -271,20 +275,23 @@ static inline void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s, int i8s
for( i8 = i8start; i8 <= i8end; i8++ )
if( h->mb.i_cbp_luma & (1 << i8) )
for( i4 = 0; i4 < 4; i4++ )
block_residual_write_cavlc( h, s, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
block_residual_write_cavlc( h, DCT_LUMA_4x4, i4+i8*4, h->dct.luma4x4[i4+i8*4] );
}
/*****************************************************************************
* x264_macroblock_write:
*****************************************************************************/
void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
void x264_macroblock_write_cavlc( x264_t *h )
{
bs_t *s = &h->out.bs;
const int i_mb_type = h->mb.i_type;
static const uint8_t i_offsets[3] = {5,23,0};
int i_mb_i_offset = i_offsets[h->sh.i_type];
int i;
#if !RDO_SKIP_BS
#if RDO_SKIP_BS
s->i_bits_encoded = 0;
#else
const int i_mb_pos_start = bs_pos( s );
int i_mb_pos_tex;
#endif
......@@ -365,7 +372,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
if( h->mb.pic.i_fref[0] > 1 )
bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
cavlc_mb_mvd( h, s, 0, 0, 4 );
cavlc_mb_mvd( h, 0, 0, 4 );
}
else if( h->mb.i_partition == D_16x8 )
{
......@@ -375,8 +382,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
}
cavlc_mb_mvd( h, s, 0, 0, 4 );
cavlc_mb_mvd( h, s, 0, 8, 4 );
cavlc_mb_mvd( h, 0, 0, 4 );
cavlc_mb_mvd( h, 0, 8, 4 );
}
else if( h->mb.i_partition == D_8x16 )
{
......@@ -386,8 +393,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
}
cavlc_mb_mvd( h, s, 0, 0, 2 );
cavlc_mb_mvd( h, s, 0, 4, 2 );
cavlc_mb_mvd( h, 0, 0, 2 );
cavlc_mb_mvd( h, 0, 4, 2 );
}
}
else if( i_mb_type == P_8x8 )
......@@ -422,7 +429,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
}
for( i = 0; i < 4; i++ )
cavlc_mb8x8_mvd( h, s, i );
cavlc_mb8x8_mvd( h, i );
}
else if( i_mb_type == B_8x8 )
{
......@@ -445,10 +452,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
/* mvd */
for( i = 0; i < 4; i++ )
if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
cavlc_mb_mvd( h, s, 0, 4*i, 2 );
cavlc_mb_mvd( h, 0, 4*i, 2 );
for( i = 0; i < 4; i++ )
if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
cavlc_mb_mvd( h, s, 1, 4*i, 2 );
cavlc_mb_mvd( h, 1, 4*i, 2 );
}
else if( i_mb_type != B_DIRECT )
{
......@@ -463,8 +470,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
{
if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
}
else
{
......@@ -474,17 +481,17 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
if( h->mb.i_partition == D_16x8 )
{
if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 4 );
if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 8, 4 );
if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 4 );
if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 8, 4 );
if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 4 );
if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 8, 4 );
if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 4 );
if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 8, 4 );
}
else //if( h->mb.i_partition == D_8x16 )
{
if( b_list[0][0] ) cavlc_mb_mvd( h, s, 0, 0, 2 );
if( b_list[0][1] ) cavlc_mb_mvd( h, s, 0, 4, 2 );
if( b_list[1][0] ) cavlc_mb_mvd( h, s, 1, 0, 2 );
if( b_list[1][1] ) cavlc_mb_mvd( h, s, 1, 4, 2 );
if( b_list[0][0] ) cavlc_mb_mvd( h, 0, 0, 2 );
if( b_list[0][1] ) cavlc_mb_mvd( h, 0, 4, 2 );
if( b_list[1][0] ) cavlc_mb_mvd( h, 1, 0, 2 );
if( b_list[1][1] ) cavlc_mb_mvd( h, 1, 4, 2 );
}
}
}
......@@ -509,29 +516,29 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
/* write residual */
if( i_mb_type == I_16x16 )
{
cavlc_qp_delta( h, s );
cavlc_qp_delta( h );
/* DC Luma */
block_residual_write_cavlc( h, s, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
block_residual_write_cavlc( h, DCT_LUMA_DC, 24 , h->dct.luma16x16_dc );
/* AC Luma */
if( h->mb.i_cbp_luma )
for( i = 0; i < 16; i++ )
block_residual_write_cavlc( h, s, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
block_residual_write_cavlc( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 );
}
else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
{
cavlc_qp_delta( h, s );
x264_macroblock_luma_write_cavlc( h, s, 0, 3 );
cavlc_qp_delta( h );
x264_macroblock_luma_write_cavlc( h, 0, 3 );
}
if( h->mb.i_cbp_chroma )
{
/* Chroma DC residual present */
block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
block_residual_write_cavlc( h, s, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
for( i = 16; i < 24; i++ )
block_residual_write_cavlc( h, s, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
}
#if !RDO_SKIP_BS
......@@ -549,36 +556,36 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
*****************************************************************************/
static int x264_partition_size_cavlc( x264_t *h, int i8, int i_pixel )
{
bs_t *s = &h->out.bs;
const int i_mb_type = h->mb.i_type;
int b_8x16 = h->mb.i_partition == D_8x16;
int j;
h->out.bs.i_bits_encoded = 0;
if( i_mb_type == P_8x8 )
{
cavlc_mb8x8_mvd( h, &h->out.bs, i8 );
bs_write_ue( &h->out.bs, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
cavlc_mb8x8_mvd( h, i8 );
bs_write_ue( s, sub_mb_type_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
}
else if( i_mb_type == P_L0 )
cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
{
if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 4>>b_8x16 );
if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 4>>b_8x16 );
if( x264_mb_type_list_table[ i_mb_type ][0][!!i8] ) cavlc_mb_mvd( h, 0, 4*i8, 4>>b_8x16 );
if( x264_mb_type_list_table[ i_mb_type ][1][!!i8] ) cavlc_mb_mvd( h, 1, 4*i8, 4>>b_8x16 );
}
else //if( i_mb_type == B_8x8 )
{
if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i8] ] )
cavlc_mb_mvd( h, &h->out.bs, 0, 4*i8, 2 );
cavlc_mb_mvd( h, 0, 4*i8, 2 );
if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i8] ] )
cavlc_mb_mvd( h, &h->out.bs, 1, 4*i8, 2 );
cavlc_mb_mvd( h, 1, 4*i8, 2 );
}
for( j = (i_pixel < PIXEL_8x8); j >= 0; j-- )
{
x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
x264_macroblock_luma_write_cavlc( h, i8, i8 );
block_residual_write_cavlc( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
block_residual_write_cavlc( h, DCT_CHROMA_AC, 20+i8, h->dct.luma4x4[20+i8]+1 );
i8 += x264_pixel_size[i_pixel].h >> 3;
}
......@@ -589,12 +596,12 @@ static int x264_subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
{
int b_8x4 = i_pixel == PIXEL_8x4;
h->out.bs.i_bits_encoded = 0;
cavlc_mb_mvd( h, &h->out.bs, 0, i4, 1+b_8x4 );
block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
cavlc_mb_mvd( h, 0, i4, 1+b_8x4 );
block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
if( i_pixel != PIXEL_4x4 )
{
i4 += 2-b_8x4;
block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
}
return h->out.bs.i_bits_encoded;
......@@ -612,14 +619,14 @@ static int x264_partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
{
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
bs_write_ue( &h->out.bs, intra4x4_cbp_to_golomb[( h->mb.i_cbp_chroma << 4 )|h->mb.i_cbp_luma] );
x264_macroblock_luma_write_cavlc( h, &h->out.bs, i8, i8 );
x264_macroblock_luma_write_cavlc( h, i8, i8 );
return h->out.bs.i_bits_encoded;
}
static int x264_partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
{
h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );
block_residual_write_cavlc( h, &h->out.bs, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
block_residual_write_cavlc( h, DCT_LUMA_4x4, i4, h->dct.luma4x4[i4] );
return h->out.bs.i_bits_encoded;
}
......@@ -628,14 +635,14 @@ static int x264_i8x8_chroma_size_cavlc( x264_t *h )
h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
if( h->mb.i_cbp_chroma )
{
block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
block_residual_write_cavlc( h, DCT_CHROMA_DC, 25, h->dct.chroma_dc[0] );
block_residual_write_cavlc( h, DCT_CHROMA_DC, 26, h->dct.chroma_dc[1] );
if( h->mb.i_cbp_chroma == 2 )
{
int i;
for( i = 16; i < 24; i++ )
block_residual_write_cavlc( h, &h->out.bs, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
block_residual_write_cavlc( h, DCT_CHROMA_AC, i, h->dct.luma4x4[i]+1 );
}
}
return h->out.bs.i_bits_encoded;
......
......@@ -1741,7 +1741,7 @@ static int x264_slice_write( x264_t *h )
bs_write_ue( &h->out.bs, i_skip ); /* skip run */
i_skip = 0;
}
x264_macroblock_write_cavlc( h, &h->out.bs );
x264_macroblock_write_cavlc( h );
}
}
......
......@@ -45,7 +45,7 @@ void x264_predict_lossless_16x16( x264_t *h, int i_mode );
void x264_macroblock_encode ( x264_t *h );
void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
void x264_macroblock_write_cavlc ( x264_t *h, bs_t *s );
void x264_macroblock_write_cavlc ( x264_t *h );
void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
......
......@@ -159,10 +159,8 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
}
else
{
bs_t bs_tmp = h->out.bs;
bs_tmp.i_bits_encoded = 0;
x264_macroblock_size_cavlc( h, &bs_tmp );
i_bits = ( bs_tmp.i_bits_encoded * i_lambda2 + 128 ) >> 8;
x264_macroblock_size_cavlc( h );
i_bits = ( h->out.bs.i_bits_encoded * i_lambda2 + 128 ) >> 8;
}
h->mb.b_transform_8x8 = b_transform_bak;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment