Commit b659ca6f authored by Loren Merritt

During RDO, skip the bitstream writing and just calculate the number of bits that would be used. Speedup: CABAC +4-8%, CAVLC +2-4%.



git-svn-id: svn://svn.videolan.org/x264/trunk@330 df754926-b1dd-0310-bc7b-ec298dee348c
parent 48c2e935
......@@ -33,6 +33,7 @@ typedef struct bs_s
uint8_t *p_end;
int i_left; /* i_count number of available bits */
int i_bits_encoded; /* RD only */
} bs_t;
static inline void bs_init( bs_t *s, void *p_data, int i_data )
......
......@@ -716,7 +716,6 @@ static const int x264_transition_mps[64] =
49,50,51,52,53,54,55,56,57,58,59,60,61,62,62,63,
};
#define FIX8(f) ((int)(f*(1<<8)))
static const int x264_cabac_probability[128] =
{
FIX8(0.9812), FIX8(0.9802), FIX8(0.9792), FIX8(0.9781),
......@@ -789,8 +788,6 @@ static const int x264_cabac_entropy[128] =
FIX8(5.5114), FIX8(5.5866), FIX8(5.6618), FIX8(5.7370)
};
#undef FIX8
/*****************************************************************************
*
......@@ -1113,3 +1110,23 @@ void x264_cabac_encode_flush( x264_cabac_t *cb )
bs_align_0( cb->s );
}
/*****************************************************************************
*
*****************************************************************************/
/* Cost-only counterpart of a CABAC decision: nothing is written to the
 * bitstream.  The adaptive state of context i_ctx is advanced as if bin b
 * had been coded, and the estimated cost of that bin (taken from
 * x264_cabac_entropy, whose entries are built with FIX8) is added to
 * cb->f8_bits_encoded.
 *
 *  cb     CABAC context whose ctxstate[] and f8_bits_encoded are updated
 *  i_ctx  index into cb->ctxstate[]
 *  b      bin value to account for */
void x264_cabac_size_decision( x264_cabac_t *cb, int i_ctx, int b )
{
    const int state = cb->ctxstate[i_ctx].i_state;

    if( b == cb->ctxstate[i_ctx].i_mps )
    {
        /* Bin matches the most probable symbol. */
        cb->ctxstate[i_ctx].i_state = x264_transition_mps[state];
        cb->f8_bits_encoded += x264_cabac_entropy[ 63 - state ];
        return;
    }

    /* Bin is the least probable symbol; at state 0 the MPS flips. */
    if( state == 0 )
        cb->ctxstate[i_ctx].i_mps ^= 1;
    cb->ctxstate[i_ctx].i_state = x264_transition_lps[state];
    cb->f8_bits_encoded += x264_cabac_entropy[ 64 + state ];
}
......@@ -49,6 +49,7 @@ typedef struct
/* bit stream */
int b_first_bit;
int i_bits_outstanding;
int f8_bits_encoded; // only if using x264_cabac_size_decision()
bs_t *s;
} x264_cabac_t;
......@@ -72,6 +73,8 @@ void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx_idx, int b );
void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
void x264_cabac_encode_terminal( x264_cabac_t *cb, int b );
void x264_cabac_encode_flush( x264_cabac_t *cb );
/* don't write the bitstream, just calculate cost: */
void x264_cabac_size_decision( x264_cabac_t *cb, int i_ctx, int b );
static inline int x264_cabac_pos( x264_cabac_t *cb )
{
......
......@@ -58,6 +58,7 @@
#define X264_MAX3(a,b,c) X264_MAX((a),X264_MAX((b),(c)))
#define X264_MIN4(a,b,c,d) X264_MIN((a),X264_MIN3((b),(c),(d)))
#define X264_MAX4(a,b,c,d) X264_MAX((a),X264_MAX3((b),(c),(d)))
#define FIX8(f) ((int)(f*(1<<8)+.5))
/****************************************************************************
* General functions
......
......@@ -30,6 +30,9 @@
static inline void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits, int val )
{
#ifdef RDO_SKIP_BS
cb->f8_bits_encoded += ( bs_size_ue( val + (1<<exp_bits)-1 ) - exp_bits ) << 8;
#else
int k;
for( k = exp_bits; val >= (1<<k); k++ )
{
......@@ -39,6 +42,7 @@ static inline void x264_cabac_encode_ue_bypass( x264_cabac_t *cb, int exp_bits,
x264_cabac_encode_bypass( cb, 0 );
while( k-- )
x264_cabac_encode_bypass( cb, (val >> k)&0x01 );
#endif
}
static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_mb_type,
......@@ -52,6 +56,7 @@ static inline void x264_cabac_mb_type_intra( x264_t *h, x264_cabac_t *cb, int i_
{
x264_cabac_encode_decision( cb, ctx0, 1 );
x264_cabac_encode_terminal( cb, 1 );
x264_cabac_encode_flush( cb );
}
else
{
......@@ -851,21 +856,25 @@ static void block_residual_write_cabac( x264_t *h, x264_cabac_t *cb, int i_ctxBl
void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
{
bs_t *s = cb->s;
const int i_mb_type = h->mb.i_type;
const int i_mb_pos_start = x264_cabac_pos( cb );
int i_mb_pos_tex = 0;
const int b_update_stats = (cb == &h->cabac);
int i_list;
int i;
#ifndef RDO_SKIP_BS
const int i_mb_pos_start = x264_cabac_pos( cb );
int i_mb_pos_tex;
#endif
/* Write the MB type */
x264_cabac_mb_type( h, cb );
/* PCM special block type UNTESTED */
if( i_mb_type == I_PCM )
{
#ifdef RDO_SKIP_BS
cb->f8_bits_encoded += (384*8) << 8;
#else
bs_t *s = cb->s;
bs_align_0( s ); /* not sure */
/* Luma */
for( i = 0; i < 16*16; i++ )
......@@ -889,6 +898,7 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
}
x264_cabac_encode_init( cb, s );
#endif
return;
}
......@@ -1034,11 +1044,10 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
}
}
if( b_update_stats )
{
i_mb_pos_tex = x264_cabac_pos( cb );
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
}
#ifndef RDO_SKIP_BS
i_mb_pos_tex = x264_cabac_pos( cb );
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
#endif
if( i_mb_type != I_16x16 )
{
......@@ -1091,12 +1100,11 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
}
}
if( b_update_stats )
{
if( IS_INTRA( i_mb_type ) )
h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
else
h->stat.frame.i_ptex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
}
#ifndef RDO_SKIP_BS
if( IS_INTRA( i_mb_type ) )
h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
else
h->stat.frame.i_ptex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
#endif
}
......@@ -321,7 +321,7 @@ static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list )
}
}
void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
static void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
{
int i8, i4, i;
if( h->mb.b_transform_8x8 )
......@@ -350,11 +350,14 @@ void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
{
const int i_mb_type = h->mb.i_type;
const int i_mb_pos_start = bs_pos( s );
int i_mb_pos_tex;
int i_mb_i_offset;
int i;
#ifndef RDO_SKIP_BS
const int i_mb_pos_start = bs_pos( s );
int i_mb_pos_tex;
#endif
switch( h->sh.i_type )
{
case SLICE_TYPE_I:
......@@ -380,6 +383,9 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
/* Untested */
bs_write_ue( s, i_mb_i_offset + 25 );
#ifdef RDO_SKIP_BS
s->i_bits_encoded += 384*8;
#else
bs_align_0( s );
/* Luma */
for( i = 0; i < 16*16; i++ )
......@@ -402,6 +408,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
const int y = 8 * h->mb.i_mb_y + (i / 8);
bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
}
#endif
return;
}
else if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
......@@ -646,8 +653,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
return;
}
#ifndef RDO_SKIP_BS
i_mb_pos_tex = bs_pos( s );
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
#endif
/* Coded block pattern */
if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
......@@ -693,8 +702,10 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
block_residual_write_cavlc( h, s, 16 + i, h->dct.block[16+i].residual_ac, 15 );
}
#ifndef RDO_SKIP_BS
if( IS_INTRA( i_mb_type ) )
h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex;
else
h->stat.frame.i_ptex_bits += bs_pos(s) - i_mb_pos_tex;
#endif
}
......@@ -20,6 +20,33 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
/* Duplicate all the writer functions, just calculating bit cost
 * instead of writing the bitstream.  This is done by redefining the
 * low-level write primitives as counting macros and then re-including
 * the writer sources under different function names.
 * TODO: use these for fast 1st pass too. */
/* Enables the "#ifdef RDO_SKIP_BS" size-only paths in the included sources. */
#define RDO_SKIP_BS
/* CAVLC: produces exactly the same bit count as a normal encode. */
/* This probably still leaves some unnecessary computations. */
/* Each replacement only adds the size of the syntax element, in whole bits,
 * to (s)->i_bits_encoded.  bs_write1 and bs_write never evaluate their
 * value argument v. */
#define bs_write1(s,v) ((s)->i_bits_encoded += 1)
#define bs_write(s,n,v) ((s)->i_bits_encoded += (n))
#define bs_write_ue(s,v) ((s)->i_bits_encoded += bs_size_ue(v))
#define bs_write_se(s,v) ((s)->i_bits_encoded += bs_size_se(v))
#define bs_write_te(s,v,l) ((s)->i_bits_encoded += bs_size_te(v,l))
/* The copy of the macroblock writer compiled here is named
 * x264_macroblock_size_cavlc. */
#define x264_macroblock_write_cavlc x264_macroblock_size_cavlc
#include "cavlc.c"
/* CABAC: not exactly the same. x264_cabac_size_decision() keeps track of
 * fractional bits, but only with finite precision. */
#define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v)
/* Terminal bins are costed as an ordinary decision on context 276.
 * NOTE(review): 276 is presumably the context index reserved for the
 * terminate bin -- confirm against the context tables. */
#define x264_cabac_encode_terminal(c,v) x264_cabac_size_decision(c,276,v)
/* f8_bits_encoded counts 1/256ths of a bit, so a bypass bin costs 256. */
#define x264_cabac_encode_bypass(c,v) ((c)->f8_bits_encoded += 256)
/* Flushing expands to nothing when only sizes are computed. */
#define x264_cabac_encode_flush(c)
/* The copy of the macroblock writer compiled here is named
 * x264_macroblock_size_cabac. */
#define x264_macroblock_write_cabac x264_macroblock_size_cabac
/* NOTE(review): presumably renamed only so that the re-included copy does
 * not clash with the real x264_cabac_mb_skip; the new name suggests this
 * copy is never called -- confirm. */
#define x264_cabac_mb_skip x264_cabac_mb_size_skip_unused
#include "cabac.c"
static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
{
// backup mb_type because x264_macroblock_encode may change it to skip
......@@ -39,24 +66,24 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
if( IS_SKIP( h->mb.i_type ) )
{
i_bits = 1;
i_bits = 1 * i_lambda2;
}
else if( h->param.b_cabac )
{
x264_cabac_t cabac_tmp = h->cabac;
bs_t bs_tmp = h->out.bs;
cabac_tmp.s = &bs_tmp;
x264_macroblock_write_cabac( h, &cabac_tmp );
i_bits = x264_cabac_pos( &cabac_tmp ) - x264_cabac_pos( &h->cabac );
cabac_tmp.f8_bits_encoded = 0;
x264_macroblock_size_cabac( h, &cabac_tmp );
i_bits = ( cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
}
else
{
bs_t bs_tmp = h->out.bs;
x264_macroblock_write_cavlc( h, &bs_tmp );
i_bits = bs_pos( &bs_tmp ) - bs_pos( &h->out.bs );
bs_tmp.i_bits_encoded = 0;
x264_macroblock_size_cavlc( h, &bs_tmp );
i_bits = bs_tmp.i_bits_encoded * i_lambda2;
}
h->mb.i_type = i_type_bak;
h->mb.b_transform_8x8 = b_transform_bak;
return i_ssd + i_bits * i_lambda2;
return i_ssd + i_bits;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment