Commit b3076aef authored by Loren Merritt

cabac: use bytestream instead of bitstream.

35% faster cabac, 20% faster overall lossless, ~1% faster overall at normal bitrates.


git-svn-id: svn://svn.videolan.org/x264/trunk@651 df754926-b1dd-0310-bc7b-ec298dee348c
parent 8300d334
......@@ -739,6 +739,13 @@ static const uint8_t x264_cabac_transition[2][128] =
113,114,115,116,117,118,119,120,121,122,123,124,125,126,126,127,
}};
/* Renormalisation shift amounts, looked up with (range >> 3).
 * Each entry is the left shift that brings a range falling in that
 * 8-wide bucket back up to at least 0x100. */
static const uint8_t renorm_shift[64] =
{
    /* bucket  0      */ 6,
    /* bucket  1      */ 5,
    /* buckets 2..3   */ 4, 4,
    /* buckets 4..7   */ 3, 3, 3, 3,
    /* buckets 8..15  */ 2, 2, 2, 2, 2, 2, 2, 2,
    /* buckets 16..31 */ 1, 1, 1, 1, 1, 1, 1, 1,
                         1, 1, 1, 1, 1, 1, 1, 1,
    /* buckets 32..63 */ 0, 0, 0, 0, 0, 0, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0,
};
static const uint8_t x264_cabac_probability[128] =
{
FIX8(0.9812), FIX8(0.9802), FIX8(0.9792), FIX8(0.9781),
......@@ -835,124 +842,61 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
}
}
/*****************************************************************************
*
*****************************************************************************/
/* Prepare a CABAC context for decoding from bitstream `s`:
 * the range starts at 0x01fe and the first 9 bits of the stream
 * seed the low register. The stream pointer is kept in cb->s. */
void x264_cabac_decode_init( x264_cabac_t *cb, bs_t *s )
{
    const int first_bits = bs_read( s, 9 );

    cb->s       = s;
    cb->i_low   = first_bits;
    cb->i_range = 0x01fe;
}
/* Decoder renormalisation: double the range until it is at least 0x0100,
 * pulling one fresh bit from the stream into the bottom of i_low for
 * every doubling. */
static inline void x264_cabac_decode_renorm( x264_cabac_t *cb )
{
    for( ;; )
    {
        if( cb->i_range >= 0x0100 )
            return;

        cb->i_range <<= 1;
        cb->i_low = ( cb->i_low << 1 ) | bs_read( cb->s, 1 );
    }
}
/* Decode one context-coded bin.
 *   cb    - decoder state
 *   i_ctx - index of the context whose state is read and then updated
 * Returns the decoded bin (0 or 1). */
int x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx )
{
    const int state     = cb->state[i_ctx];
    const int range_idx = ( cb->i_range >> 6 ) & 0x03;
    const int lps_range = x264_cabac_range_lps[state][range_idx];
    int bin = state >> 6;   /* start from the value stored in bit 6 of the state */

    cb->i_range -= lps_range;
    if( cb->i_low >= cb->i_range )
    {
        /* low lies in the upper sub-interval: flip the bin and
         * continue inside the sub-interval of width lps_range */
        cb->i_low  -= cb->i_range;
        cb->i_range = lps_range;
        bin ^= 1;
    }

    cb->state[i_ctx] = x264_cabac_transition[bin][state];
    x264_cabac_decode_renorm( cb );

    return bin;
}
/* Decode one bypass bin: shift a new stream bit into i_low and compare
 * against the unchanged range. Returns 1 (and subtracts the range from
 * i_low) when i_low reaches the range, otherwise 0. */
int x264_cabac_decode_bypass( x264_cabac_t *cb )
{
    cb->i_low = ( cb->i_low << 1 ) | bs_read( cb->s, 1 );

    if( cb->i_low < cb->i_range )
        return 0;

    cb->i_low -= cb->i_range;
    return 1;
}
/* Decode the terminating bin.
 * Returns 1 without touching the state when i_low >= i_range - 2;
 * otherwise shrinks the range by 2, renormalises and returns 0. */
int x264_cabac_decode_terminal( x264_cabac_t *cb )
{
    if( cb->i_low < cb->i_range - 2 )
    {
        cb->i_range -= 2;
        x264_cabac_decode_renorm( cb );
        return 0;
    }

    return 1;
}
/*****************************************************************************
*
*****************************************************************************/
/* Reset the CABAC encoder registers and attach the output destination.
 *
 * NOTE(review): two signatures appear back to back below, and the body mixes
 * statements that use the bs_t argument `s` with statements that use the
 * `p_data` / `p_end` byte pointers. This looks like the removed and added
 * sides of a diff rendered without +/- markers; confirm which lines are live
 * before treating this as compilable code. */
void x264_cabac_encode_init( x264_cabac_t *cb, bs_t *s )
void x264_cabac_encode_init( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end )
{
cb->i_low = 0;
cb->i_range = 0x01FE;
/* lines that reference the bs_t writer `s` */
cb->i_bits_outstanding = 0;
cb->s = s;
s->i_left++; // the first bit will be shifted away and not written
/* lines that reference the raw byte buffer [p_data, p_end) */
cb->i_queue = -1; // the first bit will be shifted away and not written
cb->i_bytes_outstanding = 0;
cb->p_start = p_data;
cb->p = p_data;
cb->p_end = p_end;
}
/* Low-level output helpers of the CABAC encoder.
 *
 * NOTE(review): this span contains the headers of x264_cabac_putbit,
 * x264_cabac_putbyte and a loop-based x264_cabac_encode_renorm with their
 * bodies interleaved line by line (diff rendered without +/- markers).
 * Statements touching cb->s / cb->i_bits_outstanding presumably belong to the
 * bit-oriented writer, and statements touching cb->p / cb->i_queue /
 * cb->i_bytes_outstanding to the byte-oriented one; verify against the
 * repository before relying on any single line. */
static inline void x264_cabac_putbit( x264_cabac_t *cb, int b )
static inline void x264_cabac_putbyte( x264_cabac_t *cb )
{
bs_write1( cb->s, b );
if( cb->i_bits_outstanding > 0 )
/* only act once at least 8 bits have been queued */
if( cb->i_queue >= 8 )
{
/* write the pending bits as the complement of b, one at a time while
 * more than 32 remain, then the rest in a single bs_write call */
while( cb->i_bits_outstanding > 32 )
{
bs_write1( cb->s, 1-b );
cb->i_bits_outstanding--;
}
bs_write( cb->s, cb->i_bits_outstanding, (1-b)*(~0) );
cb->i_bits_outstanding = 0;
}
}
/* out = everything above the low (i_queue+2) bits of i_low */
int out = cb->i_low >> (cb->i_queue+2);
/* keep only those low (i_queue+2) bits in i_low */
cb->i_low &= (4<<cb->i_queue)-1;
cb->i_queue -= 8;
static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
{
/* RenormE */
while( cb->i_range < 0x100 )
{
if( cb->i_low < 0x100 )
{
x264_cabac_putbit( cb, 0 );
}
else if( cb->i_low >= 0x200 )
/* a low byte of 0xff is counted instead of written */
if( (out & 0xff) == 0xff )
{
cb->i_low -= 0x200;
x264_cabac_putbit( cb, 1 );
cb->i_bytes_outstanding++;
}
else
{
cb->i_low -= 0x100;
cb->i_bits_outstanding++;
/* not enough room for the pending bytes plus this one: silently drop */
if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end )
return;
/* bit 8 of out is the carry into the previously written byte */
int carry = out & 0x100;
if( carry )
{
// this can't happen on the first byte (buffer underrun),
// because that would correspond to a probability > 1.
// this can't carry beyond the one byte, because any 0xff bytes
// are in bytes_outstanding and thus not written yet.
cb->p[-1]++;
}
/* emit the counted 0xff bytes; with a carry they are written as 0x00 */
while( cb->i_bytes_outstanding > 0 )
{
*(cb->p++) = carry ? 0 : 0xff;
cb->i_bytes_outstanding--;
}
/* store out; the uint8_t destination keeps only its low 8 bits */
*(cb->p++) = out;
}
cb->i_range <<= 1;
cb->i_low <<= 1;
}
}
/* Encoder renormalisation in one step: look up the shift for the current
 * range in renorm_shift[], apply it to range and low, account for the new
 * bits in the queue, then let x264_cabac_putbyte() handle any output. */
static inline void x264_cabac_encode_renorm( x264_cabac_t *cb )
{
    const int n_bits = renorm_shift[cb->i_range >> 3];

    cb->i_queue  += n_bits;
    cb->i_low   <<= n_bits;
    cb->i_range <<= n_bits;

    x264_cabac_putbyte( cb );
}
void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx, int b )
{
int i_state = cb->state[i_ctx];
......@@ -975,17 +919,8 @@ void x264_cabac_encode_bypass( x264_cabac_t *cb, int b )
{
cb->i_low <<= 1;
cb->i_low += (((int32_t)b<<31)>>31) & cb->i_range;
if( cb->i_low >= 0x400 || cb->i_low < 0x200 )
{
x264_cabac_putbit( cb, cb->i_low >> 10 );
cb->i_low &= 0x3ff;
}
else
{
cb->i_low -= 0x200;
cb->i_bits_outstanding++;
}
cb->i_queue += 1;
x264_cabac_putbyte( cb );
}
void x264_cabac_encode_terminal( x264_cabac_t *cb, int b )
......@@ -994,19 +929,34 @@ void x264_cabac_encode_terminal( x264_cabac_t *cb, int b )
if( b )
{
cb->i_low += cb->i_range;
cb->i_range = 2;
cb->i_range = 2<<7;
cb->i_low <<= 7;
cb->i_queue += 7;
x264_cabac_putbyte( cb );
}
else
{
x264_cabac_encode_renorm( cb );
}
x264_cabac_encode_renorm( cb );
}
/* Finish the arithmetic code word and push the remaining state to the output.
 *
 * NOTE(review): the body mixes calls on the bs_t writer (x264_cabac_putbit,
 * bs_write1, bs_align_0 on cb->s) with byte-buffer code (cb->p, cb->i_queue,
 * cb->i_bytes_outstanding). This looks like both sides of a diff shown
 * without +/- markers; confirm which statements are live. */
void x264_cabac_encode_flush( x264_cabac_t *cb )
{
x264_cabac_putbit( cb, (cb->i_low >> 9)&0x01 );
bs_write1( cb->s, (cb->i_low >> 8)&0x01 );
/* set bit 7, then queue 10 more bits and call putbyte twice to drain them */
cb->i_low |= 0x80;
cb->i_low <<= 10;
cb->i_queue += 10;
x264_cabac_putbyte( cb );
x264_cabac_putbyte( cb );
cb->i_queue = 0;
/* bail out if the pending 0xff bytes would not fit in the buffer */
if( cb->p + cb->i_bytes_outstanding + 1 >= cb->p_end )
return; //FIXME throw an error instead of silently truncating the frame
/* check that */
bs_write1( cb->s, 0x01 );
bs_align_0( cb->s );
/* write out every byte still counted in i_bytes_outstanding as 0xff */
while( cb->i_bytes_outstanding > 0 )
{
*(cb->p++) = 0xff;
cb->i_bytes_outstanding--;
}
}
/*****************************************************************************
......
......@@ -34,23 +34,21 @@ typedef struct
int i_range;
/* bit stream */
int i_bits_outstanding;
int i_queue;
int i_bytes_outstanding;
int f8_bits_encoded; // only if using x264_cabac_size_decision()
bs_t *s;
uint8_t *p_start;
uint8_t *p;
uint8_t *p_end;
} x264_cabac_t;
/* init the contexts given i_slice_type, the quantif and the model */
void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model );
/* decoder only (unused): */
void x264_cabac_decode_init ( x264_cabac_t *cb, bs_t *s );
int x264_cabac_decode_decision( x264_cabac_t *cb, int i_ctx_idx );
int x264_cabac_decode_bypass ( x264_cabac_t *cb );
int x264_cabac_decode_terminal( x264_cabac_t *cb );
/* encoder only: */
void x264_cabac_encode_init ( x264_cabac_t *cb, bs_t *s );
void x264_cabac_encode_init ( x264_cabac_t *cb, uint8_t *p_data, uint8_t *p_end );
void x264_cabac_encode_decision( x264_cabac_t *cb, int i_ctx_idx, int b );
void x264_cabac_encode_bypass( x264_cabac_t *cb, int b );
void x264_cabac_encode_terminal( x264_cabac_t *cb, int b );
......@@ -63,7 +61,7 @@ int x264_cabac_size_decision_noup( uint8_t *state, int b );
/* Report the encoder's current output position in bits.
 *
 * NOTE(review): two return statements follow each other, so as written only
 * the first can execute. They appear to be the bitstream-based and the
 * byte-buffer-based variants of the same computation, left side by side by a
 * diff rendered without +/- markers; confirm which one is intended. */
static inline int x264_cabac_pos( x264_cabac_t *cb )
{
/* bitstream variant: writer position plus bits not yet emitted */
return bs_pos( cb->s ) + cb->i_bits_outstanding;
/* byte-buffer variant: (bytes written + bytes pending) * 8 + i_queue */
return (cb->p - cb->p_start + cb->i_bytes_outstanding) * 8 + cb->i_queue;
}
#endif
......@@ -831,30 +831,27 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
#ifdef RDO_SKIP_BS
cb->f8_bits_encoded += (384*8) << 8;
#else
bs_t *s = cb->s;
bs_align_0( s ); /* not sure */
if( cb->p + 385 >= cb->p_end )
return; //FIXME throw an error
/* Luma */
for( i = 0; i < 16*16; i++ )
for( i = 0; i < 16; i++ )
{
const int x = 16 * h->mb.i_mb_x + (i % 16);
const int y = 16 * h->mb.i_mb_y + (i / 16);
bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] );
memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 );
cb->p += 16;
}
/* Cb */
for( i = 0; i < 8*8; i++ )
for( i = 0; i < 8; i++ )
{
const int x = 8 * h->mb.i_mb_x + (i % 8);
const int y = 8 * h->mb.i_mb_y + (i / 8);
bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] );
memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 );
cb->p += 8;
}
/* Cr */
for( i = 0; i < 8*8; i++ )
for( i = 0; i < 8; i++ )
{
const int x = 8 * h->mb.i_mb_x + (i % 8);
const int y = 8 * h->mb.i_mb_y + (i / 8);
bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 );
cb->p += 8;
}
x264_cabac_encode_init( cb, s );
x264_cabac_encode_init( cb, cb->p, cb->p_end );
#endif
return;
}
......
......@@ -1043,7 +1043,7 @@ static void x264_slice_write( x264_t *h )
/* init cabac */
x264_cabac_context_init( &h->cabac, h->sh.i_type, h->sh.i_qp, h->sh.i_cabac_init_idc );
x264_cabac_encode_init ( &h->cabac, &h->out.bs );
x264_cabac_encode_init ( &h->cabac, h->out.bs.p, h->out.bs.p_end );
}
h->mb.i_last_qp = h->sh.i_qp;
h->mb.i_last_dqp = 0;
......@@ -1166,7 +1166,7 @@ static void x264_slice_write( x264_t *h )
if( h->param.b_cabac )
{
x264_cabac_encode_flush( &h->cabac );
h->out.bs.p = h->cabac.p;
}
else
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment