Commit ab90da74, authored by Fiona Glaser and committed by Loren Merritt

faster bs_write

parent c61a1df1
/*****************************************************************************
* bs.h :
*****************************************************************************
* Copyright (C) 2003 Laurent Aimar <fenrir@via.ecp.fr>
* Copyright (C) 2003-2008 x264 project
*
* Authors: Loren Merritt <lorenm@u.washington.edu>
* Fiona Glaser <fiona@x264.com>
* Laurent Aimar <fenrir@via.ecp.fr>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
......@@ -27,103 +31,125 @@ typedef struct bs_s
uint8_t *p;
uint8_t *p_end;
intptr_t cur_bits;
int i_left; /* i_count number of available bits */
int i_bits_encoded; /* RD only */
} bs_t;
/* Set up the bit writer s over the i_data bytes that start at p_data.
 * NOTE(review): this listing is a commit diff whose +/- markers were lost, so
 * two initialisations appear back to back: a byte-wise one (the first four
 * statements) and a word-aligned one (from "int offset" onwards). Presumably
 * only the word-aligned one is current code -- confirm against the repository. */
static inline void bs_init( bs_t *s, void *p_data, int i_data )
{
s->p_start = p_data;
s->p = p_data;
s->p_end = s->p + i_data;
s->i_left = 8;
/* Distance in bytes from the preceding WORD_SIZE boundary to p_data. */
int offset = ((intptr_t)p_data & (WORD_SIZE-1));
/* Both p and p_start are moved back to that boundary. */
s->p = s->p_start = (uint8_t*)p_data - offset;
s->p_end = (uint8_t*)p_data + i_data;
s->i_left = offset ? 8*offset : (WORD_SIZE*8);
/* Preload the word found at p, passed through endian_fix. */
s->cur_bits = endian_fix( *(intptr_t*)s->p );
}
/* Return the writer's position as a bit count relative to p_start: eight bits
 * per byte that p has advanced, plus the bits already consumed in the current unit.
 * NOTE(review): two return statements are shown because the diff markers were
 * lost; the first counts against an 8-bit unit, the second against a
 * WORD_SIZE*8-bit word. Presumably only the second is current -- confirm. */
static inline int bs_pos( bs_t *s )
{
return( 8 * ( s->p - s->p_start ) + 8 - s->i_left );
return( 8 * (s->p - s->p_start) + (WORD_SIZE*8) - s->i_left );
}
/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32/64-bit aligned.
 *
 * The pending bits are moved to the top of the word, passed through
 * endian_fix() and stored at s->p.  s->p then advances by
 * WORD_SIZE - i_left/8 bytes and i_left is reset to a full word. */
static inline void bs_flush( bs_t *s )
{
    /* With nothing pending, i_left equals the word width, and a shift by the
     * full width of the type is undefined in C.  Masking the count turns that
     * case into a shift by zero; p advances by 0 bytes in that case, so the
     * stored word is not made part of the stream. */
    const int shift = s->i_left & ((int)(WORD_SIZE*8) - 1);

    /* Shift as unsigned: left-shifting a negative signed value is undefined. */
    *(intptr_t*)s->p = endian_fix( (intptr_t)((uintptr_t)s->cur_bits << shift) );
    s->p += WORD_SIZE - s->i_left / 8;
    s->i_left = WORD_SIZE*8;
}
/* Append i_count bits taken from i_bits to the stream.
 * NOTE(review): this body interleaves two versions because the diff markers
 * were lost. The lines that write through *s->p one byte at a time (with the
 * "while"/"break" and the i_bits mask) look like the superseded code; the lines
 * that accumulate into s->cur_bits and store 32 bits at a time look like the
 * replacement. Confirm against the repository before relying on either.
 * NOTE(review): the cur_bits lines never mask i_bits, so callers presumably
 * must pass a value that already fits in i_count bits -- verify at call sites. */
static inline void bs_write( bs_t *s, int i_count, uint32_t i_bits )
{
while( i_count > 0 )
if( WORD_SIZE == 8 )
{
/* 8-byte words: append to the accumulator first, spill afterwards. */
s->cur_bits = (s->cur_bits << i_count) | i_bits;
s->i_left -= i_count;
if( s->i_left <= 32 )
{
/* At least 32 bits are pending: store 4 bytes and reclaim 32 bits of room. */
*(uint32_t*)s->p = endian_fix( s->cur_bits << s->i_left );
s->i_left += 32;
s->p += 4;
}
}
else
{
if( i_count < 32 )
i_bits &= (1<<i_count)-1;
if( i_count < s->i_left )
{
*s->p = (*s->p << i_count) | i_bits;
s->cur_bits = (s->cur_bits << i_count) | i_bits;
s->i_left -= i_count;
break;
}
else
{
*s->p = (*s->p << s->i_left) | (i_bits >> (i_count - s->i_left));
i_count -= s->i_left;
s->p++;
s->i_left = 8;
/* Fill the accumulator with the leading bits, store it, then restart it with i_bits. */
s->cur_bits = (s->cur_bits << s->i_left) | (i_bits >> i_count);
*(uint32_t*)s->p = endian_fix( s->cur_bits );
s->p += 4;
s->cur_bits = i_bits;
s->i_left = 32 - i_count;
}
}
}
/* 32-bit writes get their own entry point so that the ordinary bs_write does
 * not need a branch for them.  Golomb codes never have an even length, so
 * only slice headers end up here.  The value is emitted as two 16-bit
 * halves, most significant half first. */
static inline void bs_write32( bs_t *s, uint32_t i_bits )
{
    const uint32_t upper_half = i_bits >> 16;

    bs_write( s, 16, upper_half );
    bs_write( s, 16, i_bits );
}
/* Append the single bit i_bit to the stream.
 * NOTE(review): two versions are interleaved here because the diff markers
 * were lost. The statements operating on *s->p with an 8-bit i_left look like
 * the superseded code; those operating on s->cur_bits look like the
 * replacement -- confirm against the repository. */
static inline void bs_write1( bs_t *s, uint32_t i_bit )
{
*s->p <<= 1;
*s->p |= i_bit;
s->cur_bits <<= 1;
s->cur_bits |= i_bit;
s->i_left--;
if( s->i_left == 0 )
/* 32 bits have been consumed from a full word: store them and reset i_left. */
if( s->i_left == WORD_SIZE*8-32 )
{
s->p++;
s->i_left = 8;
*(uint32_t*)s->p = endian_fix32( s->cur_bits );
s->p += 4;
s->i_left = WORD_SIZE*8;
}
}
/* Pad the stream up to a byte boundary by shifting in zero bits.
 * NOTE(review): old and new lines are interleaved (diff markers lost). The
 * *s->p / "i_left != 8" lines look like the superseded byte-wise code; the
 * cur_bits / "i_left&7" lines followed by bs_flush look like the replacement
 * -- confirm against the repository. */
static inline void bs_align_0( bs_t *s )
{
if( s->i_left != 8 )
if( s->i_left&7 )
{
*s->p <<= s->i_left;
s->i_left = 8;
s->p++;
/* Shift by the number of bits missing to the next multiple of 8, then round i_left down to it. */
s->cur_bits <<= s->i_left&7;
s->i_left &= ~7;
}
bs_flush( s );
}
/* Pad the stream up to a byte boundary, setting the padding bits to one.
 * NOTE(review): old and new lines are interleaved (diff markers lost). The
 * *s->p / "i_left != 8" lines look like the superseded byte-wise code; the
 * cur_bits / "i_left&7" lines followed by bs_flush look like the replacement
 * -- confirm against the repository. */
static inline void bs_align_1( bs_t *s )
{
if( s->i_left != 8 )
if( s->i_left&7 )
{
*s->p <<= s->i_left;
*s->p |= (1 << s->i_left) - 1;
s->i_left = 8;
s->p++;
/* Make room for the padding, fill it with ones, then round i_left down to a multiple of 8. */
s->cur_bits <<= s->i_left&7;
s->cur_bits |= (1 << (s->i_left&7)) - 1;
s->i_left &= ~7;
}
bs_flush( s );
}
/* Default alignment entry point: forwards to bs_align_0, the variant that
 * pads by shifting only and does not set the padding bits. */
static inline void bs_align( bs_t *s )
{
bs_align_0( s );
}
/* golomb functions */
static inline void bs_write_ue( bs_t *s, unsigned int val )
/* Bit-length lookup: i_size0_255[v] is the number of binary digits of v for
 * v = 1..255 (1 -> 1, 2..3 -> 2, 4..7 -> 3, ... 128..255 -> 8); entry 0 is
 * also 1.  The golomb writers derive the code length as 2*i_size0_255[v] - 1. */
static const uint8_t i_size0_255[256] =
{
1,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
};
static inline void bs_write_ue_big( bs_t *s, unsigned int val )
{
int i_size = 0;
static const uint8_t i_size0_255[256] =
{
1,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
};
if( val == 0 )
{
bs_write1( s, 1 );
}
else
{
unsigned int tmp = ++val;
......@@ -144,89 +170,103 @@ static inline void bs_write_ue( bs_t *s, unsigned int val )
}
}
/* Table-driven unsigned golomb writer.  Only works on values under 255:
 * val+1 is used directly as an index into the 256-entry i_size0_255 table. */
static inline void bs_write_ue( bs_t *s, int val )
{
    if( val != 0 )
    {
        /* The code is val+1 written in 2*bitlength(val+1)-1 bits. */
        const int code = val + 1;
        const int code_len = 2 * i_size0_255[code] - 1;
        bs_write( s, code_len, code );
        return;
    }

    /* Zero is the single bit '1'. */
    bs_write1( s, 1 );
}
/* Write a signed value as a golomb code: val is first mapped to a non-negative
 * index (val <= 0 -> -2*val, val > 0 -> 2*val - 1) and that index is written.
 * NOTE(review): the first statement (a call to bs_write_ue) and the inlined
 * code after it appear together because the diff markers were lost;
 * presumably the call is the superseded line -- confirm against the repository. */
static inline void bs_write_se( bs_t *s, int val )
{
bs_write_ue( s, val <= 0 ? -val * 2 : val * 2 - 1);
int i_size = 0;
val = val <= 0 ? -val * 2 : val * 2 - 1;
if( val == 0 )
bs_write1( s, 1 );
else
{
unsigned int tmp = ++val;
/* The table only covers 0..255: account for one extra byte of magnitude before the lookup. */
if( tmp >= 0x100 )
{
i_size += 8;
tmp >>= 8;
}
i_size += i_size0_255[tmp];
bs_write( s, 2 * i_size - 1, val );
}
}
/* Write val with a code that depends on x: for x == 1 a single bit (the
 * inverse of val's low bit) is written, for x > 1 val goes through
 * bs_write_ue, and for any other x nothing is written.
 * NOTE(review): the braced "1&~val" call and the unbraced "1^val" call are
 * shown together because the diff markers were lost; presumably the latter
 * replaces the former -- confirm against the repository. */
static inline void bs_write_te( bs_t *s, int x, int val )
{
if( x == 1 )
{
bs_write1( s, 1&~val );
}
bs_write1( s, 1^val );
else if( x > 1 )
{
bs_write_ue( s, val );
}
}
/* Terminate the payload: write a single 1 bit, then finish the current unit.
 * NOTE(review): the "i_left != 8" zero-fill and the bs_flush call are shown
 * together because the diff markers were lost; presumably bs_flush replaces
 * the zero-fill -- confirm against the repository. */
static inline void bs_rbsp_trailing( bs_t *s )
{
bs_write1( s, 1 );
if( s->i_left != 8 )
{
bs_write( s, s->i_left, 0x00 );
}
bs_flush( s );
}
/* Code-size lookup used by the bs_size_* helpers: for v = 0..254 the entry is
 * 2*floor(log2(v+1)) + 1 (0 -> 1, 1..2 -> 3, 3..6 -> 5, ... 127..254 -> 15). */
static const uint8_t i_size0_254[255] =
{
1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
11,11,11,11,11,11,11,11,11,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
};
/* Return the size of the code for val by direct table lookup.  val indexes a
 * 255-entry table without a range check, so it must be below 255.
 * NOTE(review): the function-local copy of i_size0_254 is shown because the
 * diff markers were lost; presumably it was removed in favour of a file-scope
 * table of the same name -- confirm against the repository. */
static inline int bs_size_ue( unsigned int val )
{
static const uint8_t i_size0_254[255] =
{
1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,
11,11,11,11,11,11,11,11,11,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
13,13,13,13,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
};
return i_size0_254[val];
}
/* Return the size of the code for val, including values too large for a
 * direct lookup: values below 255 read i_size0_254 directly, larger ones are
 * reduced by shifting before the lookup and a constant is added for the
 * bits shifted out.
 * NOTE(review): old and new lines are interleaved in the else branch (diff
 * markers lost) -- there are two return statements and the i_size
 * accumulation sits next to a fixed "+ 16". Which lines are current cannot be
 * told from this view; confirm against the repository. */
static inline int bs_size_ue_big( unsigned int val )
{
if( val < 255 )
{
return i_size0_254[val];
}
else
{
int i_size = 0;
val++;
if( val >= 0x10000 )
{
i_size += 32;
val = (val >> 16) - 1;
}
if( val >= 0x100 )
{
i_size += 16;
val = (val >> 8) - 1;
}
return i_size0_254[val] + i_size;
val = (val >> 8) - 1;
return i_size0_254[val] + 16;
}
}
/* Return the size of the code for a signed value: val is mapped to a
 * non-negative index (val <= 0 -> -2*val, val > 0 -> 2*val - 1) and the size
 * is looked up, with one 8-bit reduction step for indices of 255 and above.
 * NOTE(review): the leading return (a call to bs_size_ue) is shown together
 * with the inlined code because the diff markers were lost; presumably the
 * call is the superseded line -- confirm against the repository. */
static inline int bs_size_se( int val )
{
return bs_size_ue( val <= 0 ? -val * 2 : val * 2 - 1);
val = val <= 0 ? -val * 2 : val * 2 - 1;
if( val < 255 )
return i_size0_254[val];
else
{
val++;
val = (val >> 8) - 1;
return i_size0_254[val] + 16;
}
}
/* Return the size of the code bs_write_te would use: 1 when x == 1, a table
 * lookup on val when x > 1.
 * NOTE(review): old and new lines are interleaved (diff markers lost): a
 * bs_size_ue call, a direct i_size0_254 lookup and a "return 0" all appear.
 * Which of them is current cannot be told from this view -- confirm against
 * the repository. */
static inline int bs_size_te( int x, int val )
{
if( x == 1 )
{
return 1;
}
else if( x > 1 )
{
return bs_size_ue( val );
}
return i_size0_254[val];
return 0;
}
......
......@@ -133,8 +133,7 @@
#define x264_pthread_cond_wait(c,m)
#endif
/* FIXME: long isn't always the native register size (e.g. win64). */
#define WORD_SIZE sizeof(long)
#define WORD_SIZE sizeof(void*)
#if !defined(_WIN64) && !defined(__LP64__)
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
......@@ -142,4 +141,31 @@
#endif
#endif
#ifdef WORDS_BIGENDIAN
/* With WORDS_BIGENDIAN defined no byte swap is applied, so both helpers are
 * the identity.  endian_fix32 has to exist here as well: bs_write1 calls it
 * regardless of the host byte order. */
#define endian_fix(x) (x)
#define endian_fix32(x) (x)
#elif defined(__GNUC__) && defined(HAVE_MMX)
/* Byte-swap a 32-bit value with the bswap instruction.  This variant is only
 * compiled when __GNUC__ and HAVE_MMX are defined and WORDS_BIGENDIAN is not. */
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
/* "+r": x is both the input and the output register operand. */
asm("bswap %0":"+r"(x));
return x;
}
/* Byte-swap a pointer-sized word with the bswap instruction.  This variant is
 * only compiled when __GNUC__ and HAVE_MMX are defined and WORDS_BIGENDIAN is not.
 * NOTE(review): the operand is an intptr_t, so the swap presumably covers 4
 * or 8 bytes depending on the target's pointer size -- confirm on the build. */
static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
{
/* "+r": x is both the input and the output register operand. */
asm("bswap %0":"+r"(x));
return x;
}
#else
/* Portable 32-bit byte swap: every byte of x is moved to the mirrored
 * position (byte 0 <-> byte 3, byte 1 <-> byte 2). */
static ALWAYS_INLINE uint32_t endian_fix32( uint32_t x )
{
    const uint32_t from_byte0 = x << 24;
    const uint32_t from_byte1 = (x & 0x0000ff00) << 8;
    const uint32_t from_byte2 = (x >> 8) & 0x0000ff00;
    const uint32_t from_byte3 = x >> 24;

    /* The four parts occupy disjoint bytes, so OR-ing them equals adding them. */
    return from_byte0 | from_byte1 | from_byte2 | from_byte3;
}
/* Portable byte swap of a pointer-sized word.  With 8-byte words the two
 * 32-bit halves are swapped individually with endian_fix32 and exchanged;
 * otherwise the word is passed to endian_fix32 as is. */
static ALWAYS_INLINE intptr_t endian_fix( intptr_t x )
{
    if( WORD_SIZE == 8 )
    {
        /* Widen to an unsigned 64-bit value before shifting: ">> 32" applied
         * to a 32-bit intptr_t is an over-wide shift (diagnosed by compilers
         * even in this dead branch), and right-shifting a negative signed
         * value is implementation-defined. */
        const uint64_t word = (uint64_t)x;
        const uint32_t high = (uint32_t)(word >> 32);
        const uint32_t low  = (uint32_t)word;
        return endian_fix32( high ) + ((uint64_t)endian_fix32( low ) << 32);
    }
    else
        return endian_fix32( x );
}
#endif
#endif /* X264_OSDEP_H */
......@@ -147,19 +147,17 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
if( ( i_level_code >> i_suffix_length ) < 14 )
{
bs_write( s, (i_level_code >> i_suffix_length) + 1, 1 );
if( i_suffix_length > 0 )
bs_write( s, i_suffix_length, i_level_code );
bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length,
(1<<i_suffix_length) + (i_level_code & ((1<<i_suffix_length)-1)) );
}
else if( i_suffix_length == 0 && i_level_code < 30 )
{
bs_write( s, 15, 1 );
bs_write( s, 4, i_level_code - 14 );
bs_write( s, 19, (1<<4) + (i_level_code - 14) );
}
else if( i_suffix_length > 0 && ( i_level_code >> i_suffix_length ) == 14 )
{
bs_write( s, 15, 1 );
bs_write( s, i_suffix_length, i_level_code );
bs_write( s, 15 + i_suffix_length,
(1<<i_suffix_length) + (i_level_code & ((1<<i_suffix_length)-1)) );
}
else
{
......@@ -192,7 +190,7 @@ static void block_residual_write_cavlc( x264_t *h, bs_t *s, int i_idx, int16_t *
}
}
bs_write( s, i_level_prefix + 1, 1 );
bs_write( s, i_level_prefix - 3, i_level_code );
bs_write( s, i_level_prefix - 3, i_level_code & ((1<<(i_level_prefix-3))-1) );
}
if( i_suffix_length == 0 )
......@@ -398,15 +396,9 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
}
else
{
bs_write1( s, 0 ); /* b_prev_intra4x4_pred_mode */
if( i_mode < i_pred )
{
bs_write( s, 3, i_mode );
}
else
{
bs_write( s, 3, i_mode - 1 );
}
if( i_mode >= i_pred )
i_mode--;
bs_write( s, 4, i_mode );
}
}
bs_write_ue( s, x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ] );
......
......@@ -39,6 +39,8 @@
#define NALU_OVERHEAD 5 // startcode + NAL type costs 5 bytes per frame
#define bs_write_ue bs_write_ue_big
static void x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
x264_nal_t **pp_nal, int *pi_nal,
x264_picture_t *pic_out );
......
......@@ -43,7 +43,7 @@ static uint16_t cabac_prefix_size[15][128];
#define x264_cabac_encode_decision(c,x,v) x264_cabac_size_decision(c,x,v)
#define x264_cabac_encode_terminal(c) x264_cabac_size_decision(c,276,0)
#define x264_cabac_encode_bypass(c,v) ((c)->f8_bits_encoded += 256)
#define x264_cabac_encode_ue_bypass(c,e,v) ((c)->f8_bits_encoded += (bs_size_ue(v+(1<<e)-1)-e)<<8)
#define x264_cabac_encode_ue_bypass(c,e,v) ((c)->f8_bits_encoded += (bs_size_ue_big(v+(1<<e)-1)-e)<<8)
#define x264_cabac_encode_flush(h,c)
#define x264_macroblock_write_cabac x264_macroblock_size_cabac
#include "cabac.c"
......
......@@ -28,6 +28,8 @@
#include "config.h"
#endif
#define bs_write_ue bs_write_ue_big
static void transpose( uint8_t *buf, int w )
{
int i, j;
......@@ -339,8 +341,8 @@ void x264_sps_write( bs_t *s, x264_sps_t *sps )
bs_write1( s, sps->vui.b_timing_info_present );
if( sps->vui.b_timing_info_present )
{
bs_write( s, 32, sps->vui.i_num_units_in_tick );
bs_write( s, 32, sps->vui.i_time_scale );
bs_write32( s, sps->vui.i_num_units_in_tick );
bs_write32( s, sps->vui.i_time_scale );
bs_write1( s, sps->vui.b_fixed_frame_rate );
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment