Commit 1a072a3a, authored by David Conrad, committed by Fiona Glaser

Fix unaligned accesses in bitstream writer

Fixes x264 on CPUs with no unaligned access support (e.g. SPARC).
Improves performance marginally on CPUs with penalties for unaligned stores (e.g. some x86).
parent 77c46ebc
......@@ -73,21 +73,22 @@ extern vlc_large_t x264_level_token[7][LEVEL_TABLE_SIZE];
/* Set up bitstream writer s over the i_data bytes that start at p_data.
 *
 * NOTE(review): this listing looks like a diff rendered without +/- markers,
 * so statements from before and after the commit appear side by side (offset,
 * i_left and cur_bits are each assigned twice). The "before"/"after" labels
 * below are inferred from the hunk header (-73,21 +73,22) -- confirm against
 * the real diff. */
static inline void bs_init( bs_t *s, void *p_data, int i_data )
{
/* before: offset is the address of p_data masked with WORD_SIZE-1 */
int offset = ((intptr_t)p_data & (WORD_SIZE-1));
/* after: offset is the low two address bits, i.e. the distance past a 4-byte boundary */
int offset = ((intptr_t)p_data & 3);
/* p and p_start are moved back by offset bytes; p_end stays at the end of the caller's data */
s->p = s->p_start = (uint8_t*)p_data - offset;
s->p_end = (uint8_t*)p_data + i_data;
/* before: i_left derived from offset, then a pointer-sized load from s->p */
s->i_left = offset ? 8*offset : (WORD_SIZE*8);
s->cur_bits = endian_fix( *(intptr_t*)s->p );
/* after: i_left is the word size minus the offset bytes, in bits; the load is
 * 32 bits wide, and the shift discards the low (4-offset) bytes so only the
 * offset bytes that precede p_data remain in cur_bits */
s->i_left = (WORD_SIZE - offset)*8;
s->cur_bits = endian_fix32(*(uint32_t *)(s->p));
/* NOTE(review): with offset == 0 this shifts by 32; if cur_bits is a 32-bit
 * type that shift count is undefined behaviour in C -- confirm the declared
 * type of cur_bits in bs_t. */
s->cur_bits >>= (4-offset)*8;
}
/* Return the writer position in bits: eight times the byte distance from
 * p_start to p, plus the part of the current word that is no longer free
 * (WORD_SIZE*8 - i_left). */
static inline int bs_pos( bs_t *s )
{
return( 8 * (s->p - s->p_start) + (WORD_SIZE*8) - s->i_left );
}
/* NOTE(review): the two header comments and the two store statements below are
 * the before/after pair of a diff rendered without +/- markers; the first of
 * each pair is presumably the removed line -- confirm against the real diff. */
/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32/64-bit aligned. */
/* Write the rest of cur_bits to the bitstream; results in a bitstream no longer 32-bit aligned. */
static inline void bs_flush( bs_t *s )
{
/* before: pointer-sized store of cur_bits shifted up by i_left */
*(intptr_t*)s->p = endian_fix( s->cur_bits << s->i_left );
/* after: 32-bit store; the shift count is masked to 0..31 */
*(uint32_t*)s->p = endian_fix32( s->cur_bits << (s->i_left&31) );
/* advance p by the bytes that were in use (WORD_SIZE minus the free bytes),
 * then mark the whole word as free again */
s->p += WORD_SIZE - s->i_left / 8;
s->i_left = WORD_SIZE*8;
}
......@@ -151,21 +152,12 @@ static inline void bs_write1( bs_t *s, uint32_t i_bit )
/* Pad the current partial byte with 0 bits, then flush.
 *
 * NOTE(review): diff rendered without +/- markers. Judging by the hunk header
 * (-151,21 +152,12) the if-block is the removed implementation and the
 * bs_write call is its replacement -- confirm against the real diff. */
static inline void bs_align_0( bs_t *s )
{
/* before: shift in (i_left & 7) zero bits by hand and round i_left down to a multiple of 8 */
if( s->i_left&7 )
{
s->cur_bits <<= s->i_left&7;
s->i_left &= ~7;
}
/* after: the same padding expressed as a bs_write of (i_left & 7) bits of value 0
 * (bs_write is defined outside this view) */
bs_write( s, s->i_left&7, 0 );
bs_flush( s );
}
/* Pad the current partial byte with 1 bits, then flush.
 *
 * NOTE(review): diff rendered without +/- markers. Judging by the hunk header
 * (-151,21 +152,12) the if-block is the removed implementation and the
 * bs_write call is its replacement -- confirm against the real diff. */
static inline void bs_align_1( bs_t *s )
{
/* before: shift cur_bits up by (i_left & 7), set those low bits to 1, and
 * round i_left down to a multiple of 8 */
if( s->i_left&7 )
{
s->cur_bits <<= s->i_left&7;
s->cur_bits |= (1 << (s->i_left&7)) - 1;
s->i_left &= ~7;
}
/* after: the same padding expressed as a bs_write of (i_left & 7) bits, all set
 * (bs_write is defined outside this view) */
bs_write( s, s->i_left&7, (1 << (s->i_left&7)) - 1 );
bs_flush( s );
}
......@@ -245,7 +237,7 @@ static inline void bs_write_te( bs_t *s, int x, int val )
/* Write a single 1 bit followed by padding.
 *
 * NOTE(review): diff rendered without +/- markers; per the hunk header
 * (-245,7 +237,7) one line replaces the other. Presumably bs_flush is the
 * removed line and the bs_write of (i_left & 7) zero bits is the new one; the
 * x264_slice_write hunk further down this page calls bs_flush right after
 * bs_rbsp_trailing, which fits that reading -- confirm against the real diff. */
static inline void bs_rbsp_trailing( bs_t *s )
{
bs_write1( s, 1 );
/* before: flush directly from here */
bs_flush( s );
/* after: only pad with 0 bits; no flush in this function */
bs_write( s, s->i_left&7, 0 );
}
static inline int bs_size_ue( unsigned int val )
......
......@@ -298,6 +298,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
#if !RDO_SKIP_BS
if( i_mb_type == I_PCM )
{
uint8_t *p_start = s->p_start;
bs_write_ue( s, i_mb_i_offset + 25 );
i_mb_pos_tex = bs_pos( s );
h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
......@@ -313,6 +314,9 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
s->p += 64;
bs_init( s, s->p, s->p_end - s->p );
s->p_start = p_start;
/* if PCM is chosen, we need to store reconstructed frame data */
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
......
......@@ -981,6 +981,7 @@ int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
x264_nal_start( h, NAL_PPS, NAL_PRIORITY_HIGHEST );
x264_pps_write( &h->out.bs, h->pps );
x264_nal_end( h );
bs_flush( &h->out.bs );
}
/* now set output*/
*pi_nal = h->out.i_nal;
......@@ -1374,6 +1375,7 @@ static int x264_slice_write( x264_t *h )
bs_write_ue( &h->out.bs, i_skip ); /* last skip run */
/* rbsp_slice_trailing_bits */
bs_rbsp_trailing( &h->out.bs );
bs_flush( &h->out.bs );
}
x264_nal_end( h );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment