Commit 13922ab8 authored by Henrik Gramner, committed by Fiona Glaser

More write-combining

parent a40aa64d
......@@ -658,7 +658,7 @@ struct x264_t
ALIGNED_8( int8_t intra4x4_pred_mode[X264_SCAN8_LUMA_SIZE] );
/* i_non_zero_count if available else 0x80 */
ALIGNED_4( uint8_t non_zero_count[X264_SCAN8_SIZE] );
ALIGNED_16( uint8_t non_zero_count[X264_SCAN8_SIZE] );
/* -1 if unused, -2 if unavailable */
ALIGNED_4( int8_t ref[2][X264_SCAN8_LUMA_SIZE] );
......
......@@ -458,10 +458,10 @@ void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qp )
/* Clear the coded-block state held in h->mb: zero every byte of the cached
 * non_zero_count array, reset i_cbp_luma and i_cbp_chroma to 0, and store 0
 * into the cbp entry indexed by i_mb_xy.
 *
 * NOTE(review): this listing appears to come from a diff view whose +/-
 * markers were lost, so it seems to show both the removed lines (memset and
 * the 0x00 assignments) and their replacements (the M128 loop and the 0
 * assignments) -- confirm against the repository before relying on it. */
static void x264_macroblock_encode_skip( x264_t *h )
{
h->mb.i_cbp_luma = 0x00;
h->mb.i_cbp_chroma = 0x00;
/* Byte-wise clear of the whole non_zero_count array. */
memset( h->mb.cache.non_zero_count, 0, sizeof( h->mb.cache.non_zero_count ) );
/* store cbp */
/* Clear the same array again, one M128 store per 16-byte step.
 * NOTE(review): presumably requires the array to be 16-byte aligned and its
 * size to be a multiple of 16 -- confirm against the M128 definition. */
for( int i = 0; i < sizeof( h->mb.cache.non_zero_count ); i += 16 )
M128( &h->mb.cache.non_zero_count[i] ) = M128_ZERO;
h->mb.i_cbp_luma = 0;
h->mb.i_cbp_chroma = 0;
/* Record a zero cbp for the macroblock at index i_mb_xy. */
h->mb.cbp[h->mb.i_mb_xy] = 0;
}
......
......@@ -438,10 +438,13 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
if( i < b_ac )
{
/* We only need to memset an empty 4x4 block. 8x8 can be
/* We only need to zero an empty 4x4 block. 8x8 can be
implicitly emptied via zero nnz, as can dc. */
if( i_coefs == 16 && !dc )
memset( dct, 0, 16 * sizeof(int16_t) );
{
M128( &dct[0] ) = M128_ZERO;
M128( &dct[8] ) = M128_ZERO;
}
return 0;
}
......@@ -608,7 +611,10 @@ static ALWAYS_INLINE int quant_trellis_cabac( x264_t *h, int16_t *dct,
if( bnode == &nodes_cur[0] )
{
if( i_coefs == 16 && !dc )
memset( dct, 0, 16 * sizeof(int16_t) );
{
M128( &dct[0] ) = M128_ZERO;
M128( &dct[8] ) = M128_ZERO;
}
return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment