Commit 95df880c authored by Fiona Glaser's avatar Fiona Glaser

Cleanup and simplification of macroblock_load

Doesn't do anything now, but will be useful for many future changes.
Splitting out neighbour calculation will make MBAFF implementation easier.
Calculation of neighbour_frame value (actual neighbouring MBs, ignoring slices) will be useful for some future patches.
parent 459473b2
...@@ -531,12 +531,16 @@ struct x264_t ...@@ -531,12 +531,16 @@ struct x264_t
unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */ unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */
unsigned int i_neighbour4[16]; /* at the time the block is coded */ unsigned int i_neighbour4[16]; /* at the time the block is coded */
unsigned int i_neighbour_intra; /* for constrained intra pred */ unsigned int i_neighbour_intra; /* for constrained intra pred */
unsigned int i_neighbour_frame; /* ignoring slice boundaries */
int i_mb_type_top; int i_mb_type_top;
int i_mb_type_left; int i_mb_type_left;
int i_mb_type_topleft; int i_mb_type_topleft;
int i_mb_type_topright; int i_mb_type_topright;
int i_mb_prev_xy; int i_mb_prev_xy;
int i_mb_left_xy;
int i_mb_top_xy; int i_mb_top_xy;
int i_mb_topleft_xy;
int i_mb_topright_xy;
/**** thread synchronization ends here ****/ /**** thread synchronization ends here ****/
/* subsequent variables are either thread-local or constant, /* subsequent variables are either thread-local or constant,
......
...@@ -422,18 +422,16 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[ ...@@ -422,18 +422,16 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
/* spatial predictors */ /* spatial predictors */
if( h->mb.i_neighbour & MB_LEFT ) if( h->mb.i_neighbour & MB_LEFT )
{ {
int i_mb_l = h->mb.i_mb_xy - 1; SET_MVP( mvr[h->mb.i_mb_left_xy] );
SET_MVP( mvr[i_mb_l] );
} }
if( h->mb.i_neighbour & MB_TOP ) if( h->mb.i_neighbour & MB_TOP )
{ {
int i_mb_t = h->mb.i_mb_top_xy; SET_MVP( mvr[h->mb.i_mb_top_xy] );
SET_MVP( mvr[i_mb_t] );
if( h->mb.i_neighbour & MB_TOPLEFT ) if( h->mb.i_neighbour & MB_TOPLEFT )
SET_MVP( mvr[i_mb_t-1] ); SET_MVP( mvr[h->mb.i_mb_topleft_xy] );
if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 ) if( h->mb.i_neighbour & MB_TOPRIGHT )
SET_MVP( mvr[i_mb_t+1] ); SET_MVP( mvr[h->mb.i_mb_topright_xy] );
} }
#undef SET_MVP #undef SET_MVP
...@@ -891,26 +889,26 @@ static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src ) ...@@ -891,26 +889,26 @@ static NOINLINE void copy_column8( uint8_t *dst, uint8_t *src )
dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE]; dst[i*FDEC_STRIDE] = src[i*FDEC_STRIDE];
} }
static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb_x, int i_mb_y, int i) static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x, int mb_y, int i )
{ {
const int w = (i == 0 ? 16 : 8); const int w = (i == 0 ? 16 : 8);
const int i_stride = h->fdec->i_stride[!!i]; const int i_stride = h->fdec->i_stride[!!i];
const int i_stride2 = i_stride << h->mb.b_interlaced; const int i_stride2 = i_stride << h->mb.b_interlaced;
const int i_pix_offset = h->mb.b_interlaced const int i_pix_offset = h->mb.b_interlaced
? w * (i_mb_x + (i_mb_y&~1) * i_stride) + (i_mb_y&1) * i_stride ? w * (mb_x + (mb_y&~1) * i_stride) + (mb_y&1) * i_stride
: w * (i_mb_x + i_mb_y * i_stride); : w * (mb_x + mb_y * i_stride);
const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset]; const uint8_t *plane_fdec = &h->fdec->plane[i][i_pix_offset];
const uint8_t *intra_fdec = h->param.b_sliced_threads ? plane_fdec-i_stride2 : const uint8_t *intra_fdec = h->param.b_sliced_threads ? plane_fdec-i_stride2 :
&h->mb.intra_border_backup[i_mb_y & h->sh.b_mbaff][i][i_mb_x*16>>!!i]; &h->mb.intra_border_backup[mb_y & h->sh.b_mbaff][i][mb_x*16>>!!i];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset }; int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
x264_frame_t **fref[2] = { h->fref0, h->fref1 }; x264_frame_t **fref[2] = { h->fref0, h->fref1 };
if( h->mb.b_interlaced ) if( h->mb.b_interlaced )
ref_pix_offset[1] += (1-2*(i_mb_y&1)) * i_stride; ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
h->mb.pic.i_stride[i] = i_stride2; h->mb.pic.i_stride[i] = i_stride2;
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset]; h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE, h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
h->mb.pic.p_fenc_plane[i], i_stride2, w ); h->mb.pic.p_fenc_plane[i], i_stride2, w );
if( i_mb_y > 0 ) if( mb_y > 0 )
memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 ); memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );
else else
memset( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], 0, w*3/2+1 ); memset( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], 0, w*3/2+1 );
...@@ -940,53 +938,111 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb ...@@ -940,53 +938,111 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb
} }
} }
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
{ {
int i_mb_xy = i_mb_y * h->mb.i_mb_stride + i_mb_x; int top = (mb_y - (1 << h->mb.b_interlaced)) * h->mb.i_mb_stride + mb_x;
int i_mb_4x4 = 4*(i_mb_y * h->mb.i_b4_stride + i_mb_x); h->mb.i_mb_x = mb_x;
int i_mb_8x8 = 2*(i_mb_y * h->mb.i_b8_stride + i_mb_x); h->mb.i_mb_y = mb_y;
int i_top_y = i_mb_y - (1 << h->mb.b_interlaced); h->mb.i_mb_xy = mb_y * h->mb.i_mb_stride + mb_x;
int i_top_xy = i_top_y * h->mb.i_mb_stride + i_mb_x; h->mb.i_b8_xy = 2*(mb_y * h->mb.i_b8_stride + mb_x);
int i_top_4x4 = (4*i_top_y+3) * h->mb.i_b4_stride + 4*i_mb_x; h->mb.i_b4_xy = 4*(mb_y * h->mb.i_b4_stride + mb_x);
int i_top_8x8 = (2*i_top_y+1) * h->mb.i_b8_stride + 2*i_mb_x;
int i_left_xy = -1;
int i_top_type = -1; /* gcc warn */
int i_left_type= -1;
/* init index */
h->mb.i_mb_x = i_mb_x;
h->mb.i_mb_y = i_mb_y;
h->mb.i_mb_xy = i_mb_xy;
h->mb.i_b8_xy = i_mb_8x8;
h->mb.i_b4_xy = i_mb_4x4;
h->mb.i_mb_top_xy = i_top_xy;
h->mb.i_neighbour = 0; h->mb.i_neighbour = 0;
h->mb.i_neighbour_intra = 0; h->mb.i_neighbour_intra = 0;
h->mb.i_neighbour_frame = 0;
h->mb.i_mb_top_xy = -1;
h->mb.i_mb_left_xy = -1;
h->mb.i_mb_topleft_xy = -1;
h->mb.i_mb_topright_xy = -1;
h->mb.i_mb_type_top = -1;
h->mb.i_mb_type_left = -1;
h->mb.i_mb_type_topleft = -1;
h->mb.i_mb_type_topright = -1;
if( top >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOP;
h->mb.i_mb_top_xy = top;
if( top >= h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOP;
h->mb.i_mb_type_top = h->mb.type[h->mb.i_mb_top_xy];
/* load cache */ if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_top ) )
if( i_top_xy >= h->sh.i_first_mb ) h->mb.i_neighbour_intra |= MB_TOP;
}
}
if( mb_x > 0 )
{ {
h->mb.i_mb_type_top = h->mb.i_neighbour_frame |= MB_LEFT;
i_top_type = h->mb.type[i_top_xy]; h->mb.i_mb_left_xy = h->mb.i_mb_xy - 1;
h->mb.cache.i_cbp_top = h->mb.cbp[i_top_xy]; if( h->mb.i_mb_xy > h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_LEFT;
h->mb.i_mb_type_left = h->mb.type[h->mb.i_mb_left_xy];
if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_left ) )
h->mb.i_neighbour_intra |= MB_LEFT;
}
}
if( mb_x > 0 && top - 1 >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOPLEFT;
h->mb.i_mb_topleft_xy = top - 1;
if( top - 1 >= h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPLEFT;
h->mb.i_mb_type_topleft = h->mb.type[h->mb.i_mb_topleft_xy];
if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topleft ) )
h->mb.i_neighbour_intra |= MB_TOPLEFT;
}
}
if( mb_x < h->sps->i_mb_width - 1 && top + 1 >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOPRIGHT;
h->mb.i_mb_topright_xy = top + 1;
if( top + 1 >= h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPRIGHT;
h->mb.i_mb_type_topright = h->mb.type[h->mb.i_mb_topright_xy];
if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topright ) )
h->mb.i_neighbour_intra |= MB_TOPRIGHT;
}
}
/* We can't predict from the previous threadslice since it hasn't been encoded yet, so
* only use left. */
if( h->i_threadslice_start == mb_y )
h->mb.i_neighbour_frame &= MB_LEFT;
}
h->mb.i_neighbour |= MB_TOP; void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
{
x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
if( !h->param.b_constrained_intra || IS_INTRA( i_top_type ) ) int left = h->mb.i_mb_left_xy;
h->mb.i_neighbour_intra |= MB_TOP; int top = h->mb.i_mb_top_xy;
/* load cache */
if( h->mb.i_neighbour & MB_TOP )
{
h->mb.cache.i_cbp_top = h->mb.cbp[top];
/* load intra4x4 */ /* load intra4x4 */
CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &h->mb.intra4x4_pred_mode[i_top_xy][0] ); CP32( &h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8], &h->mb.intra4x4_pred_mode[top][0] );
/* load non_zero_count */ /* load non_zero_count */
CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &h->mb.non_zero_count[i_top_xy][12] ); CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &h->mb.non_zero_count[top][12] );
/* shift because x264_scan8[16] is misaligned */ /* shift because x264_scan8[16] is misaligned */
M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = M16( &h->mb.non_zero_count[i_top_xy][18] ) << 8; M32( &h->mb.cache.non_zero_count[x264_scan8[16+0] - 9] ) = M16( &h->mb.non_zero_count[top][18] ) << 8;
M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = M16( &h->mb.non_zero_count[i_top_xy][22] ) << 8; M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = M16( &h->mb.non_zero_count[top][22] ) << 8;
} }
else else
{ {
h->mb.i_mb_type_top = -1;
h->mb.cache.i_cbp_top = -1; h->mb.cache.i_cbp_top = -1;
/* load intra4x4 */ /* load intra4x4 */
...@@ -998,39 +1054,30 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -998,39 +1054,30 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = 0x80808080U; M32( &h->mb.cache.non_zero_count[x264_scan8[16+4] - 9] ) = 0x80808080U;
} }
if( i_mb_x > 0 && i_mb_xy > h->sh.i_first_mb ) if( h->mb.i_neighbour & MB_LEFT )
{ {
i_left_xy = i_mb_xy - 1; h->mb.cache.i_cbp_left = h->mb.cbp[left];
h->mb.i_mb_type_left =
i_left_type = h->mb.type[i_left_xy];
h->mb.cache.i_cbp_left = h->mb.cbp[h->mb.i_mb_xy - 1];
h->mb.i_neighbour |= MB_LEFT;
if( !h->param.b_constrained_intra || IS_INTRA( i_left_type ) )
h->mb.i_neighbour_intra |= MB_LEFT;
/* load intra4x4 */ /* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][4]; h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.intra4x4_pred_mode[left][4];
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][5]; h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.intra4x4_pred_mode[left][5];
h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][6]; h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = h->mb.intra4x4_pred_mode[left][6];
h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = h->mb.intra4x4_pred_mode[i_left_xy][3]; h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = h->mb.intra4x4_pred_mode[left][3];
/* load non_zero_count */ /* load non_zero_count */
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = h->mb.non_zero_count[i_left_xy][3]; h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = h->mb.non_zero_count[left][3];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = h->mb.non_zero_count[i_left_xy][7]; h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = h->mb.non_zero_count[left][7];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = h->mb.non_zero_count[i_left_xy][11]; h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = h->mb.non_zero_count[left][11];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = h->mb.non_zero_count[i_left_xy][15]; h->mb.cache.non_zero_count[x264_scan8[10] - 1] = h->mb.non_zero_count[left][15];
h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = h->mb.non_zero_count[i_left_xy][16+1]; h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = h->mb.non_zero_count[left][16+1];
h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = h->mb.non_zero_count[i_left_xy][16+3]; h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = h->mb.non_zero_count[left][16+3];
h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = h->mb.non_zero_count[i_left_xy][16+4+1]; h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = h->mb.non_zero_count[left][16+4+1];
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = h->mb.non_zero_count[i_left_xy][16+4+3]; h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = h->mb.non_zero_count[left][16+4+3];
} }
else else
{ {
h->mb.i_mb_type_left = -1;
h->mb.cache.i_cbp_left = -1; h->mb.cache.i_cbp_left = -1;
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
...@@ -1049,30 +1096,11 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1049,30 +1096,11 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80; h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80;
} }
if( i_mb_x < h->sps->i_mb_width - 1 && i_top_xy + 1 >= h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPRIGHT;
h->mb.i_mb_type_topright = h->mb.type[ i_top_xy + 1 ];
if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topright ) )
h->mb.i_neighbour_intra |= MB_TOPRIGHT;
}
else
h->mb.i_mb_type_topright = -1;
if( i_mb_x > 0 && i_top_xy - 1 >= h->sh.i_first_mb )
{
h->mb.i_neighbour |= MB_TOPLEFT;
h->mb.i_mb_type_topleft = h->mb.type[ i_top_xy - 1 ];
if( !h->param.b_constrained_intra || IS_INTRA( h->mb.i_mb_type_topleft ) )
h->mb.i_neighbour_intra |= MB_TOPLEFT;
}
else
h->mb.i_mb_type_topleft = -1;
if( h->pps->b_transform_8x8_mode ) if( h->pps->b_transform_8x8_mode )
{ {
h->mb.cache.i_neighbour_transform_size = h->mb.cache.i_neighbour_transform_size =
( i_left_type >= 0 && h->mb.mb_transform_size[i_left_xy] ) ( (h->mb.i_neighbour & MB_LEFT) && h->mb.mb_transform_size[left] )
+ ( i_top_type >= 0 && h->mb.mb_transform_size[i_top_xy] ); + ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
} }
if( h->sh.b_mbaff ) if( h->sh.b_mbaff )
...@@ -1093,142 +1121,127 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1093,142 +1121,127 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
/* load picture pointers */ /* load picture pointers */
x264_macroblock_load_pic_pointers( h, i_mb_x, i_mb_y, 0 ); x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 0 );
x264_macroblock_load_pic_pointers( h, i_mb_x, i_mb_y, 1 ); x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 1 );
x264_macroblock_load_pic_pointers( h, i_mb_x, i_mb_y, 2 ); x264_macroblock_load_pic_pointers( h, mb_x, mb_y, 2 );
if( h->fdec->integral ) if( h->fdec->integral )
{ {
assert( !h->mb.b_interlaced ); int offset = 16 * (mb_x + mb_y * h->fdec->i_stride[0]);
for( int i = 0; i < h->mb.pic.i_fref[0]; i++ ) for( int i = 0; i < h->mb.pic.i_fref[0]; i++ )
h->mb.pic.p_integral[0][i] = &h->fref0[i]->integral[ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )]; h->mb.pic.p_integral[0][i] = &h->fref0[i]->integral[offset];
for( int i = 0; i < h->mb.pic.i_fref[1]; i++ ) for( int i = 0; i < h->mb.pic.i_fref[1]; i++ )
h->mb.pic.p_integral[1][i] = &h->fref1[i]->integral[ 16 * ( i_mb_x + i_mb_y * h->fdec->i_stride[0] )]; h->mb.pic.p_integral[1][i] = &h->fref1[i]->integral[offset];
} }
x264_prefetch_fenc( h, h->fenc, i_mb_x, i_mb_y ); x264_prefetch_fenc( h, h->fenc, mb_x, mb_y );
/* load ref/mv/mvd */ /* load ref/mv/mvd */
if( h->sh.i_type != SLICE_TYPE_I ) if( h->sh.i_type != SLICE_TYPE_I )
{ {
const int s8x8 = h->mb.i_b8_stride; const int s8x8 = h->mb.i_b8_stride;
const int s4x4 = h->mb.i_b4_stride; const int s4x4 = h->mb.i_b4_stride;
const int top_y = mb_y - (1 << h->mb.b_interlaced);
const int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*mb_x;
const int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*mb_x;
for( int i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ ) for( int l = 0; l < (h->sh.i_type == SLICE_TYPE_B) + 1; l++ )
{ {
/* int i8 = x264_scan8[0] - 1 - 1*8;
h->mb.cache.ref[i_list][x264_scan8[5 ]+1] =
h->mb.cache.ref[i_list][x264_scan8[7 ]+1] =
h->mb.cache.ref[i_list][x264_scan8[13]+1] = -2;
*/
if( h->mb.i_neighbour & MB_TOPLEFT ) if( h->mb.i_neighbour & MB_TOPLEFT )
{ {
const int i8 = x264_scan8[0] - 1 - 1*8; h->mb.cache.ref[l][i8] = h->mb.ref[l][top_8x8 - 1];
const int ir = i_top_8x8 - 1; CP32( h->mb.cache.mv[l][i8], h->mb.mv[l][top_4x4 - 1] );
const int iv = i_top_4x4 - 1;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
CP32( h->mb.cache.mv[i_list][i8], h->mb.mv[i_list][iv] );
} }
else else
{ {
const int i8 = x264_scan8[0] - 1 - 1*8; h->mb.cache.ref[l][i8] = -2;
h->mb.cache.ref[i_list][i8] = -2; M32( h->mb.cache.mv[l][i8] ) = 0;
M32( h->mb.cache.mv[i_list][i8] ) = 0;
} }
i8 = x264_scan8[0] - 8;
if( h->mb.i_neighbour & MB_TOP ) if( h->mb.i_neighbour & MB_TOP )
{ {
const int i8 = x264_scan8[0] - 8; h->mb.cache.ref[l][i8+0] =
const int ir = i_top_8x8; h->mb.cache.ref[l][i8+1] = h->mb.ref[l][top_8x8 + 0];
const int iv = i_top_4x4; h->mb.cache.ref[l][i8+2] =
h->mb.cache.ref[i_list][i8+0] = h->mb.cache.ref[l][i8+3] = h->mb.ref[l][top_8x8 + 1];
h->mb.cache.ref[i_list][i8+1] = h->mb.ref[i_list][ir + 0]; CP64( h->mb.cache.mv[l][i8+0], h->mb.mv[l][top_4x4+0] );
h->mb.cache.ref[i_list][i8+2] = CP64( h->mb.cache.mv[l][i8+2], h->mb.mv[l][top_4x4+2] );
h->mb.cache.ref[i_list][i8+3] = h->mb.ref[i_list][ir + 1];
CP64( h->mb.cache.mv[i_list][i8+0], h->mb.mv[i_list][iv+0] );
CP64( h->mb.cache.mv[i_list][i8+2], h->mb.mv[i_list][iv+2] );
} }
else else
{ {
const int i8 = x264_scan8[0] - 8; M64( h->mb.cache.mv[l][i8+0] ) = 0;
M64( h->mb.cache.mv[i_list][i8+0] ) = 0; M64( h->mb.cache.mv[l][i8+2] ) = 0;
M64( h->mb.cache.mv[i_list][i8+2] ) = 0; M32( &h->mb.cache.ref[l][i8] ) = (uint8_t)(-2) * 0x01010101U;
M32( &h->mb.cache.ref[i_list][i8] ) = (uint8_t)(-2) * 0x01010101U;
} }
i8 = x264_scan8[0] + 4 - 1*8;
if( h->mb.i_neighbour & MB_TOPRIGHT ) if( h->mb.i_neighbour & MB_TOPRIGHT )
{ {
const int i8 = x264_scan8[0] + 4 - 1*8; h->mb.cache.ref[l][i8] = h->mb.ref[l][top_8x8 + 2];
const int ir = i_top_8x8 + 2; CP32( h->mb.cache.mv[l][i8], h->mb.mv[l][top_4x4 + 4] );
const int iv = i_top_4x4 + 4;
h->mb.cache.ref[i_list][i8] = h->mb.ref[i_list][ir];
CP32( h->mb.cache.mv[i_list][i8], h->mb.mv[i_list][iv] );
} }
else else
{ h->mb.cache.ref[l][i8] = -2;
const int i8 = x264_scan8[0] + 4 - 1*8;
h->mb.cache.ref[i_list][i8] = -2;
}
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT ) if( h->mb.i_neighbour & MB_LEFT )
{ {
const int i8 = x264_scan8[0] - 1; const int ir = h->mb.i_b8_xy - 1;
const int ir = i_mb_8x8 - 1; const int iv = h->mb.i_b4_xy - 1;
const int iv = i_mb_4x4 - 1; h->mb.cache.ref[l][i8+0*8] =
h->mb.cache.ref[i_list][i8+0*8] = h->mb.cache.ref[l][i8+1*8] = h->mb.ref[l][ir + 0*s8x8];
h->mb.cache.ref[i_list][i8+1*8] = h->mb.ref[i_list][ir + 0*s8x8]; h->mb.cache.ref[l][i8+2*8] =
h->mb.cache.ref[i_list][i8+2*8] = h->mb.cache.ref[l][i8+3*8] = h->mb.ref[l][ir + 1*s8x8];
h->mb.cache.ref[i_list][i8+3*8] = h->mb.ref[i_list][ir + 1*s8x8];
CP32( h->mb.cache.mv[l][i8+0*8], h->mb.mv[l][iv + 0*s4x4] );
CP32( h->mb.cache.mv[i_list][i8+0*8], h->mb.mv[i_list][iv + 0*s4x4] ); CP32( h->mb.cache.mv[l][i8+1*8], h->mb.mv[l][iv + 1*s4x4] );
CP32( h->mb.cache.mv[i_list][i8+1*8], h->mb.mv[i_list][iv + 1*s4x4] ); CP32( h->mb.cache.mv[l][i8+2*8], h->mb.mv[l][iv + 2*s4x4] );
CP32( h->mb.cache.mv[i_list][i8+2*8], h->mb.mv[i_list][iv + 2*s4x4] ); CP32( h->mb.cache.mv[l][i8+3*8], h->mb.mv[l][iv + 3*s4x4] );
CP32( h->mb.cache.mv[i_list][i8+3*8], h->mb.mv[i_list][iv + 3*s4x4] );
} }
else else
{ {
const int i8 = x264_scan8[0] - 1;
for( int i = 0; i < 4; i++ ) for( int i = 0; i < 4; i++ )
{ {
h->mb.cache.ref[i_list][i8+i*8] = -2; h->mb.cache.ref[l][i8+i*8] = -2;
M32( h->mb.cache.mv[i_list][i8+i*8] ) = 0; M32( h->mb.cache.mv[l][i8+i*8] ) = 0;
} }
} }
if( h->param.b_cabac ) if( h->param.b_cabac )
{ {
if( i_top_type >= 0 ) if( h->mb.i_neighbour & MB_TOP )
CP64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8], h->mb.mvd[i_list][i_top_xy][0] ); CP64( h->mb.cache.mvd[l][x264_scan8[0] - 8], h->mb.mvd[l][top][0] );
else else
M64( h->mb.cache.mvd[i_list][x264_scan8[0] - 8] ) = 0; M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
if( i_left_type >= 0 ) if( h->mb.i_neighbour & MB_LEFT )
{ {
CP16( h->mb.cache.mvd[i_list][x264_scan8[0 ] - 1], h->mb.mvd[i_list][i_left_xy][4] ); CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], h->mb.mvd[l][left][4] );
CP16( h->mb.cache.mvd[i_list][x264_scan8[2 ] - 1], h->mb.mvd[i_list][i_left_xy][5] ); CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], h->mb.mvd[l][left][5] );
CP16( h->mb.cache.mvd[i_list][x264_scan8[8 ] - 1], h->mb.mvd[i_list][i_left_xy][6] ); CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], h->mb.mvd[l][left][6] );
CP16( h->mb.cache.mvd[i_list][x264_scan8[10] - 1], h->mb.mvd[i_list][i_left_xy][3] ); CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], h->mb.mvd[l][left][3] );
} }
else else
for( int i = 0; i < 4; i++ ) for( int i = 0; i < 4; i++ )
M16( h->mb.cache.mvd[i_list][x264_scan8[0]-1+i*8] ) = 0; M16( h->mb.cache.mvd[l][x264_scan8[0]-1+i*8] ) = 0;
} }
} }
/* load skip */ /* load skip */
if( h->sh.i_type == SLICE_TYPE_B ) if( h->sh.i_type == SLICE_TYPE_B )
{ {
h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(i_mb_y&1)]; h->mb.bipred_weight = h->mb.bipred_weight_buf[h->mb.b_interlaced&(mb_y&1)];
h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(i_mb_y&1)]; h->mb.dist_scale_factor = h->mb.dist_scale_factor_buf[h->mb.b_interlaced&(mb_y&1)];
if( h->param.b_cabac ) if( h->param.b_cabac )
{ {
uint8_t skipbp; uint8_t skipbp;
x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 ); x264_macroblock_cache_skip( h, 0, 0, 4, 4, 0 );
skipbp = i_left_type >= 0 ? h->mb.skipbp[i_left_xy] : 0; skipbp = (h->mb.i_neighbour & MB_LEFT) ? h->mb.skipbp[left] : 0;
h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2; h->mb.cache.skip[x264_scan8[0] - 1] = skipbp & 0x2;
h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8; h->mb.cache.skip[x264_scan8[8] - 1] = skipbp & 0x8;
skipbp = i_top_type >= 0 ? h->mb.skipbp[i_top_xy] : 0; skipbp = (h->mb.i_neighbour & MB_TOP) ? h->mb.skipbp[top] : 0;
h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4; h->mb.cache.skip[x264_scan8[0] - 8] = skipbp & 0x4;
h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8; h->mb.cache.skip[x264_scan8[4] - 8] = skipbp & 0x8;
} }
......
...@@ -264,7 +264,7 @@ enum cabac_ctx_block_cat_e ...@@ -264,7 +264,7 @@ enum cabac_ctx_block_cat_e
int x264_macroblock_cache_init( x264_t *h ); int x264_macroblock_cache_init( x264_t *h );
void x264_macroblock_slice_init( x264_t *h ); void x264_macroblock_slice_init( x264_t *h );
void x264_macroblock_thread_init( x264_t *h ); void x264_macroblock_thread_init( x264_t *h );
void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ); void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y );
void x264_macroblock_cache_save( x264_t *h ); void x264_macroblock_cache_save( x264_t *h );
void x264_macroblock_cache_end( x264_t *h ); void x264_macroblock_cache_end( x264_t *h );
......
...@@ -191,11 +191,11 @@ static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int ...@@ -191,11 +191,11 @@ static void x264_cabac_mb_intra4x4_pred_mode( x264_cabac_t *cb, int i_pred, int
static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb ) static void x264_cabac_mb_intra_chroma_pred_mode( x264_t *h, x264_cabac_t *cb )
{ {
const int i_mode = x264_mb_pred_mode8x8c_fix[ h->mb.i_chroma_pred_mode ]; const int i_mode = x264_mb_pred_mode8x8c_fix[h->mb.i_chroma_pred_mode];
int ctx = 0; int ctx = 0;
/* No need to test for I4x4 or I_16x16 as cache_save handle that */ /* No need to test for I4x4 or I_16x16 as cache_save handle that */
if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 ) if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_left_xy] != 0 )
ctx++; ctx++;
if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 ) if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_top_xy] != 0 )
ctx++; ctx++;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment