Commit 08502a7c authored by Simon Horlick, committed by Fiona Glaser

MBAFF: Make interlaced support a compile time option

parent 8029e664
......@@ -92,6 +92,16 @@ do {\
#include <assert.h>
#include <limits.h>
/* Interlaced-state accessors, selected at compile time by HAVE_INTERLACED.
 * When HAVE_INTERLACED is nonzero, each macro reads the corresponding flag
 * through a variable named `h`, so `h` must be in scope wherever they are used.
 * Otherwise (zero or undefined) each macro is the literal 0, which turns every
 * test on these flags into a compile-time constant. */
#if HAVE_INTERLACED
# define MB_INTERLACED h->mb.b_interlaced
# define SLICE_MBAFF h->sh.b_mbaff
# define PARAM_INTERLACED h->param.b_interlaced
#else
/* Interlaced support compiled out: all three flags are constant false. */
# define MB_INTERLACED 0
# define SLICE_MBAFF 0
# define PARAM_INTERLACED 0
#endif
/* Unions for type-punning.
* Mn: load or store n bits, aligned, native-endian
* CPn: copy n bits, aligned, native-endian
......
......@@ -320,7 +320,7 @@ void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2]
{
bs[dir][edge][i] = 2;
}
else if( (edge == 0 && h->mb.b_interlaced != neighbour_field[dir]) ||
else if( (edge == 0 && MB_INTERLACED != neighbour_field[dir]) ||
ref[0][q] != ref[0][p] ||
abs( mv[0][q][0] - mv[0][p][0] ) >= 4 ||
abs( mv[0][q][1] - mv[0][p][1] ) >= mvy_limit ||
......@@ -339,7 +339,7 @@ void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2]
if( h->mb.i_neighbour & MB_LEFT )
{
if( h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
if( h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
{
static const uint8_t offset[2][2][8] = {
{ { 0, 0, 0, 0, 1, 1, 1, 1 },
......@@ -353,12 +353,12 @@ void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2]
memset( bS, 4, 8 );
else
{
const uint8_t *off = offset[h->mb.b_interlaced][h->mb.i_mb_y&1];
const uint8_t *off = offset[MB_INTERLACED][h->mb.i_mb_y&1];
uint8_t (*nnz)[24] = h->mb.non_zero_count;
for( int i = 0; i < 8; i++ )
{
int left = h->mb.i_mb_left_xy[h->mb.b_interlaced ? i>>2 : i&1];
int left = h->mb.i_mb_left_xy[MB_INTERLACED ? i>>2 : i&1];
int nnz_this = h->mb.cache.non_zero_count[x264_scan8[0]+8*(i>>1)];
int nnz_left = nnz[left][3 + 4*off[i]];
if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
......@@ -376,7 +376,7 @@ void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2]
}
}
if( h->mb.b_interlaced )
if( MB_INTERLACED )
{
for( int i = 0; i < 4; i++ ) bs[0][0][i] = bS[i];
for( int i = 0; i < 4; i++ ) bs[0][4][i] = bS[4+i];
......@@ -391,7 +391,7 @@ void deblock_strength_mbaff_c( uint8_t nnz_cache[X264_SCAN8_SIZE], int8_t ref[2]
if( h->mb.i_neighbour & MB_TOP )
{
if( !(h->mb.i_mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
if( !(h->mb.i_mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
{
/* Need to filter both fields (even for frame macroblocks).
* Filter top two rows using the top macroblock of the above
......@@ -486,8 +486,8 @@ static inline void deblock_edge_intra( x264_t *h, pixel *pix, int i_stride, uint
void x264_frame_deblock_row( x264_t *h, int mb_y )
{
int b_interlaced = h->sh.b_mbaff;
int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->pps->i_chroma_qp_index_offset );
int b_interlaced = SLICE_MBAFF;
int qp_thresh = 15 - X264_MIN( h->sh.i_alpha_c0_offset, h->sh.i_beta_offset ) - X264_MAX( 0, h->param.analyse.i_chroma_qp_offset );
int stridey = h->fdec->i_stride[0];
int strideuv = h->fdec->i_stride[1];
......@@ -503,14 +503,14 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
pixel *pixy = h->fdec->plane[0] + 16*mb_y*stridey + 16*mb_x;
pixel *pixuv = h->fdec->plane[1] + 8*mb_y*strideuv + 16*mb_x;
if( mb_y & h->mb.b_interlaced )
if( mb_y & MB_INTERLACED )
{
pixy -= 15*stridey;
pixuv -= 7*strideuv;
}
int stride2y = stridey << h->mb.b_interlaced;
int stride2uv = strideuv << h->mb.b_interlaced;
int stride2y = stridey << MB_INTERLACED;
int stride2uv = strideuv << MB_INTERLACED;
int qp = h->mb.qp[mb_xy];
int qpc = h->chroma_qp_table[qp];
int first_edge_only = h->mb.type[mb_xy] == P_SKIP || qp <= qp_thresh;
......@@ -529,7 +529,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
if( h->mb.i_neighbour & MB_LEFT )
{
if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != h->mb.b_interlaced )
if( b_interlaced && h->mb.field[h->mb.i_mb_left_xy[0]] != MB_INTERLACED )
{
int luma_qp[2];
int chroma_qp[2];
......@@ -551,8 +551,8 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
deblock_edge( h, pixuv + 1, 2*strideuv, bs[0][0], chroma_qp[0], 1, deblock_v_chroma_mbaff_c );
}
int offy = h->mb.b_interlaced ? 4 : 0;
int offuv = h->mb.b_interlaced ? 3 : 0;
int offy = MB_INTERLACED ? 4 : 0;
int offuv = MB_INTERLACED ? 3 : 0;
left_qp[1] = h->mb.qp[h->mb.i_mb_left_xy[1]];
luma_qp[1] = (current_qp + left_qp[1] + 1) >> 1;
chroma_qp[1] = (h->chroma_qp_table[current_qp] + h->chroma_qp_table[left_qp[1]] + 1) >> 1;
......@@ -591,7 +591,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
if( h->mb.i_neighbour & MB_TOP )
{
if( b_interlaced && !(mb_y&1) && !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_top_xy] )
if( b_interlaced && !(mb_y&1) && !MB_INTERLACED && h->mb.field[h->mb.i_mb_top_xy] )
{
int mbn_xy = mb_xy - 2 * h->mb.i_mb_stride;
......@@ -613,7 +613,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int qpc_top = (h->chroma_qp_table[qp] + h->chroma_qp_table[qpt] + 1) >> 1;
int intra_top = IS_INTRA( h->mb.type[h->mb.i_mb_top_xy] );
if( (!b_interlaced || (!h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_top_xy]))
if( (!b_interlaced || (!MB_INTERLACED && !h->mb.field[h->mb.i_mb_top_xy]))
&& (intra_cur || intra_top) )
{
FILTER( _intra, 1, 0, qp_top, qpc_top );
......@@ -658,7 +658,7 @@ void x264_macroblock_deblock( x264_t *h )
memset( bs, 3, 2*8*4*sizeof(uint8_t) );
else
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
bs, 4 >> h->sh.b_mbaff, h->sh.i_type == SLICE_TYPE_B, h );
bs, 4 >> SLICE_MBAFF, h->sh.i_type == SLICE_TYPE_B, h );
int transform_8x8 = h->mb.b_transform_8x8;
pixel *fdec = h->mb.pic.p_fdec[0];
......
......@@ -48,7 +48,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
int i_mb_count = h->mb.i_mb_count;
int i_stride, i_width, i_lines;
int i_padv = PADV << h->param.b_interlaced;
int i_padv = PADV << PARAM_INTERLACED;
int luma_plane_size, chroma_plane_size;
int align = h->param.cpu&X264_CPU_CACHELINE_64 ? 64 : h->param.cpu&X264_CPU_CACHELINE_32 ? 32 : 16;
int disalign = h->param.cpu&X264_CPU_ALTIVEC ? 1<<9 : 1<<10;
......@@ -100,7 +100,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
CHECKED_MALLOC( frame->buffer[1], chroma_plane_size * sizeof(pixel) );
frame->plane[1] = frame->buffer[1] + frame->i_stride[1] * i_padv/2 + PADH;
if( h->param.b_interlaced )
if( PARAM_INTERLACED )
{
CHECKED_MALLOC( frame->buffer_fld[1], chroma_plane_size * sizeof(pixel) );
frame->plane_fld[1] = frame->buffer_fld[1] + frame->i_stride[1] * i_padv/2 + PADH;
......@@ -112,7 +112,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
{
/* FIXME: Don't allocate both buffers in non-adaptive MBAFF. */
CHECKED_MALLOC( frame->buffer[0], 4*luma_plane_size * sizeof(pixel) );
if( h->param.b_interlaced )
if( PARAM_INTERLACED )
CHECKED_MALLOC( frame->buffer_fld[0], 4*luma_plane_size * sizeof(pixel) );
for( int i = 0; i < 4; i++ )
{
......@@ -125,7 +125,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
else
{
CHECKED_MALLOC( frame->buffer[0], luma_plane_size * sizeof(pixel) );
if( h->param.b_interlaced )
if( PARAM_INTERLACED )
CHECKED_MALLOC( frame->buffer_fld[0], luma_plane_size * sizeof(pixel) );
frame->filtered[0] = frame->plane[0] = frame->buffer[0] + frame->i_stride[0] * i_padv + PADH;
frame->filtered_fld[0] = frame->plane_fld[0] = frame->buffer_fld[0] + frame->i_stride[0] * i_padv + PADH;
......@@ -160,7 +160,7 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
frame->i_stride[0] * (frame->i_lines[0] + 2*i_padv) * sizeof(uint16_t) << h->frames.b_have_sub8x8_esa );
frame->integral = (uint16_t*)frame->buffer[3] + frame->i_stride[0] * i_padv + PADH;
}
if( h->param.b_interlaced )
if( PARAM_INTERLACED )
CHECKED_MALLOC( frame->field, i_mb_count * sizeof(uint8_t) );
}
else /* fenc frame */
......@@ -371,20 +371,20 @@ static void plane_expand_border( pixel *pix, int i_stride, int i_width, int i_he
void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_end )
{
int b_start = !mb_y;
if( mb_y & h->sh.b_mbaff )
if( mb_y & SLICE_MBAFF )
return;
for( int i = 0; i < frame->i_plane; i++ )
{
int stride = frame->i_stride[i];
int width = 16*h->sps->i_mb_width;
int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF : 16) >> !!i;
int padh = PADH;
int padv = PADV >> !!i;
// buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
if( b_end && !b_start )
height += 4 >> (!!i + h->sh.b_mbaff);
height += 4 >> (!!i + SLICE_MBAFF);
pixel *pix;
if( h->sh.b_mbaff )
if( SLICE_MBAFF )
{
// border samples for each field are extended separately
pix = frame->plane_fld[i] + X264_MAX(0, (16*mb_y-4)*stride >> !!i);
......@@ -413,14 +413,14 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
int b_start = !mb_y;
int stride = frame->i_stride[0];
int width = 16*h->mb.i_mb_width + 8;
int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> SLICE_MBAFF) + 16 : 16;
int padh = PADH - 4;
int padv = PADV - 8;
for( int i = 1; i < 4; i++ )
{
// buffer: 8 luma, to match the hpel filter
pixel *pix;
if( h->sh.b_mbaff )
if( SLICE_MBAFF )
{
pix = frame->filtered_fld[i] + (16*mb_y - 16) * stride - 4;
plane_expand_border( pix, stride*2, width, height, padh, padv, b_start, b_end, 0 );
......@@ -428,7 +428,7 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
}
pix = frame->filtered[i] + (16*mb_y - 8) * stride - 4;
plane_expand_border( pix, stride, width, height << h->sh.b_mbaff, padh, padv, b_start, b_end, 0 );
plane_expand_border( pix, stride, width, height << SLICE_MBAFF, padh, padv, b_start, b_end, 0 );
}
}
......@@ -458,7 +458,7 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
{
for( int y = i_height; y < i_height + i_pady; y++ )
memcpy( &frame->plane[i][y*frame->i_stride[i]],
&frame->plane[i][(i_height-(~y&h->param.b_interlaced)-1)*frame->i_stride[i]],
&frame->plane[i][(i_height-(~y&PARAM_INTERLACED)-1)*frame->i_stride[i]],
(i_width + i_padx) * sizeof(pixel) );
}
}
......
......@@ -40,7 +40,7 @@ static NOINLINE void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int h
mvx, mvy, 4*width, 4*height, &h->sh.weight[i_ref][0] );
// chroma is offset if MCing from a field of opposite parity
if( h->mb.b_interlaced & i_ref )
if( MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
......@@ -69,7 +69,7 @@ static NOINLINE void x264_mb_mc_1xywh( x264_t *h, int x, int y, int width, int h
h->mb.pic.p_fref[1][i_ref], h->mb.pic.i_stride[0],
mvx, mvy, 4*width, 4*height, weight_none );
if( h->mb.b_interlaced & i_ref )
if( MB_INTERLACED & i_ref )
mvy += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( &h->mb.pic.p_fdec[1][2*y*FDEC_STRIDE+2*x],
......@@ -101,9 +101,9 @@ static NOINLINE void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int
h->mc.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y*FDEC_STRIDE+4*x], FDEC_STRIDE,
src0, i_stride0, src1, i_stride1, weight );
if( h->mb.b_interlaced & i_ref0 )
if( MB_INTERLACED & i_ref0 )
mvy0 += (h->mb.i_mb_y & 1)*4 - 2;
if( h->mb.b_interlaced & i_ref1 )
if( MB_INTERLACED & i_ref1 )
mvy1 += (h->mb.i_mb_y & 1)*4 - 2;
h->mc.mc_chroma( tmp0, tmp0+8, 16, h->mb.pic.p_fref[0][i_ref0][4], h->mb.pic.i_stride[1],
......@@ -212,7 +212,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
h->mb.i_b8_stride = h->mb.i_mb_width * 2;
h->mb.i_b4_stride = h->mb.i_mb_width * 4;
h->mb.b_interlaced = h->param.b_interlaced;
h->mb.b_interlaced = PARAM_INTERLACED;
CHECKED_MALLOC( h->mb.qp, i_mb_count * sizeof(int8_t) );
CHECKED_MALLOC( h->mb.cbp, i_mb_count * sizeof(int16_t) );
......@@ -236,7 +236,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
for( int i = 0; i < 2; i++ )
{
int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << h->param.b_interlaced;
int i_refs = X264_MIN(X264_REF_MAX, (i ? 1 + !!h->param.i_bframe_pyramid : h->param.i_frame_reference) ) << PARAM_INTERLACED;
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
i_refs = X264_MIN(X264_REF_MAX, i_refs + 1 + (BIT_DEPTH == 8)); //smart weights add two duplicate frames, one in >8-bit
......@@ -250,7 +250,7 @@ int x264_macroblock_cache_allocate( x264_t *h )
if( h->param.analyse.i_weighted_pred )
{
int i_padv = PADV << h->param.b_interlaced;
int i_padv = PADV << PARAM_INTERLACED;
int luma_plane_size = 0;
int numweightbuf;
......@@ -315,16 +315,16 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
{
for( int i = 0; i <= 4*h->param.b_interlaced; i++ )
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int j = 0; j < 2; j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], (h->sps->i_mb_width*16+32) * sizeof(pixel) );
h->intra_border_backup[i][j] += 16;
if( !h->param.b_interlaced )
if( !PARAM_INTERLACED )
h->intra_border_backup[1][j] = h->intra_border_backup[i][j];
}
for( int i = 0; i <= h->param.b_interlaced; i++ )
for( int i = 0; i <= PARAM_INTERLACED; i++ )
{
CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
h->deblock_strength[1] = h->deblock_strength[i];
......@@ -358,9 +358,9 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
{
if( !b_lookahead )
{
for( int i = 0; i <= h->param.b_interlaced; i++ )
for( int i = 0; i <= PARAM_INTERLACED; i++ )
x264_free( h->deblock_strength[i] );
for( int i = 0; i <= 4*h->param.b_interlaced; i++ )
for( int i = 0; i <= 4*PARAM_INTERLACED; i++ )
for( int j = 0; j < 2; j++ )
x264_free( h->intra_border_backup[i][j] - 16 );
}
......@@ -409,12 +409,12 @@ void x264_macroblock_slice_init( x264_t *h )
{
deblock_ref_table(-2) = -2;
deblock_ref_table(-1) = -1;
for( int i = 0; i < h->i_ref[0] << h->sh.b_mbaff; i++ )
for( int i = 0; i < h->i_ref[0] << SLICE_MBAFF; i++ )
{
/* Mask off high bits to avoid frame num collisions with -1/-2.
* In current x264 frame num values don't cover a range of more
* than 32, so 6 bits is enough for uniqueness. */
if( !h->mb.b_interlaced )
if( !MB_INTERLACED )
deblock_ref_table(i) = h->fref[0][i]->i_frame_num&63;
else
deblock_ref_table(i) = ((h->fref[0][i>>1]->i_frame_num&63)<<1) + (i&1);
......@@ -426,7 +426,7 @@ void x264_macroblock_slice_init( x264_t *h )
memset( h->mb.cache.ref, -2, sizeof( h->mb.cache.ref ) );
if( h->i_ref[0] > 0 )
for( int field = 0; field <= h->sh.b_mbaff; field++ )
for( int field = 0; field <= SLICE_MBAFF; field++ )
{
int curpoc = h->fdec->i_poc + h->fdec->i_delta_poc[field];
int refpoc = h->fref[0][0]->i_poc + h->fref[0][0]->i_delta_poc[field];
......@@ -499,16 +499,16 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
{
int w = (i ? 8 : 16);
int i_stride = h->fdec->i_stride[i];
int i_stride2 = i_stride << h->mb.b_interlaced;
int i_pix_offset = h->mb.b_interlaced
int i_stride2 = i_stride << MB_INTERLACED;
int i_pix_offset = MB_INTERLACED
? 16 * mb_x + w * (mb_y&~1) * i_stride + (mb_y&1) * i_stride
: 16 * mb_x + w * mb_y * i_stride;
pixel *plane_fdec = &h->fdec->plane[i][i_pix_offset];
int fdec_idx = b_mbaff ? (h->mb.b_interlaced ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
int fdec_idx = b_mbaff ? (MB_INTERLACED ? (3 + (mb_y&1)) : (mb_y&1) ? 2 : 4) : 0;
pixel *intra_fdec = &h->intra_border_backup[fdec_idx][i][mb_x*16];
int ref_pix_offset[2] = { i_pix_offset, i_pix_offset };
/* ref_pix_offset[0] references the current field and [1] the opposite field. */
if( h->mb.b_interlaced )
if( MB_INTERLACED )
ref_pix_offset[1] += (1-2*(mb_y&1)) * i_stride;
h->mb.pic.i_stride[i] = i_stride2;
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
......@@ -545,7 +545,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
for( int j = 0; j < h->mb.pic.i_fref[0]; j++ )
{
// Interpolate between pixels in same field.
if( h->mb.b_interlaced )
if( MB_INTERLACED )
{
plane_src = h->fref[0][j>>1]->plane_fld[i];
filtered_src = h->fref[0][j>>1]->filtered_fld;
......@@ -562,7 +562,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
for( int k = 1; k < 4; k++ )
h->mb.pic.p_fref[0][j][k] = filtered_src[k] + ref_pix_offset[j&1];
if( h->sh.weight[j][0].weightfn )
h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> h->mb.b_interlaced][ref_pix_offset[j&1]];
h->mb.pic.p_fref_w[j] = &h->fenc->weighted[j >> MB_INTERLACED][ref_pix_offset[j&1]];
else
h->mb.pic.p_fref_w[j] = h->mb.pic.p_fref[0][j][0];
}
......@@ -570,7 +570,7 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int mb_x
if( h->sh.i_type == SLICE_TYPE_B )
for( int j = 0; j < h->mb.pic.i_fref[1]; j++ )
{
if( h->mb.b_interlaced )
if( MB_INTERLACED )
{
plane_src = h->fref[1][j>>1]->plane_fld[i];
filtered_src = h->fref[1][j>>1]->filtered_fld;
......@@ -601,7 +601,7 @@ x264_left_table_t left_indices[4] =
static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, int mb_y )
{
int top_y = mb_y - (1 << h->mb.b_interlaced);
int top_y = mb_y - (1 << MB_INTERLACED);
int top = top_y * h->mb.i_mb_stride + mb_x;
h->mb.i_mb_x = mb_x;
......@@ -639,17 +639,17 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2;
h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4;
if( h->sh.b_mbaff )
if( SLICE_MBAFF )
{
if( mb_y&1 )
{
if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
if( mb_x && MB_INTERLACED != h->mb.field[h->mb.i_mb_xy-1] )
{
left[0] = left[1] = h->mb.i_mb_xy - 1 - h->mb.i_mb_stride;
h->mb.left_b8[0] = h->mb.left_b8[1] = h->mb.i_b8_xy - 2 - 2*h->mb.i_b8_stride;
h->mb.left_b4[0] = h->mb.left_b4[1] = h->mb.i_b4_xy - 4 - 4*h->mb.i_b4_stride;
if( h->mb.b_interlaced )
if( MB_INTERLACED )
{
h->mb.left_index_table = &left_indices[2];
left[1] += h->mb.i_mb_stride;
......@@ -663,12 +663,12 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
h->mb.topleft_partition = 1;
}
}
if( !h->mb.b_interlaced )
if( !MB_INTERLACED )
topright_y = -1;
}
else
{
if( h->mb.b_interlaced && top >= 0 )
if( MB_INTERLACED && top >= 0 )
{
if( !h->mb.field[top] )
{
......@@ -680,9 +680,9 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
if( mb_x < h->mb.i_mb_width-1 )
topright_y += !h->mb.field[h->mb.i_mb_stride*topright_y + mb_x + 1];
}
if( mb_x && h->mb.b_interlaced != h->mb.field[h->mb.i_mb_xy-1] )
if( mb_x && MB_INTERLACED != h->mb.field[h->mb.i_mb_xy-1] )
{
if( h->mb.b_interlaced )
if( MB_INTERLACED )
{
h->mb.left_index_table = &left_indices[2];
left[1] += h->mb.i_mb_stride;
......@@ -713,7 +713,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
}
/* We can't predict from the previous threadslice since it hasn't been encoded yet. */
if( (h->i_threadslice_start >> h->mb.b_interlaced) != (mb_y >> h->mb.b_interlaced) )
if( (h->i_threadslice_start >> MB_INTERLACED) != (mb_y >> MB_INTERLACED) )
{
if( top >= 0 )
{
......@@ -772,6 +772,13 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
}
}
/* Indices used to select the top (LTOP) and bottom (LBOT) entries of the
 * left-neighbour arrays (left[], h->mb.left_b8[], h->mb.left_b4[]) in
 * x264_macroblock_cache_load. When HAVE_INTERLACED is zero or undefined,
 * LBOT is also 0, so both lookups go through entry 0. */
#define LTOP 0
#if HAVE_INTERLACED
# define LBOT 1
#else
# define LBOT 0
#endif
void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
{
x264_macroblock_cache_load_neighbours( h, mb_x, mb_y );
......@@ -832,27 +839,32 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
if( h->mb.i_neighbour & MB_LEFT )
{
const int16_t top_luma = (cbp[left[0]] >> (left_index_table->mv[0]&(~1))) & 2;
const int16_t bot_luma = (cbp[left[1]] >> (left_index_table->mv[2]&(~1))) & 2;
h->mb.cache.i_cbp_left = (cbp[left[0]] & 0xfff0) | (bot_luma<<2) | top_luma;
if( SLICE_MBAFF )
{
const int16_t top_luma = (cbp[left[LTOP]] >> (left_index_table->mv[0]&(~1))) & 2;
const int16_t bot_luma = (cbp[left[LBOT]] >> (left_index_table->mv[2]&(~1))) & 2;
h->mb.cache.i_cbp_left = (cbp[left[LTOP]] & 0xfff0) | (bot_luma<<2) | top_luma;
}
else
h->mb.cache.i_cbp_left = cbp[left[0]];
/* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[0]][left_index_table->intra[0]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[0]][left_index_table->intra[1]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[1]][left_index_table->intra[2]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[1]][left_index_table->intra[3]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[0]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = i4x4[left[LTOP]][left_index_table->intra[1]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = i4x4[left[LBOT]][left_index_table->intra[2]];
h->mb.cache.intra4x4_pred_mode[x264_scan8[10] - 1] = i4x4[left[LBOT]][left_index_table->intra[3]];
/* load non_zero_count */
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table->nnz[0]];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table->nnz[1]];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table->nnz[2]];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table->nnz[3]];
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[0]];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[LTOP]][left_index_table->nnz[1]];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[LBOT]][left_index_table->nnz[2]];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[LBOT]][left_index_table->nnz[3]];
h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[0]][left_index_table->nnz_chroma[0]];
h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[1]][left_index_table->nnz_chroma[1]];
h->mb.cache.non_zero_count[x264_scan8[16+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[0]];
h->mb.cache.non_zero_count[x264_scan8[16+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[1]];
h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[0]][left_index_table->nnz_chroma[2]];
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[1]][left_index_table->nnz_chroma[3]];
h->mb.cache.non_zero_count[x264_scan8[16+4+0] - 1] = nnz[left[LTOP]][left_index_table->nnz_chroma[2]];
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = nnz[left[LBOT]][left_index_table->nnz_chroma[3]];
}
else
{
......@@ -881,13 +893,13 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
+ ( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] );
}
if( h->param.b_interlaced )
if( PARAM_INTERLACED )
{
h->mb.pic.i_fref[0] = h->i_ref[0] << h->mb.b_interlaced;
h->mb.pic.i_fref[1] = h->i_ref[1] << h->mb.b_interlaced;
h->mb.pic.i_fref[0] = h->i_ref[0] << MB_INTERLACED;
h->mb.pic.i_fref[1] = h->i_ref[1] << MB_INTERLACED;
}
if( !h->param.b_interlaced )
if( !PARAM_INTERLACED )
{
x264_copy_column8( h->mb.pic.p_fdec[0]-1+ 4*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+ 4*FDEC_STRIDE );
x264_copy_column8( h->mb.pic.p_fdec[0]-1+12*FDEC_STRIDE, h->mb.pic.p_fdec[0]+15+12*FDEC_STRIDE );
......@@ -921,9 +933,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
int i8 = x264_scan8[0] - 1 - 1*8;
if( h->mb.i_neighbour & MB_TOPLEFT )
{
int ir = 2*(s8x8*h->mb.i_mb_topleft_y + mb_x-1)+1+s8x8;
int iv = 4*(s4x4*h->mb.i_mb_topleft_y + mb_x-1)+3+3*s4x4;
if( h->mb.topleft_partition )
int ir = SLICE_MBAFF ? 2*(s8x8*h->mb.i_mb_topleft_y + mb_x-1)+1+s8x8 : top_8x8 - 1;
int iv = SLICE_MBAFF ? 4*(s4x4*h->mb.i_mb_topleft_y + mb_x-1)+3+3*s4x4 : top_4x4 - 1;
if( SLICE_MBAFF && h->mb.topleft_partition )
{
/* Take motion vector from the middle of macroblock instead of
* the bottom right as usual. */
......@@ -957,8 +969,10 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
i8 = x264_scan8[0] + 4 - 1*8;
if( h->mb.i_neighbour & MB_TOPRIGHT )
{
h->mb.cache.ref[l][i8] = ref[2*(s8x8*h->mb.i_mb_topright_y + (mb_x+1))+s8x8];
CP32( h->mb.cache.mv[l][i8], mv[4*(s4x4*h->mb.i_mb_topright_y + (mb_x+1))+3*s4x4] );
int ir = SLICE_MBAFF ? 2*(s8x8*h->mb.i_mb_topright_y + (mb_x+1))+s8x8 : top_8x8 + 2;
int iv = SLICE_MBAFF ? 4*(s4x4*h->mb.i_mb_topright_y + (mb_x+1))+3*s4x4 : top_4x4 + 4;
h->mb.cache.ref[l][i8] = ref[ir];
CP32( h->mb.cache.mv[l][i8], mv[iv] );
}
else
h->mb.cache.ref[l][i8] = -2;
......@@ -966,15 +980,32 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
i8 = x264_scan8[0] - 1;
if( h->mb.i_neighbour & MB_LEFT )
{
h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table->ref[0]];
h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table->ref[1]];
h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table->ref[2]];
h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table->ref[3]];
CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[1]] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[2]] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[3]] );
if( SLICE_MBAFF )
{
h->mb.cache.ref[l][i8+0*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[0]];
h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[LTOP] + 1 + s8x8*left_index_table->ref[1]];
h->mb.cache.ref[l][i8+2*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[2]];
h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[LBOT] + 1 + s8x8*left_index_table->ref[3]];
CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[LTOP] + 3 + s4x4*left_index_table->mv[1]] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[2]] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[LBOT] + 3 + s4x4*left_index_table->mv[3]] );
}
else
{
const int ir = h->mb.i_b8_xy - 1;
const int iv = h->mb.i_b4_xy - 1;
h->mb.cache.ref[l][i8+0*8] =
h->mb.cache.ref[l][i8+1*8] = ref[ir + 0*s8x8];
h->mb.cache.ref[l][i8+2*8] =
h->mb.cache.ref[l][i8+3*8] = ref[ir + 1*s8x8];
CP32( h->mb.cache.mv[l][i8+0*8], mv[iv + 0*s4x4] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[iv + 1*s4x4] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[iv + 2*s4x4] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[iv + 3*s4x4] );
}
}
else
{
......@@ -995,9 +1026,9 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
* above diagram do not exist, but the entries d, e and f exist (in
* the macroblock to the left) then use those instead.
*/
if( h->sh.b_mbaff && (h->mb.i_neighbour & MB_LEFT) )
if( SLICE_MBAFF && (h->mb.i_neighbour & MB_LEFT) )
{
if( h->mb.b_interlaced && !h->mb.field[h->mb.i_mb_xy-1] )
if( MB_INTERLACED && !h->mb.field[h->mb.i_mb_xy-1] )
{
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*0];
h->mb.cache.topright_ref[l][1] = ref[h->mb.left_b8[0] + 1 + s8x8*1];
......@@ -1006,7 +1037,7 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
CP32( h->mb.cache.topright_mv[l][1], mv[h->mb.left_b4[0] + 3 + s4x4*(left_index_table->mv[1]+1)] );
CP32( h->mb.cache.topright_mv[l][2], mv[h->mb.left_b4[1] + 3 + s4x4*(left_index_table->mv[2]+1)] );
}
else if( !h->mb.b_interlaced && h->mb.field[h->mb.i_mb_xy-1] )
else if( !MB_INTERLACED && h->mb.field[h->mb.i_mb_xy-1] )
{
// Looking at the bottom field so always take the bottom macroblock of the pair.
h->mb.cache.topright_ref[l][0] = ref[h->mb.left_b8[0] + 1 + s8x8*2 + s8x8*left_index_table->ref[0]];
......@@ -1026,20 +1057,20 @@ void x264_macroblock_cache_load( x264_t *h, int mb_x, int mb_y )
else
M64( h->mb.cache.mvd[l][x264_scan8[0] - 8] ) = 0;
if( h->mb.cache.ref[l][x264_scan8[0]-1] >= 0 )
if( h->mb.i_neighbour & MB_LEFT && (!SLICE_MBAFF || h->mb.cache.ref[l][x264_scan8[0]-1] >= 0) )
{
CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[0]][left_index_table->intra[0]] );
CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[0]][left_index_table->intra[1]] );
CP16( h->mb.cache.mvd[l][x264_scan8[0 ] - 1], mvd[left[LTOP]][left_index_table->intra[0]] );
CP16( h->mb.cache.mvd[l][x264_scan8[2 ] - 1], mvd[left[LTOP]][left_index_table->intra[1]] );
}
else
{
M16( h->mb.cache.mvd[l][x264_scan8[0]-1+0*8] ) = 0;
M16( h->mb.cache.mvd[l][x264_scan8[0]-1+1*8] ) = 0;
}
if( h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0 )
if( h->mb.i_neighbour & MB_LEFT && (!SLICE_MBAFF || h->mb.cache.ref[l][x264_scan8[0]-1+2*8] >=0) )
{
CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[1]][left_index_table->intra[2]] );
CP16( h->mb.cache.mvd[l][x264_scan8[10] - 1], mvd[left[1]][left_index_table->intra[3]] );
CP16( h->mb.cache.mvd[l][x264_scan8[8 ] - 1], mvd[left[LBOT]][left_index_table->intra[2]] );