Commit fcda8dd9 authored by Fiona Glaser
Browse files

Remove double-dereferences for MB width/height data

Store the MB width and height in x264_t (as h->mb.i_mb_width and h->mb.i_mb_height) instead of reading them through the SPS pointer.
parent 89463430
......@@ -517,6 +517,8 @@ struct x264_t
/* MB table and cache for current frame/mb */
struct
{
int i_mb_width;
int i_mb_height;
int i_mb_count; /* number of mbs in a frame */
/* Strides */
......
......@@ -305,7 +305,7 @@ void x264_frame_deblock_row( x264_t *h, int mb_y )
int strideuv = h->fdec->i_stride[1];
int stride2uv = strideuv << b_interlaced;
for( int mb_x = 0; mb_x < h->sps->i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
for( int mb_x = 0; mb_x < h->mb.i_mb_width; mb_x += (~b_interlaced | mb_y)&1, mb_y ^= b_interlaced )
{
x264_prefetch_fenc( h, h->fdec, mb_x, mb_y );
x264_macroblock_cache_load_neighbours_deblock( h, mb_x, mb_y );
......
......@@ -38,9 +38,9 @@ x264_frame_t *x264_frame_new( x264_t *h, int b_fdec )
CHECKED_MALLOCZERO( frame, sizeof(x264_frame_t) );
/* allocate frame data (+64 for extra data for me) */
i_width = h->sps->i_mb_width*16;
i_width = h->mb.i_mb_width*16;
i_stride = ALIGN( i_width + 2*PADH, align );
i_lines = h->sps->i_mb_height*16;
i_lines = h->mb.i_mb_height*16;
frame->i_plane = 3;
for( int i = 0; i < 3; i++ )
......@@ -286,8 +286,8 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y, int b_e
for( int i = 0; i < frame->i_plane; i++ )
{
int stride = frame->i_stride[i];
int width = 16*h->sps->i_mb_width >> !!i;
int height = (b_end ? 16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
int width = 16*h->mb.i_mb_width >> !!i;
int height = (b_end ? 16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff : 16) >> !!i;
int padh = PADH >> !!i;
int padv = PADV >> !!i;
// buffer: 2 chroma, 3 luma (rounded to 4) because deblocking goes beyond the top of the mb
......@@ -313,8 +313,8 @@ void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y
we want to expand border from the last filtered pixel */
int b_start = !mb_y;
int stride = frame->i_stride[0];
int width = 16*h->sps->i_mb_width + 8;
int height = b_end ? (16*(h->sps->i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
int width = 16*h->mb.i_mb_width + 8;
int height = b_end ? (16*(h->mb.i_mb_height - mb_y) >> h->sh.b_mbaff) + 16 : 16;
int padh = PADH - 4;
int padv = PADV - 8;
for( int i = 1; i < 4; i++ )
......@@ -344,8 +344,8 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
int i_subsample = i ? 1 : 0;
int i_width = h->param.i_width >> i_subsample;
int i_height = h->param.i_height >> i_subsample;
int i_padx = (h->sps->i_mb_width * 16 - h->param.i_width) >> i_subsample;
int i_pady = (h->sps->i_mb_height * 16 - h->param.i_height) >> i_subsample;
int i_padx = (h->mb.i_mb_width * 16 - h->param.i_width) >> i_subsample;
int i_pady = (h->mb.i_mb_height * 16 - h->param.i_height) >> i_subsample;
if( i_padx )
{
......
......@@ -216,9 +216,9 @@ int x264_macroblock_cache_allocate( x264_t *h )
{
int i_mb_count = h->mb.i_mb_count;
h->mb.i_mb_stride = h->sps->i_mb_width;
h->mb.i_b8_stride = h->sps->i_mb_width * 2;
h->mb.i_b4_stride = h->sps->i_mb_width * 4;
h->mb.i_mb_stride = h->mb.i_mb_width;
h->mb.i_b8_stride = h->mb.i_mb_width * 2;
h->mb.i_b4_stride = h->mb.i_mb_width * 4;
h->mb.b_interlaced = h->param.b_interlaced;
......@@ -267,8 +267,8 @@ int x264_macroblock_cache_allocate( x264_t *h )
if( !h->param.i_sync_lookahead || h == h->thread[h->param.i_threads] )
{
// Fake analysis only works on lowres
i_stride = ALIGN( h->sps->i_mb_width*8 + 2*PADH, align );
luma_plane_size = i_stride * (h->sps->i_mb_height*8+2*i_padv);
i_stride = ALIGN( h->mb.i_mb_width*8 + 2*PADH, align );
luma_plane_size = i_stride * (h->mb.i_mb_height*8+2*i_padv);
// Only need 1 buffer for analysis
numweightbuf = 1;
}
......@@ -277,8 +277,8 @@ int x264_macroblock_cache_allocate( x264_t *h )
}
else
{
i_stride = ALIGN( h->sps->i_mb_width*16 + 2*PADH, align );
luma_plane_size = i_stride * (h->sps->i_mb_height*16+2*i_padv);
i_stride = ALIGN( h->mb.i_mb_width*16 + 2*PADH, align );
luma_plane_size = i_stride * (h->mb.i_mb_height*16+2*i_padv);
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART )
//SMART can weight one ref and one offset -1
......@@ -327,10 +327,10 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
for( int j = 0; j < 3; j++ )
{
/* shouldn't really be initialized, just silences a valgrind false-positive in predict_8x8_filter_mmx */
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], ((h->sps->i_mb_width*16+32)>>!!j) * sizeof(pixel) );
CHECKED_MALLOCZERO( h->intra_border_backup[i][j], ((h->mb.i_mb_width*16+32)>>!!j) * sizeof(pixel) );
h->intra_border_backup[i][j] += 8;
}
CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->sps->i_mb_width );
CHECKED_MALLOC( h->deblock_strength[i], sizeof(**h->deblock_strength) * h->mb.i_mb_width );
}
/* Allocate scratch buffer */
......@@ -344,7 +344,7 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
((me_range*2+18) * sizeof(int16_t) + (me_range+4) * (me_range+1) * 4 * sizeof(mvsad_t));
scratch_size = X264_MAX3( buf_hpel, buf_ssim, buf_tesa );
}
int buf_mbtree = h->param.rc.b_mb_tree * ((h->sps->i_mb_width+3)&~3) * sizeof(int);
int buf_mbtree = h->param.rc.b_mb_tree * ((h->mb.i_mb_width+3)&~3) * sizeof(int);
scratch_size = X264_MAX( scratch_size, buf_mbtree );
CHECKED_MALLOC( h->scratch_buffer, scratch_size );
......@@ -614,7 +614,7 @@ static void inline x264_macroblock_cache_load_neighbours( x264_t *h, int mb_x, i
}
}
if( mb_x < h->sps->i_mb_width - 1 && top + 1 >= 0 )
if( mb_x < h->mb.i_mb_width - 1 && top + 1 >= 0 )
{
h->mb.i_neighbour_frame |= MB_TOPRIGHT;
h->mb.i_mb_topright_xy = top + 1;
......
......@@ -455,9 +455,9 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int16_t mvc[
}
SET_TMVP(0,0);
if( h->mb.i_mb_x < h->sps->i_mb_width-1 )
if( h->mb.i_mb_x < h->mb.i_mb_width-1 )
SET_TMVP(1,0);
if( h->mb.i_mb_y < h->sps->i_mb_height-1 )
if( h->mb.i_mb_y < h->mb.i_mb_height-1 )
SET_TMVP(0,1);
#undef SET_TMVP
}
......
......@@ -92,7 +92,7 @@ static void mv( int x0, int y0, int16_t dmv[2], int ref, int zoom, char *col )
int x264_visualize_init( x264_t *h )
{
CHECKED_MALLOC( h->visualize, h->sps->i_mb_width * h->sps->i_mb_height * sizeof(visualize_t) );
CHECKED_MALLOC( h->visualize, h->mb.i_mb_width * h->mb.i_mb_height * sizeof(visualize_t) );
return 0;
fail:
return -1;
......@@ -170,11 +170,11 @@ void x264_visualize_show( x264_t *h )
else
disp_gray_zoom( 0, frame, width, height, stride, "fdec", zoom );
for( int mb_xy = 0; mb_xy < h->sps->i_mb_width * h->sps->i_mb_height; mb_xy++ )
for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
{
visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
const int mb_y = mb_xy / h->sps->i_mb_width;
const int mb_x = mb_xy % h->sps->i_mb_width;
const int mb_y = mb_xy / h->mb.i_mb_width;
const int mb_x = mb_xy % h->mb.i_mb_width;
char *const col = GET_STRING( mb_types, v->i_type );
int x = mb_x*16*zoom;
int y = mb_y*16*zoom;
......
......@@ -933,7 +933,9 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( x264_cqm_init( h ) < 0 )
goto fail;
h->mb.i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height;
h->mb.i_mb_width = h->sps->i_mb_width;
h->mb.i_mb_height = h->sps->i_mb_height;
h->mb.i_mb_count = h->mb.i_mb_width * h->mb.i_mb_height;
/* Init frames. */
if( h->param.i_bframe_adaptive == X264_B_ADAPT_TRELLIS && !h->param.rc.b_stat_read )
......@@ -1577,7 +1579,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
{
memcpy( h->intra_border_backup[j][i],
h->fdec->plane[i] + ((mb_y*16 >> !!i) + j - 1 - h->sh.b_mbaff) * h->fdec->i_stride[i],
(h->sps->i_mb_width*16 >> !!i) * sizeof(pixel) );
(h->mb.i_mb_width*16 >> !!i) * sizeof(pixel) );
}
if( b_deblock )
......@@ -1586,7 +1588,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
if( b_hpel )
{
int end = mb_y == h->sps->i_mb_height;
int end = mb_y == h->mb.i_mb_height;
x264_frame_expand_border( h, h->fdec, min_y, end );
if( h->param.analyse.i_subpel_refine )
{
......@@ -1775,7 +1777,7 @@ static int x264_slice_write( x264_t *h )
/* If this isn't the first slice in the threadslice, set the slice QP
* equal to the last QP in the previous slice for more accurate
* CABAC initialization. */
if( h->sh.i_first_mb != h->i_threadslice_start * h->sps->i_mb_width )
if( h->sh.i_first_mb != h->i_threadslice_start * h->mb.i_mb_width )
{
h->sh.i_qp = h->mb.i_last_qp;
h->sh.i_qp_delta = h->sh.i_qp - h->pps->i_pic_init_qp;
......@@ -1794,11 +1796,11 @@ static int x264_slice_write( x264_t *h )
h->mb.i_last_qp = h->sh.i_qp;
h->mb.i_last_dqp = 0;
i_mb_y = h->sh.i_first_mb / h->sps->i_mb_width;
i_mb_x = h->sh.i_first_mb % h->sps->i_mb_width;
i_mb_y = h->sh.i_first_mb / h->mb.i_mb_width;
i_mb_x = h->sh.i_first_mb % h->mb.i_mb_width;
i_skip = 0;
while( (mb_xy = i_mb_x + i_mb_y * h->sps->i_mb_width) <= h->sh.i_last_mb )
while( (mb_xy = i_mb_x + i_mb_y * h->mb.i_mb_width) <= h->sh.i_last_mb )
{
int mb_spos = bs_pos(&h->out.bs) + x264_cabac_pos(&h->cabac);
if( h->param.i_slice_max_size > 0 )
......@@ -1973,11 +1975,11 @@ static int x264_slice_write( x264_t *h )
if( h->sh.b_mbaff )
{
i_mb_x += i_mb_y & 1;
i_mb_y ^= i_mb_x < h->sps->i_mb_width;
i_mb_y ^= i_mb_x < h->mb.i_mb_width;
}
else
i_mb_x++;
if( i_mb_x == h->sps->i_mb_width )
if( i_mb_x == h->mb.i_mb_width )
{
i_mb_y++;
i_mb_x = 0;
......@@ -2000,7 +2002,7 @@ static int x264_slice_write( x264_t *h )
if( x264_nal_end( h ) )
return -1;
if( h->sh.i_last_mb == (h->i_threadslice_end * h->sps->i_mb_width - 1) )
if( h->sh.i_last_mb == (h->i_threadslice_end * h->mb.i_mb_width - 1) )
{
h->stat.frame.i_misc_bits = bs_pos( &h->out.bs )
+ (h->out.i_nal*NALU_OVERHEAD * 8)
......@@ -2067,8 +2069,8 @@ static void *x264_slices_write( x264_t *h )
h->sh.i_last_mb = h->sh.i_first_mb + h->param.i_slice_max_mbs - 1;
else if( h->param.i_slice_count && !h->param.b_sliced_threads )
{
int height = h->sps->i_mb_height >> h->param.b_interlaced;
int width = h->sps->i_mb_width << h->param.b_interlaced;
int height = h->mb.i_mb_height >> h->param.b_interlaced;
int width = h->mb.i_mb_width << h->param.b_interlaced;
i_slice_num++;
h->sh.i_last_mb = (height * i_slice_num + h->param.i_slice_count/2) / h->param.i_slice_count * width - 1;
}
......@@ -2105,14 +2107,14 @@ static int x264_threaded_slices_write( x264_t *h )
t->param = h->param;
memcpy( &t->i_frame, &h->i_frame, offsetof(x264_t, rc) - offsetof(x264_t, i_frame) );
}
int height = h->sps->i_mb_height >> h->param.b_interlaced;
int height = h->mb.i_mb_height >> h->param.b_interlaced;
t->i_threadslice_start = ((height * i + h->param.i_slice_count/2) / h->param.i_threads) << h->param.b_interlaced;
t->i_threadslice_end = ((height * (i+1) + h->param.i_slice_count/2) / h->param.i_threads) << h->param.b_interlaced;
t->sh.i_first_mb = t->i_threadslice_start * h->sps->i_mb_width;
t->sh.i_last_mb = t->i_threadslice_end * h->sps->i_mb_width - 1;
t->sh.i_first_mb = t->i_threadslice_start * h->mb.i_mb_width;
t->sh.i_last_mb = t->i_threadslice_end * h->mb.i_mb_width - 1;
}
x264_stack_align( x264_analyse_weight_frame, h, h->sps->i_mb_height*16 + 16 );
x264_stack_align( x264_analyse_weight_frame, h, h->mb.i_mb_height*16 + 16 );
x264_threads_distribute_ratecontrol( h );
......@@ -2225,8 +2227,8 @@ int x264_encoder_encode( x264_t *h,
if( x264_frame_copy_picture( h, fenc, pic_in ) < 0 )
return -1;
if( h->param.i_width != 16 * h->sps->i_mb_width ||
h->param.i_height != 16 * h->sps->i_mb_height )
if( h->param.i_width != 16 * h->mb.i_mb_width ||
h->param.i_height != 16 * h->mb.i_mb_height )
x264_frame_expand_border_mod16( h, fenc );
fenc->i_frame = h->frames.i_input++;
......@@ -2418,16 +2420,16 @@ int x264_encoder_encode( x264_t *h,
h->b_queued_intra_refresh = 0;
/* PIR is currently only supported with ref == 1, so any intra frame effectively refreshes
* the whole frame and counts as an intra refresh. */
h->fdec->f_pir_position = h->sps->i_mb_width;
h->fdec->f_pir_position = h->mb.i_mb_width;
}
else if( h->fenc->i_type == X264_TYPE_P )
{
int pocdiff = (h->fdec->i_poc - h->fref0[0]->i_poc)/2;
float increment = X264_MAX( ((float)h->sps->i_mb_width-1) / h->param.i_keyint_max, 1 );
float increment = X264_MAX( ((float)h->mb.i_mb_width-1) / h->param.i_keyint_max, 1 );
h->fdec->f_pir_position = h->fref0[0]->f_pir_position;
h->fdec->i_frames_since_pir = h->fref0[0]->i_frames_since_pir + pocdiff;
if( h->fdec->i_frames_since_pir >= h->param.i_keyint_max ||
(h->b_queued_intra_refresh && h->fdec->f_pir_position + 0.5 >= h->sps->i_mb_width) )
(h->b_queued_intra_refresh && h->fdec->f_pir_position + 0.5 >= h->mb.i_mb_width) )
{
h->fdec->f_pir_position = 0;
h->fdec->i_frames_since_pir = 0;
......@@ -2475,7 +2477,7 @@ int x264_encoder_encode( x264_t *h,
if( h->fenc->i_type != X264_TYPE_IDR )
{
int time_to_recovery = X264_MIN( h->sps->i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
int time_to_recovery = X264_MIN( h->mb.i_mb_width - 1, h->param.i_keyint_max ) + h->param.i_bframe;
x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE );
x264_sei_recovery_point_write( h, &h->out.bs, time_to_recovery );
x264_nal_end( h );
......@@ -2524,7 +2526,7 @@ int x264_encoder_encode( x264_t *h,
/* Write frame */
h->i_threadslice_start = 0;
h->i_threadslice_end = h->sps->i_mb_height;
h->i_threadslice_end = h->mb.i_mb_height;
if( h->i_thread_frames > 1 )
{
if( x264_pthread_create( &h->thread_handle, NULL, (void*)x264_slices_write, h ) )
......@@ -2766,14 +2768,14 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
{
static const char mb_chars[] = { 'i', 'i', 'I', 'C', 'P', '8', 'S',
'D', '<', 'X', 'B', 'X', '>', 'B', 'B', 'B', 'B', '8', 'S' };
for( int mb_xy = 0; mb_xy < h->sps->i_mb_width * h->sps->i_mb_height; mb_xy++ )
for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ )
{
if( h->mb.type[mb_xy] < X264_MBTYPE_MAX && h->mb.type[mb_xy] >= 0 )
fprintf( stderr, "%c ", mb_chars[ h->mb.type[mb_xy] ] );
else
fprintf( stderr, "? " );
if( (mb_xy+1) % h->sps->i_mb_width == 0 )
if( (mb_xy+1) % h->mb.i_mb_width == 0 )
fprintf( stderr, "\n" );
}
}
......
......@@ -241,8 +241,8 @@ void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_off
* FIXME: while they're written in 5 significant digits, they're only tuned to 2. */
float strength;
float avg_adj = 0.f;
int width = h->sps->i_mb_width;
int height = h->sps->i_mb_height;
int width = h->mb.i_mb_width;
int height = h->mb.i_mb_height;
/* Initialize frame stats */
for( int i = 0; i < 3; i++ )
{
......@@ -1146,7 +1146,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
if( rc->b_vbv )
{
memset( h->fdec->i_row_bits, 0, h->sps->i_mb_height * sizeof(int) );
memset( h->fdec->i_row_bits, 0, h->mb.i_mb_height * sizeof(int) );
rc->row_pred = &rc->row_preds[h->sh.i_type];
rc->buffer_rate = h->fenc->i_cpb_duration * rc->vbv_max_rate * h->sps->vui.i_num_units_in_tick / h->sps->vui.i_time_scale;
update_vbv_plan( h, overhead );
......@@ -1166,7 +1166,7 @@ void x264_ratecontrol_start( x264_t *h, int i_force_qp, int overhead )
{
//384 * ( Max( PicSizeInMbs, fR * MaxMBPS ) + MaxMBPS * ( tr( 0 ) - tr,n( 0 ) ) ) / MinCR
double fr = 1. / 172;
int pic_size_in_mbs = h->sps->i_mb_width * h->sps->i_mb_height;
int pic_size_in_mbs = h->mb.i_mb_width * h->mb.i_mb_height;
rc->frame_size_maximum = 384 * 8 * X264_MAX( pic_size_in_mbs, fr*l->mbps ) / mincr;
}
else
......@@ -1283,7 +1283,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
rc->qpa_rc += rc->qpm;
rc->qpa_aq += h->mb.i_qp;
if( h->mb.i_mb_x != h->sps->i_mb_width - 1 || !rc->b_vbv )
if( h->mb.i_mb_x != h->mb.i_mb_width - 1 || !rc->b_vbv )
return;
h->fdec->f_row_qp[y] = rc->qpm;
......@@ -1320,7 +1320,7 @@ void x264_ratecontrol_mb( x264_t *h, int bits )
size_of_other_slices += h->thread[i]->rc->frame_size_estimated;
}
else
rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->sps->i_mb_width) );
rc->max_frame_error = X264_MAX( 0.05, 1.0 / (h->mb.i_mb_width) );
/* More threads means we have to be more cautious in letting ratecontrol use up extra bits. */
float rc_tol = buffer_left_planned / h->param.i_threads * rc->rate_tolerance;
......@@ -2258,7 +2258,7 @@ void x264_threads_merge_ratecontrol( x264_t *h )
for( int row = t->i_threadslice_start; row < t->i_threadslice_end; row++ )
size += h->fdec->i_row_satd[row];
int bits = t->stat.frame.i_mv_bits + t->stat.frame.i_tex_bits + t->stat.frame.i_misc_bits;
int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->sps->i_mb_width;
int mb_count = (t->i_threadslice_end - t->i_threadslice_start) * h->mb.i_mb_width;
update_predictor( &rc->pred[h->sh.i_type+5*i], qp2qscale( rct->qpa_rc/mb_count ), size, bits );
}
if( !i )
......
......@@ -119,7 +119,7 @@ static NOINLINE unsigned int x264_weight_cost( x264_t *h, x264_frame_t *fenc, pi
if( h->param.i_slice_count )
numslices = h->param.i_slice_count;
else if( h->param.i_slice_max_mbs )
numslices = (h->sps->i_mb_width * h->sps->i_mb_height + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
numslices = (h->mb.i_mb_width * h->mb.i_mb_height + h->param.i_slice_max_mbs-1) / h->param.i_slice_max_mbs;
else
numslices = 1;
/* FIXME: find a way to account for --slice-max-size?
......@@ -231,16 +231,16 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
const int b_bidir = (b < p1);
const int i_mb_x = h->mb.i_mb_x;
const int i_mb_y = h->mb.i_mb_y;
const int i_mb_stride = h->sps->i_mb_width;
const int i_mb_stride = h->mb.i_mb_width;
const int i_mb_xy = i_mb_x + i_mb_y * i_mb_stride;
const int i_stride = fenc->i_stride_lowres;
const int i_pel_offset = 8 * (i_mb_x + i_mb_y * i_stride);
const int i_bipred_weight = h->param.analyse.b_weighted_bipred ? 64 - (dist_scale_factor>>2) : 32;
int16_t (*fenc_mvs[2])[2] = { &frames[b]->lowres_mvs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mvs[1][p1-b-1][i_mb_xy] };
int (*fenc_costs[2]) = { &frames[b]->lowres_mv_costs[0][b-p0-1][i_mb_xy], &frames[b]->lowres_mv_costs[1][p1-b-1][i_mb_xy] };
int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->sps->i_mb_width - 1 &&
i_mb_y > 0 && i_mb_y < h->sps->i_mb_height - 1) ||
h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2;
int b_frame_score_mb = (i_mb_x > 0 && i_mb_x < h->mb.i_mb_width - 1 &&
i_mb_y > 0 && i_mb_y < h->mb.i_mb_height - 1) ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
ALIGNED_ARRAY_8( pixel, pix1,[9*FDEC_STRIDE] );
pixel *pix2 = pix1+8;
......@@ -256,13 +256,13 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
// no need for h->mb.mv_min[]
h->mb.mv_min_fpel[0] = -8*h->mb.i_mb_x - 4;
h->mb.mv_max_fpel[0] = 8*( h->sps->i_mb_width - h->mb.i_mb_x - 1 ) + 4;
h->mb.mv_max_fpel[0] = 8*( h->mb.i_mb_width - h->mb.i_mb_x - 1 ) + 4;
h->mb.mv_min_spel[0] = 4*( h->mb.mv_min_fpel[0] - 8 );
h->mb.mv_max_spel[0] = 4*( h->mb.mv_max_fpel[0] + 8 );
if( h->mb.i_mb_x >= h->sps->i_mb_width - 2 )
if( h->mb.i_mb_x >= h->mb.i_mb_width - 2 )
{
h->mb.mv_min_fpel[1] = -8*h->mb.i_mb_y - 4;
h->mb.mv_max_fpel[1] = 8*( h->sps->i_mb_height - h->mb.i_mb_y - 1 ) + 4;
h->mb.mv_max_fpel[1] = 8*( h->mb.i_mb_height - h->mb.i_mb_y - 1 ) + 4;
h->mb.mv_min_spel[1] = 4*( h->mb.mv_min_fpel[1] - 8 );
h->mb.mv_max_spel[1] = 4*( h->mb.mv_max_fpel[1] + 8 );
}
......@@ -364,14 +364,14 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
M32( mvc[0] ) = 0;
M32( mvc[2] ) = 0;
#define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
if( i_mb_x < h->sps->i_mb_width - 1 )
if( i_mb_x < h->mb.i_mb_width - 1 )
MVC( fenc_mv[1] );
if( i_mb_y < h->sps->i_mb_height - 1 )
if( i_mb_y < h->mb.i_mb_height - 1 )
{
MVC( fenc_mv[i_mb_stride] );
if( i_mb_x > 0 )
MVC( fenc_mv[i_mb_stride-1] );
if( i_mb_x < h->sps->i_mb_width - 1 )
if( i_mb_x < h->mb.i_mb_width - 1 )
MVC( fenc_mv[i_mb_stride+1] );
}
#undef MVC
......@@ -501,9 +501,9 @@ lowres_intra_mb:
#undef TRY_BIDIR
#define NUM_MBS\
(h->sps->i_mb_width > 2 && h->sps->i_mb_height > 2 ?\
(h->sps->i_mb_width - 2) * (h->sps->i_mb_height - 2) :\
h->sps->i_mb_width * h->sps->i_mb_height)
(h->mb.i_mb_width > 2 && h->mb.i_mb_height > 2 ?\
(h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2) :\
h->mb.i_mb_width * h->mb.i_mb_height)
static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
......@@ -559,21 +559,21 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
/* The edge mbs seem to reduce the predictive quality of the
* whole frame's score, but are needed for a spatial distribution. */
if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
{
for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
{
row_satd[h->mb.i_mb_y] = 0;
if( !frames[b]->b_intra_calculated )
row_satd_intra[h->mb.i_mb_y] = 0;
for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
}
}
else
{
for( h->mb.i_mb_y = h->sps->i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y-- )
for( h->mb.i_mb_x = h->sps->i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- )
for( h->mb.i_mb_y = h->mb.i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y-- )
for( h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- )
x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
}
......@@ -604,19 +604,19 @@ static int x264_slicetype_frame_cost_recalculate( x264_t *h, x264_frame_t **fram
int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
float *qp_offset = IS_X264_TYPE_B(frames[b]->i_type) ? frames[b]->f_qp_offset_aq : frames[b]->f_qp_offset;
x264_emms();
for( h->mb.i_mb_y = h->sps->i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
{
row_satd[ h->mb.i_mb_y ] = 0;
for( h->mb.i_mb_x = h->sps->i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
{
int i_mb_xy = h->mb.i_mb_x + h->mb.i_mb_y*h->mb.i_mb_stride;
int i_mb_cost = frames[b]->lowres_costs[b-p0][p1-b][i_mb_xy] & LOWRES_COST_MASK;
float qp_adj = qp_offset[i_mb_xy];
i_mb_cost = (i_mb_cost * x264_exp2fix8(qp_adj) + 128) >> 8;
row_satd[ h->mb.i_mb_y ] += i_mb_cost;
if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->sps->i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1) ||
h->sps->i_mb_width <= 2 || h->sps->i_mb_height <= 2 )
if( (h->mb.i_mb_y > 0 && h->mb.i_mb_y < h->mb.i_mb_height - 1 &&
h->mb.i_mb_x > 0 && h->mb.i_mb_x < h->mb.i_mb_width - 1) ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
{
i_score += i_mb_cost;
}
......@@ -659,17 +659,17 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
/* For non-reffed frames the source costs are always zero, so just memset one row and re-use it. */
if( !referenced )
memset( frames[b]->i_propagate_cost, 0, h->sps->i_mb_width * sizeof(uint16_t) );
memset( frames[b]->i_propagate_cost, 0, h->mb.i_mb_width * sizeof(uint16_t) );
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->sps->i_mb_height; h->mb.i_mb_y++ )
for( h->mb.i_mb_y = 0; h->mb.i_mb_y < h->mb.i_mb_height; h->mb.i_mb_y++ )
{
int mb_index = h->mb.i_mb_y*h->mb.i_mb_stride;
h->mc.mbtree_propagate_cost( buf, propagate_cost,
frames[b]->i_intra_cost+mb_index, frames[b]->lowres_costs[b-p0][p1-b]+mb_index,
frames[b]->i_inv_qscale_factor+mb_index, h->sps->i_mb_width );
frames[b]->i_inv_qscale_factor+mb_index, h->mb.i_mb_width );
if( referenced )
propagate_cost += h->sps->i_mb_width;
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->sps->i_mb_width; h->mb.i_mb_x++, mb_index++ )
propagate_cost += h->mb.i_mb_width;
for( h->mb.i_mb_x = 0; h->mb.i_mb_x < h->mb.i_mb_width; h->mb.i_mb_x++, mb_index++ )
{
int propagate_amount = buf[h->mb.i_mb_x];
/* Don't propagate for an intra block. */
......@@ -711,7 +711,7 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
/* We could just clip the MVs, but pixels that lie outside the frame probably shouldn't
* be counted. */
if( mbx < h->sps->i_mb_width-1 && mby < h->sps->i_mb_height-1 && mbx >= 0 && mby >= 0 )
if( mbx < h->mb.i_mb_width-1 && mby < h->mb.i_mb_height-1 && mbx >= 0 && mby >= 0 )
{
CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
......@@ -720,13 +720,13 @@ static void x264_macroblock_tree_propagate( x264_t *h, x264_frame_t **frames, in
}
else /* Check offsets individually */
{
if( mbx < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx >= 0 && mby >= 0 )
if( mbx < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx >= 0 && mby >= 0 )
CLIP_ADD( ref_costs[list][idx0], (listamount*idx0weight+512)>>10 );
if( mbx+1 < h->sps->i_mb_width && mby < h->sps->i_mb_height && mbx+1 >= 0 && mby >= 0 )
if( mbx+1 < h->mb.i_mb_width && mby < h->mb.i_mb_height && mbx+1 >= 0 && mby >= 0 )
CLIP_ADD( ref_costs[list][idx1], (listamount*idx1weight+512)>>10 );
if( mbx < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx >= 0 && mby+1 >= 0 )
if( mbx < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx >= 0 && mby+1 >= 0 )
CLIP_ADD( ref_costs[list][idx2], (listamount*idx2weight+512)>>10 );
if( mbx+1 < h->sps->i_mb_width && mby+1 < h->sps->i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
if( mbx+1 < h->mb.i_mb_width && mby+1 < h->mb.i_mb_height && mbx+1 >= 0 && mby+1 >= 0 )
CLIP_ADD( ref_costs[list][idx3], (listamount*idx3weight+512)>>10 );