Commit df700eae authored by Fiona Glaser

Threaded lookahead

Split each lookahead frame analysis call into multiple threads.  Has a small
impact on quality, but does not seem to be consistently any worse.

This helps alleviate bottlenecks with many cores and frame threads. In many
cases, this massively increases performance on many-core systems.  For example,
over 100% faster 1080p encoding with --preset veryfast on a 12-core i7 system.
Realtime 1080p30 at --preset slow should now be feasible on real systems.

For sliced-threads, this patch should be faster regardless of settings (~10%).

By default, the number of lookahead threads is 1/6 the number of regular threads.  This isn't exact,
but it seems to work well for all presets on real systems.  With sliced-threads,
it's the same as the number of encoding threads.
parent 7cfe43cc
......@@ -50,6 +50,7 @@ void x264_param_default( x264_param_t *param )
/* CPU autodetect */
param->cpu = x264_cpu_detect();
param->i_threads = X264_THREADS_AUTO;
param->i_lookahead_threads = X264_THREADS_AUTO;
param->b_deterministic = 1;
param->i_sync_lookahead = X264_SYNC_LOOKAHEAD_AUTO;
......@@ -632,6 +633,13 @@ int x264_param_parse( x264_param_t *p, const char *name, const char *value )
else
p->i_threads = atoi(value);
}
OPT("lookahead-threads")
{
if( !strcmp(value, "auto") )
p->i_lookahead_threads = X264_THREADS_AUTO;
else
p->i_lookahead_threads = atoi(value);
}
OPT("sliced-threads")
p->b_sliced_threads = atobool(value);
OPT("sync-lookahead")
......@@ -1285,6 +1293,7 @@ char *x264_param2string( x264_param_t *p, int b_res )
s += sprintf( s, " fast_pskip=%d", p->analyse.b_fast_pskip );
s += sprintf( s, " chroma_qp_offset=%d", p->analyse.i_chroma_qp_offset );
s += sprintf( s, " threads=%d", p->i_threads );
s += sprintf( s, " lookahead_threads=%d", p->i_lookahead_threads );
s += sprintf( s, " sliced_threads=%d", p->b_sliced_threads );
if( p->i_slice_count )
s += sprintf( s, " slices=%d", p->i_slice_count );
......
......@@ -56,6 +56,7 @@ do {\
#define X264_BFRAME_MAX 16
#define X264_REF_MAX 16
/* Upper bound on encoder threads; x264_t::thread[] is sized X264_THREAD_MAX+1. */
#define X264_THREAD_MAX 128
/* Upper bound on lookahead worker threads; sizes x264_t::lookahead_thread[] and
 * caps param.i_lookahead_threads during parameter validation. */
#define X264_LOOKAHEAD_THREAD_MAX 16
#define X264_PCM_COST (FRAME_SIZE(256*BIT_DEPTH)+16)
#define X264_LOOKAHEAD_MAX 250
#define QP_BD_OFFSET (6*(BIT_DEPTH-8))
......@@ -469,6 +470,7 @@ struct x264_t
x264_param_t param;
x264_t *thread[X264_THREAD_MAX+1];
x264_t *lookahead_thread[X264_LOOKAHEAD_THREAD_MAX];
int b_thread_active;
int i_thread_phase; /* which thread to use for the next frame */
int i_thread_idx; /* which thread this is */
......@@ -476,6 +478,7 @@ struct x264_t
int i_threadslice_end; /* row after the end of this thread slice */
int i_threadslice_pass; /* which pass of encoding we are on */
x264_threadpool_t *threadpool;
x264_threadpool_t *lookaheadpool;
x264_pthread_mutex_t mutex;
x264_pthread_cond_t cv;
......@@ -915,6 +918,7 @@ struct x264_t
/* Buffers that are allocated per-thread even in sliced threads. */
void *scratch_buffer; /* for any temporary storage that doesn't want repeated malloc */
void *scratch_buffer2; /* if the first one's already in use */
pixel *intra_border_backup[5][3]; /* bottom pixels of the previous mb row, used for intra prediction after the framebuffer has been deblocked */
/* Deblock strength values are stored for each 4x4 partition. In MBAFF
* there are four extra values that need to be stored, located in [4][i]. */
......
......@@ -401,6 +401,9 @@ int x264_macroblock_thread_allocate( x264_t *h, int b_lookahead )
else
h->scratch_buffer = NULL;
int buf_lookahead_threads = (h->mb.i_mb_height + (4 + 32) * h->param.i_lookahead_threads) * sizeof(int) * 2;
CHECKED_MALLOC( h->scratch_buffer2, buf_lookahead_threads );
return 0;
fail:
return -1;
......@@ -418,6 +421,7 @@ void x264_macroblock_thread_free( x264_t *h, int b_lookahead )
x264_free( h->intra_border_backup[i][j] - 16 );
}
x264_free( h->scratch_buffer );
x264_free( h->scratch_buffer2 );
}
void x264_macroblock_slice_init( x264_t *h )
......
......@@ -66,7 +66,7 @@ static void x264_threadpool_thread( x264_threadpool_t *pool )
x264_pthread_mutex_unlock( &pool->run.mutex );
if( !job )
continue;
job->ret = job->func( job->arg ); /* execute the function */
job->ret = (void*)x264_stack_align( job->func, job->arg ); /* execute the function */
x264_sync_frame_list_push( &pool->done, (void*)job );
}
}
......@@ -83,7 +83,7 @@ int x264_threadpool_init( x264_threadpool_t **p_pool, int threads,
pool->init_func = init_func;
pool->init_arg = init_arg;
pool->threads = X264_MIN( threads, X264_THREAD_MAX );
pool->threads = threads;
CHECKED_MALLOC( pool->thread_handle, pool->threads * sizeof(x264_pthread_t) );
......
......@@ -395,6 +395,15 @@ static void x264_encoder_thread_init( x264_t *h )
x264_cpu_mask_misalign_sse();
#endif
}
/* Per-thread setup hook for lookahead worker threads; it is handed to
 * x264_threadpool_init() as the init callback when the lookahead pool is created. */
static void x264_lookahead_thread_init( x264_t *h )
{
#if HAVE_MMX
    /* Nothing to do unless the CPU flags request misaligned SSE. */
    if( !(h->param.cpu & X264_CPU_SSE_MISALIGN) )
        return;
    /* The misalign mask has to be set separately for each thread. */
    x264_cpu_mask_misalign_sse();
#endif
}
#endif
/****************************************************************************
......@@ -494,6 +503,9 @@ static int x264_validate_parameters( x264_t *h, int b_open )
if( h->param.i_threads == X264_THREADS_AUTO )
h->param.i_threads = x264_cpu_num_processors() * (h->param.b_sliced_threads?2:3)/2;
if( h->param.i_lookahead_threads == X264_THREADS_AUTO )
h->param.i_lookahead_threads = h->param.i_threads / (h->param.b_sliced_threads?1:6);
int max_sliced_threads = X264_MAX( 1, (h->param.i_height+15)/16 / 4 );
if( h->param.i_threads > 1 )
{
#if !HAVE_THREAD
......@@ -503,14 +515,15 @@ static int x264_validate_parameters( x264_t *h, int b_open )
/* Avoid absurdly small thread slices as they can reduce performance
* and VBV compliance. Capped at an arbitrary 4 rows per thread. */
if( h->param.b_sliced_threads )
{
int max_threads = (h->param.i_height+15)/16 / 4;
h->param.i_threads = X264_MIN( h->param.i_threads, max_threads );
}
h->param.i_threads = X264_MIN( h->param.i_threads, max_sliced_threads );
}
h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_THREAD_MAX );
h->param.i_lookahead_threads = x264_clip3( h->param.i_lookahead_threads, 1, X264_MIN( max_sliced_threads, X264_LOOKAHEAD_THREAD_MAX ) );
if( h->param.i_threads == 1 )
{
h->param.b_sliced_threads = 0;
h->param.i_lookahead_threads = 1;
}
h->i_thread_frames = h->param.b_sliced_threads ? 1 : h->param.i_threads;
if( h->i_thread_frames > 1 )
h->param.nalu_process = NULL;
......@@ -1271,10 +1284,19 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( h->param.i_threads > 1 &&
x264_threadpool_init( &h->threadpool, h->param.i_threads, (void*)x264_encoder_thread_init, h ) )
goto fail;
if( h->param.i_lookahead_threads > 1 &&
x264_threadpool_init( &h->lookaheadpool, h->param.i_lookahead_threads, (void*)x264_lookahead_thread_init, h ) )
goto fail;
h->thread[0] = h;
for( int i = 1; i < h->param.i_threads + !!h->param.i_sync_lookahead; i++ )
CHECKED_MALLOC( h->thread[i], sizeof(x264_t) );
if( h->param.i_lookahead_threads > 1 )
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
{
CHECKED_MALLOC( h->lookahead_thread[i], sizeof(x264_t) );
*h->lookahead_thread[i] = *h;
}
for( int i = 0; i < h->param.i_threads; i++ )
{
......@@ -3457,6 +3479,8 @@ void x264_encoder_close ( x264_t *h )
x264_threadpool_wait_all( h );
if( h->param.i_threads > 1 )
x264_threadpool_delete( h->threadpool );
if( h->param.i_lookahead_threads > 1 )
x264_threadpool_delete( h->lookaheadpool );
if( h->i_thread_frames > 1 )
{
for( int i = 0; i < h->i_thread_frames; i++ )
......@@ -3766,6 +3790,10 @@ void x264_encoder_close ( x264_t *h )
if( h->thread[i]->fref[0][j] && h->thread[i]->fref[0][j]->b_duplicate )
x264_frame_delete( h->thread[i]->fref[0][j] );
if( h->param.i_lookahead_threads > 1 )
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
x264_free( h->lookahead_thread[i] );
for( int i = h->param.i_threads - 1; i >= 0; i-- )
{
x264_frame_t **frame;
......
......@@ -424,9 +424,21 @@ static void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *r
}
}
/* Layout of the per-thread int output buffers filled by the lookahead cost analysis.
 *
 * Each buffer starts with NUM_INTS header slots (indexed by the constants below),
 * followed by one row-SATD accumulator per macroblock row of the thread's slice.
 *
 * Output buffers are separated by 128 bytes (PAD_SIZE ints, assuming 4-byte int)
 * to avoid false sharing of cachelines in multithreaded lookahead. */
#define PAD_SIZE 32
/* Number of header slots: cost_est, cost_est_aq, intra_mbs, num rows */
#define NUM_INTS 4
/* Header slot: accumulated (non-AQ) cost estimate. */
#define COST_EST 0
/* Header slot: accumulated AQ-weighted cost estimate. */
#define COST_EST_AQ 1
/* Header slot: accumulated count of macroblocks chosen as intra. */
#define INTRA_MBS 2
/* Header slot: number of macroblock rows covered by this buffer. */
#define NUM_ROWS 3
/* Index of the row-SATD accumulator for the current macroblock row, relative to the
 * start of the thread slice. Requires an x264_t pointer named `h` in scope. */
#define ROW_SATD (NUM_INTS + (h->mb.i_mb_y - h->i_threadslice_start))
static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
int dist_scale_factor, int do_search[2], const x264_weight_t *w )
int dist_scale_factor, int do_search[2], const x264_weight_t *w,
int *output_inter, int *output_intra )
{
x264_frame_t *fref0 = frames[p0];
x264_frame_t *fref1 = frames[p1];
......@@ -571,7 +583,7 @@ static void x264_slicetype_mb_cost( x264_t *h, x264_mb_analysis_t *a,
#define MVC(mv) { CP32( mvc[i_mvc], mv ); i_mvc++; }
if( i_mb_x < h->mb.i_mb_width - 1 )
MVC( fenc_mv[1] );
if( i_mb_y < h->mb.i_mb_height - 1 )
if( i_mb_y < h->i_threadslice_end - 1 )
{
MVC( fenc_mv[i_mb_stride] );
if( i_mb_x > 0 )
......@@ -653,11 +665,11 @@ lowres_intra_mb:
int i_icost_aq = i_icost;
if( h->param.rc.i_aq_mode )
i_icost_aq = (i_icost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
fenc->i_row_satds[0][0][h->mb.i_mb_y] += i_icost_aq;
output_intra[ROW_SATD] += i_icost_aq;
if( b_frame_score_mb )
{
fenc->i_cost_est[0][0] += i_icost;
fenc->i_cost_est_aq[0][0] += i_icost_aq;
output_intra[COST_EST] += i_icost;
output_intra[COST_EST_AQ] += i_icost_aq;
}
}
i_bcost += lowres_penalty;
......@@ -674,7 +686,7 @@ lowres_intra_mb:
list_used = 0;
}
if( b_frame_score_mb )
fenc->i_intra_mbs[b-p0] += b_intra;
output_inter[INTRA_MBS] += b_intra;
}
/* In an I-frame, we've already added the results above in the intra section. */
......@@ -683,12 +695,12 @@ lowres_intra_mb:
int i_bcost_aq = i_bcost;
if( h->param.rc.i_aq_mode )
i_bcost_aq = (i_bcost_aq * fenc->i_inv_qscale_factor[i_mb_xy] + 128) >> 8;
fenc->i_row_satds[b-p0][p1-b][h->mb.i_mb_y] += i_bcost_aq;
output_inter[ROW_SATD] += i_bcost_aq;
if( b_frame_score_mb )
{
/* Don't use AQ-weighted costs for slicetype decision, only for ratecontrol. */
fenc->i_cost_est[b-p0][p1-b] += i_bcost;
fenc->i_cost_est_aq[b-p0][p1-b] += i_bcost_aq;
output_inter[COST_EST] += i_bcost;
output_inter[COST_EST_AQ] += i_bcost_aq;
}
}
......@@ -701,6 +713,43 @@ lowres_intra_mb:
(h->mb.i_mb_width - 2) * (h->mb.i_mb_height - 2) :\
h->mb.i_mb_width * h->mb.i_mb_height)
/* Argument bundle for x264_slicetype_slice_cost(): everything needed to run the
 * lowres cost analysis over one thread slice (a contiguous range of macroblock rows).
 * Instances are built with positional compound literals, so field order matters. */
typedef struct
{
x264_t *h;                 /* encoder context to analyse with; its i_threadslice_start/end select the rows */
x264_mb_analysis_t *a;     /* analysis state, forwarded to x264_slicetype_mb_cost() */
x264_frame_t **frames;     /* lookahead frame array indexed by p0, p1 and b */
int p0;                    /* index of the list0 reference frame in frames[] */
int p1;                    /* index of the list1 reference frame in frames[] */
int b;                     /* index of the frame being costed in frames[] */
int dist_scale_factor;     /* forwarded unchanged to x264_slicetype_mb_cost() */
int *do_search;            /* two flags, one per list: whether lowres motion search is still needed */
const x264_weight_t *w;    /* weights forwarded to x264_slicetype_mb_cost() */
int *output_inter;         /* output buffer for inter results (NUM_INTS header slots, then per-row SATDs) */
int *output_intra;         /* output buffer for intra results, same layout as output_inter */
} x264_slicetype_slice_t;
/* Run the lowres macroblock cost analysis over one thread slice.
 *
 * The rows visited are those in [h->i_threadslice_start, h->i_threadslice_end),
 * optionally shrunk by a one-macroblock border on every frame edge. Results are
 * accumulated by x264_slicetype_mb_cost() into s->output_inter / s->output_intra. */
static void x264_slicetype_slice_cost( x264_slicetype_slice_t *s )
{
    x264_t *h = s->h;

    /* The edge mbs seem to reduce the predictive quality of the
     * whole frame's score, but are needed for a spatial distribution. */
    int include_edges = h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size
                     || h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2;
    /* Width in macroblocks of the frame border that is skipped: 0 or 1. */
    int border = 1 - include_edges;

    int first_row = X264_MIN( h->i_threadslice_end - 1, h->mb.i_mb_height - 1 - border );
    int last_row  = X264_MAX( h->i_threadslice_start, border );
    int first_col = h->mb.i_mb_width - 1 - border;
    int last_col  = border;

    /* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
     * This considerably improves MV prediction overall. */
    h->mb.i_mb_y = first_row;
    while( h->mb.i_mb_y >= last_row )
    {
        h->mb.i_mb_x = first_col;
        while( h->mb.i_mb_x >= last_col )
        {
            x264_slicetype_mb_cost( h, s->a, s->frames, s->p0, s->p1, s->b, s->dist_scale_factor,
                                    s->do_search, s->w, s->output_inter, s->output_intra );
            h->mb.i_mb_x--;
        }
        h->mb.i_mb_y--;
    }
}
static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
x264_frame_t **frames, int p0, int p1, int b,
int b_intra_penalty )
......@@ -708,77 +757,131 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
int i_score = 0;
int do_search[2];
const x264_weight_t *w = x264_weight_none;
x264_frame_t *fenc = frames[b];
/* Check whether we already evaluated this frame
* If we have tried this frame as P, then we have also tried
* the preceding frames as B. (is this still true?) */
/* Also check that we already calculated the row SATDs for the current frame. */
if( frames[b]->i_cost_est[b-p0][p1-b] >= 0 && (!h->param.rc.i_vbv_buffer_size || frames[b]->i_row_satds[b-p0][p1-b][0] != -1) )
i_score = frames[b]->i_cost_est[b-p0][p1-b];
if( fenc->i_cost_est[b-p0][p1-b] >= 0 && (!h->param.rc.i_vbv_buffer_size || fenc->i_row_satds[b-p0][p1-b][0] != -1) )
i_score = fenc->i_cost_est[b-p0][p1-b];
else
{
int dist_scale_factor = 128;
int *row_satd = frames[b]->i_row_satds[b-p0][p1-b];
int *row_satd_intra = frames[b]->i_row_satds[0][0];
/* For each list, check to see whether we have lowres motion-searched this reference frame before. */
do_search[0] = b != p0 && frames[b]->lowres_mvs[0][b-p0-1][0][0] == 0x7FFF;
do_search[1] = b != p1 && frames[b]->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
do_search[0] = b != p0 && fenc->lowres_mvs[0][b-p0-1][0][0] == 0x7FFF;
do_search[1] = b != p1 && fenc->lowres_mvs[1][p1-b-1][0][0] == 0x7FFF;
if( do_search[0] )
{
if( h->param.analyse.i_weighted_pred && b == p1 )
{
x264_emms();
x264_weights_analyse( h, frames[b], frames[p0], 1 );
w = frames[b]->weight[0];
x264_weights_analyse( h, fenc, frames[p0], 1 );
w = fenc->weight[0];
}
frames[b]->lowres_mvs[0][b-p0-1][0][0] = 0;
fenc->lowres_mvs[0][b-p0-1][0][0] = 0;
}
if( do_search[1] ) frames[b]->lowres_mvs[1][p1-b-1][0][0] = 0;
if( do_search[1] ) fenc->lowres_mvs[1][p1-b-1][0][0] = 0;
if( b == p1 )
frames[b]->i_intra_mbs[b-p0] = 0;
if( !frames[b]->b_intra_calculated )
{
frames[b]->i_cost_est[0][0] = 0;
frames[b]->i_cost_est_aq[0][0] = 0;
}
if( p1 != p0 )
dist_scale_factor = ( ((b-p0) << 8) + ((p1-p0) >> 1) ) / (p1-p0);
frames[b]->i_cost_est[b-p0][p1-b] = 0;
frames[b]->i_cost_est_aq[b-p0][p1-b] = 0;
/* Lowres lookahead goes backwards because the MVs are used as predictors in the main encode.
* This considerably improves MV prediction overall. */
int output_buf_size = h->mb.i_mb_height + (NUM_INTS + PAD_SIZE) * h->param.i_lookahead_threads;
int *output_inter[X264_LOOKAHEAD_THREAD_MAX+1];
int *output_intra[X264_LOOKAHEAD_THREAD_MAX+1];
output_inter[0] = h->scratch_buffer2;
output_intra[0] = output_inter[0] + output_buf_size;
/* The edge mbs seem to reduce the predictive quality of the
* whole frame's score, but are needed for a spatial distribution. */
if( h->param.rc.b_mb_tree || h->param.rc.i_vbv_buffer_size ||
h->mb.i_mb_width <= 2 || h->mb.i_mb_height <= 2 )
if( h->param.i_lookahead_threads > 1 )
{
for( h->mb.i_mb_y = h->mb.i_mb_height - 1; h->mb.i_mb_y >= 0; h->mb.i_mb_y-- )
x264_slicetype_slice_t s[X264_LOOKAHEAD_THREAD_MAX];
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
{
row_satd[h->mb.i_mb_y] = 0;
if( !frames[b]->b_intra_calculated )
row_satd_intra[h->mb.i_mb_y] = 0;
for( h->mb.i_mb_x = h->mb.i_mb_width - 1; h->mb.i_mb_x >= 0; h->mb.i_mb_x-- )
x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
x264_t *t = h->lookahead_thread[i];
/* FIXME move this somewhere else */
t->mb.i_me_method = h->mb.i_me_method;
t->mb.i_subpel_refine = h->mb.i_subpel_refine;
t->mb.b_chroma_me = h->mb.b_chroma_me;
s[i] = (x264_slicetype_slice_t){ t, a, frames, p0, p1, b, dist_scale_factor, do_search, w,
output_inter[i], output_intra[i] };
t->i_threadslice_start = ((h->mb.i_mb_height * i + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
t->i_threadslice_end = ((h->mb.i_mb_height * (i+1) + h->param.i_lookahead_threads/2) / h->param.i_lookahead_threads);
int thread_height = t->i_threadslice_end - t->i_threadslice_start;
int thread_output_size = thread_height + NUM_INTS;
memset( output_inter[i], 0, thread_output_size * sizeof(int) );
memset( output_intra[i], 0, thread_output_size * sizeof(int) );
output_inter[i][NUM_ROWS] = output_intra[i][NUM_ROWS] = thread_height;
output_inter[i+1] = output_inter[i] + thread_output_size + PAD_SIZE;
output_intra[i+1] = output_intra[i] + thread_output_size + PAD_SIZE;
x264_threadpool_run( h->lookaheadpool, (void*)x264_slicetype_slice_cost, &s[i] );
}
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
x264_threadpool_wait( h->lookaheadpool, &s[i] );
}
else
{
for( h->mb.i_mb_y = h->mb.i_mb_height - 2; h->mb.i_mb_y >= 1; h->mb.i_mb_y-- )
for( h->mb.i_mb_x = h->mb.i_mb_width - 2; h->mb.i_mb_x >= 1; h->mb.i_mb_x-- )
x264_slicetype_mb_cost( h, a, frames, p0, p1, b, dist_scale_factor, do_search, w );
h->i_threadslice_start = 0;
h->i_threadslice_end = h->mb.i_mb_height;
memset( output_inter[0], 0, (output_buf_size - PAD_SIZE) * sizeof(int) );
memset( output_intra[0], 0, (output_buf_size - PAD_SIZE) * sizeof(int) );
output_inter[0][NUM_ROWS] = output_intra[0][NUM_ROWS] = h->mb.i_mb_height;
x264_slicetype_slice_t s = (x264_slicetype_slice_t){ h, a, frames, p0, p1, b, dist_scale_factor, do_search, w,
output_inter[0], output_intra[0] };
x264_slicetype_slice_cost( &s );
}
/* Sum up accumulators */
if( b == p1 )
fenc->i_intra_mbs[b-p0] = 0;
if( !fenc->b_intra_calculated )
{
fenc->i_cost_est[0][0] = 0;
fenc->i_cost_est_aq[0][0] = 0;
}
fenc->i_cost_est[b-p0][p1-b] = 0;
fenc->i_cost_est_aq[b-p0][p1-b] = 0;
int *row_satd_inter = fenc->i_row_satds[b-p0][p1-b];
int *row_satd_intra = fenc->i_row_satds[0][0];
for( int i = 0; i < h->param.i_lookahead_threads; i++ )
{
if( b == p1 )
fenc->i_intra_mbs[b-p0] += output_inter[i][INTRA_MBS];
if( !fenc->b_intra_calculated )
{
fenc->i_cost_est[0][0] += output_intra[i][COST_EST];
fenc->i_cost_est_aq[0][0] += output_intra[i][COST_EST_AQ];
}
fenc->i_cost_est[b-p0][p1-b] += output_inter[i][COST_EST];
fenc->i_cost_est_aq[b-p0][p1-b] += output_inter[i][COST_EST_AQ];
if( h->param.rc.i_vbv_buffer_size )
{
int row_count = output_inter[i][NUM_ROWS];
memcpy( row_satd_inter, output_inter[i] + NUM_INTS, row_count * sizeof(int) );
if( !fenc->b_intra_calculated )
memcpy( row_satd_intra, output_intra[i] + NUM_INTS, row_count * sizeof(int) );
row_satd_inter += row_count;
row_satd_intra += row_count;
}
}
i_score = frames[b]->i_cost_est[b-p0][p1-b];
i_score = fenc->i_cost_est[b-p0][p1-b];
if( b != p1 )
i_score = (uint64_t)i_score * 100 / (120 + h->param.i_bframe_bias);
else
frames[b]->b_intra_calculated = 1;
fenc->b_intra_calculated = 1;
frames[b]->i_cost_est[b-p0][p1-b] = i_score;
fenc->i_cost_est[b-p0][p1-b] = i_score;
x264_emms();
}
......@@ -786,7 +889,7 @@ static int x264_slicetype_frame_cost( x264_t *h, x264_mb_analysis_t *a,
{
// arbitrary penalty for I-blocks after B-frames
int nmb = NUM_MBS;
i_score += (uint64_t)i_score * frames[b]->i_intra_mbs[b-p0] / (nmb * 8);
i_score += (uint64_t)i_score * fenc->i_intra_mbs[b-p0] / (nmb * 8);
}
return i_score;
}
......
......@@ -797,6 +797,7 @@ static void help( x264_param_t *defaults, int longhelp )
H1( " --psnr Enable PSNR computation\n" );
H1( " --ssim Enable SSIM computation\n" );
H1( " --threads <integer> Force a specific number of threads\n" );
H2( " --lookahead-threads <integer> Force a specific number of lookahead threads\n" );
H2( " --sliced-threads Low-latency but lower-efficiency threading\n" );
H2( " --thread-input Run Avisynth in its own thread\n" );
H2( " --sync-lookahead <integer> Number of buffer frames for threaded lookahead\n" );
......@@ -965,6 +966,7 @@ static struct option long_options[] =
{ "zones", required_argument, NULL, 0 },
{ "qpfile", required_argument, NULL, OPT_QPFILE },
{ "threads", required_argument, NULL, 0 },
{ "lookahead-threads", required_argument, NULL, 0 },
{ "sliced-threads", no_argument, NULL, 0 },
{ "no-sliced-threads", no_argument, NULL, 0 },
{ "slice-max-size", required_argument, NULL, 0 },
......
......@@ -41,7 +41,7 @@
#include "x264_config.h"
#define X264_BUILD 124
#define X264_BUILD 125
/* Application developers planning to link against a shared library version of
* libx264 from a Microsoft Visual Studio or similar development environment
......@@ -254,7 +254,8 @@ typedef struct x264_param_t
{
/* CPU flags */
unsigned int cpu;
int i_threads; /* encode multiple frames in parallel */
int i_threads; /* encode multiple frames in parallel */
int i_lookahead_threads; /* multiple threads for lookahead analysis */
int b_sliced_threads; /* Whether to use slice-based threading. */
int b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
int b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment