Commit 10851d0e authored by Loren Merritt

multithreading (via slices)


git-svn-id: svn://svn.videolan.org/x264/trunk@240 df754926-b1dd-0310-bc7b-ec298dee348c
parent 36f6321d
......@@ -45,6 +45,7 @@ void x264_param_default( x264_param_t *param )
/* CPU autodetect */
param->cpu = x264_cpu_detect();
param->i_threads = 1;
/* Video properties */
param->i_csp = X264_CSP_I420;
......
......@@ -121,6 +121,7 @@ typedef struct
int i_type;
int i_first_mb;
int i_last_mb;
int i_pps_id;
......@@ -195,6 +196,8 @@ static const int x264_scan8[16+2*4] =
*/
#define X264_BFRAME_MAX 16
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
typedef struct x264_ratecontrol_t x264_ratecontrol_t;
typedef struct x264_vlc_table_t x264_vlc_table_t;
......@@ -204,11 +207,13 @@ struct x264_t
/* encoder parameters */
x264_param_t param;
x264_t *thread[X264_SLICE_MAX];
/* bitstream output */
struct
{
int i_nal;
x264_nal_t nal[5]; /* for now 5 is enough */
x264_nal_t nal[X264_NAL_MAX];
int i_bitstream; /* size of p_bitstream */
uint8_t *p_bitstream; /* will hold data for all nal */
bs_t bs;
......@@ -223,6 +228,10 @@ struct x264_t
int i_poc_lsb; /* decoding only */
int i_poc; /* decoding only */
int i_thread_num; /* threads only */
int i_nal_type; /* threads only */
int i_nal_ref_idc; /* threads only */
/* We use only one SPS and one PPS */
x264_sps_t sps_array[32];
x264_sps_t *sps;
......@@ -315,7 +324,10 @@ struct x264_t
int mv_min_fpel[2];
int mv_max_fpel[2];
/* neighboring MBs */
unsigned int i_neighbour;
int i_mb_type_top;
int i_mb_type_left;
/* mb table */
int8_t *type; /* mb type */
......
......@@ -522,7 +522,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
}
/* spatial */
if( h->mb.i_mb_x > 0 )
if( h->mb.i_neighbour & MB_LEFT )
{
int i_mb_l = h->mb.i_mb_xy - 1;
/* skip MBs didn't go through the whole search process, so mvr is undefined */
......@@ -533,7 +533,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
i++;
}
}
if( h->mb.i_mb_y > 0 )
if( h->mb.i_neighbour & MB_TOP )
{
int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( !IS_SKIP( h->mb.type[i_mb_t] ) )
......@@ -543,7 +543,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
i++;
}
if( h->mb.i_mb_x > 0 && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
if( h->mb.i_neighbour & MB_TOPLEFT && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
{
mvc[i][0] = mvr[i_mb_t - 1][0];
mvc[i][1] = mvr[i_mb_t - 1][1];
......@@ -944,7 +944,6 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.pic.i_stride[i] = i_stride;
h->mb.pic.p_fenc[i] = &h->fenc->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
h->mb.pic.p_fdec[i] = &h->fdec->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
for( j = 0; j < h->i_ref0; j++ )
......@@ -960,9 +959,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
/* load cache */
if( i_mb_y > 0 )
if( h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride )
{
i_top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
h->mb.i_mb_type_top =
i_top_type= h->mb.type[i_top_xy];
h->mb.i_neighbour |= MB_TOP;
......@@ -987,6 +987,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
else
{
h->mb.i_mb_type_top = -1;
/* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[1] - 8] =
......@@ -1005,9 +1007,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
if( i_mb_x > 0 )
if( i_mb_x > 0 && h->mb.i_mb_xy > h->sh.i_first_mb )
{
i_left_xy = h->mb.i_mb_xy - 1;
h->mb.i_mb_type_left =
i_left_type= h->mb.type[i_left_xy];
h->mb.i_neighbour |= MB_LEFT;
......@@ -1032,6 +1035,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
else
{
h->mb.i_mb_type_left = -1;
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] =
......@@ -1048,10 +1053,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80;
}
if( i_mb_y > 0 && i_mb_x < h->sps->i_mb_width - 1 )
{
if( i_mb_x < h->sps->i_mb_width - 1 && h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride - 1 )
h->mb.i_neighbour |= MB_TOPRIGHT;
}
if( i_mb_x > 0 && h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride + 1 )
h->mb.i_neighbour |= MB_TOPLEFT;
/* load ref/mv/mvd */
if( h->sh.i_type != SLICE_TYPE_I )
......@@ -1059,20 +1064,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
const int s8x8 = h->mb.i_b8_stride;
const int s4x4 = h->mb.i_b4_stride;
int i_top_left_xy = -1;
int i_top_right_xy = -1;
int i_list;
if( h->mb.i_mb_y > 0 && h->mb.i_mb_x > 0 )
{
i_top_left_xy = i_top_xy - 1;
}
if( h->mb.i_mb_y > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
{
i_top_right_xy = i_top_xy + 1;
}
for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{
/*
......@@ -1081,7 +1074,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.cache.ref[i_list][x264_scan8[13]+1] = -2;
*/
if( i_top_left_xy >= 0 )
if( h->mb.i_neighbour & MB_TOPLEFT )
{
const int i8 = x264_scan8[0] - 1 - 1*8;
const int ir = i_mb_8x8 - s8x8 - 1;
......@@ -1126,7 +1119,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
}
if( i_top_right_xy >= 0 )
if( h->mb.i_neighbour & MB_TOPRIGHT )
{
const int i8 = x264_scan8[0] + 4 - 1*8;
const int ir = i_mb_8x8 - s8x8 + 2;
......
......@@ -29,6 +29,7 @@ enum macroblock_position_e
MB_LEFT = 0x01,
MB_TOP = 0x02,
MB_TOPRIGHT = 0x04,
MB_TOPLEFT = 0x08,
MB_PRIVATE = 0x10,
};
......
......@@ -10,6 +10,7 @@ echo " --help print this message"
echo " --enable-avis-input enables avisynth input (win32 only)"
echo " --enable-mp4-output enables mp4 output (using gpac)"
echo " --enable-vfw compiles the VfW frontend"
echo " --enable-pthread enables multithreaded encoding"
echo " --enable-debug adds -g, doesn't strip"
echo " --enable-visualize enables visualization (X11 only)"
echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
......@@ -20,6 +21,7 @@ fi
avis_input="no"
mp4_output="no"
pthread="no"
debug="no"
vfw="no"
vis="no"
......@@ -127,6 +129,15 @@ for opt do
LDFLAGS="$LDFLAGS ${opt#--extra-ldflags=}"
VFW_LDFLAGS="${opt#--extra-ldflags=}"
;;
--enable-pthread)
CFLAGS="$CFLAGS -DHAVE_PTHREAD=1"
pthread="yes"
if [ $SYS = MINGW ]; then
LDFLAGS="$LDFLAGS -lpthreadGC2"
else
LDFLAGS="$LDFLAGS -lpthread"
fi
;;
--enable-debug)
CFLAGS="$CFLAGS -g"
debug="yes"
......@@ -179,6 +190,7 @@ echo "Platform: $ARCH"
echo "System: $SYS"
echo "avis input: $avis_input"
echo "mp4 output: $mp4_output"
echo "pthread: $pthread"
echo "vfw: $vfw"
echo "debug: $debug"
echo "visualize: $vis"
......
......@@ -237,7 +237,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
}
/* Fast intra decision */
if( h->mb.i_mb_xy > 4 )
if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
{
const unsigned int i_neighbour = h->mb.i_neighbour;
if( ((i_neighbour&MB_LEFT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - 1] ))
......@@ -245,7 +245,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
|| (((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT)) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] ))
|| ((i_neighbour&MB_TOPRIGHT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] ))
|| (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
|| (h->mb.i_mb_xy < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) )
|| (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) )
{ /* intra is likely */ }
else
{
......
......@@ -83,11 +83,11 @@ static void x264_cabac_mb_type( x264_t *h )
if( h->sh.i_type == SLICE_TYPE_I )
{
int ctx = 0;
if( h->mb.i_mb_x > 0 && h->mb.type[h->mb.i_mb_xy - 1] != I_4x4 )
if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
{
ctx++;
}
if( h->mb.i_mb_y > 0 && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != I_4x4 )
if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
{
ctx++;
}
......@@ -136,11 +136,11 @@ static void x264_cabac_mb_type( x264_t *h )
else if( h->sh.i_type == SLICE_TYPE_B )
{
int ctx = 0;
if( h->mb.i_mb_x > 0 && h->mb.type[h->mb.i_mb_xy - 1] != B_SKIP && h->mb.type[h->mb.i_mb_xy - 1] != B_DIRECT )
if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
{
ctx++;
}
if( h->mb.i_mb_y > 0 && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != B_SKIP && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != B_DIRECT )
if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
{
ctx++;
}
......@@ -274,11 +274,11 @@ static void x264_cabac_mb_intra8x8_pred_mode( x264_t *h )
int ctx = 0;
/* No need to test for I4x4 or I_16x16 as cache_save handle that */
if( h->mb.i_mb_x > 0 && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 )
if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 )
{
ctx++;
}
if( h->mb.i_mb_y > 0 && h->mb.chroma_pred_mode[h->mb.i_mb_xy - h->mb.i_mb_stride] != 0 )
if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - h->mb.i_mb_stride] != 0 )
{
ctx++;
}
......@@ -312,12 +312,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h )
if( x > 0 )
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 )
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( y > 0 )
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 )
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
......@@ -351,12 +351,12 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h )
int ctx;
/* No need to test for SKIP/PCM */
if( h->mb.i_mb_x > 0 )
if( h->mb.i_neighbour & MB_LEFT )
{
cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3;
}
if( h->mb.i_mb_y > 0 )
if( h->mb.i_neighbour & MB_TOP )
{
cbp_b = (h->mb.cbp[h->mb.i_mb_xy - h->mb.i_mb_stride] >> 4)&0x3;
}
......@@ -388,7 +388,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h )
int ctx;
/* No need to test for PCM / SKIP */
if( i_mbn_xy >= 0 && h->mb.i_last_dqp != 0 &&
if( i_mbn_xy >= h->sh.i_first_mb && h->mb.i_last_dqp != 0 &&
( h->mb.type[i_mbn_xy] == I_16x16 || (h->mb.cbp[i_mbn_xy]&0x3f) ) )
ctx = 1;
else
......@@ -410,11 +410,11 @@ void x264_cabac_mb_skip( x264_t *h, int b_skip )
{
int ctx = 0;
if( h->mb.i_mb_x > 0 && !IS_SKIP( h->mb.type[h->mb.i_mb_xy -1]) )
if( h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ) )
{
ctx++;
}
if( h->mb.i_mb_y > 0 && !IS_SKIP( h->mb.type[h->mb.i_mb_xy -h->mb.i_mb_stride]) )
if( h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top ) )
{
ctx++;
}
......@@ -708,7 +708,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( i_cat == 0 )
{
if( h->mb.i_mb_x > 0 )
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy -1;
if( h->mb.type[i_mba_xy] == I_16x16 )
......@@ -716,7 +716,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
i_nza = h->mb.cbp[i_mba_xy]&0x100;
}
}
if( h->mb.i_mb_y > 0 )
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( h->mb.type[i_mbb_xy] == I_16x16 )
......@@ -732,12 +732,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( x > 0 )
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 )
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy -1;
if( y > 0 )
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 )
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
/* no need to test for skip/pcm */
......@@ -761,7 +761,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
else if( i_cat == 3 )
{
/* no need to test skip/pcm */
if( h->mb.i_mb_x > 0 )
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy -1;
if( h->mb.cbp[i_mba_xy]&0x30 )
......@@ -769,7 +769,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
i_nza = h->mb.cbp[i_mba_xy]&( 0x02 << ( 8 + i_idx) );
}
}
if( h->mb.i_mb_y > 0 )
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( h->mb.cbp[i_mbb_xy]&0x30 )
......@@ -784,12 +784,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( idxc == 1 || idxc == 3 )
i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 )
else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1;
if( idxc == 2 || idxc == 3 )
i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 )
else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
/* no need to test skip/pcm */
......
......@@ -24,9 +24,12 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#if HAVE_PTHREAD
#include <pthread.h>
#endif
#include "common/common.h"
#include "common/cpu.h"
......@@ -134,6 +137,7 @@ static void x264_slice_header_init( x264_t *h, x264_slice_header_t *sh,
sh->i_type = i_type;
sh->i_first_mb = 0;
sh->i_last_mb = h->sps->i_mb_width * h->sps->i_mb_height;
sh->i_pps_id = pps->i_id;
sh->i_frame_num = i_frame;
......@@ -355,6 +359,16 @@ static int x264_validate_parameters( x264_t *h )
return -1;
}
h->param.i_threads = x264_clip3( h->param.i_threads, 1, X264_SLICE_MAX );
h->param.i_threads = X264_MIN( h->param.i_threads, h->param.i_height / 16 );
#if !(HAVE_PTHREAD)
if( h->param.i_threads > 1 )
{
x264_log( h, X264_LOG_WARNING, "not compiled with pthread support!\n");
x264_log( h, X264_LOG_WARNING, "multislicing anyway, but you won't see any speed gain.\n" );
}
#endif
h->param.i_frame_reference = x264_clip3( h->param.i_frame_reference, 1, 16 );
if( h->param.i_keyint_max <= 0 )
h->param.i_keyint_max = 1;
......@@ -373,6 +387,9 @@ static int x264_validate_parameters( x264_t *h )
h->param.i_deblocking_filter_beta = x264_clip3( h->param.i_deblocking_filter_beta, -6, 6 );
h->param.i_cabac_init_idc = x264_clip3( h->param.i_cabac_init_idc, -1, 2 );
/* don't yet support merging of cabac stats */
if( h->param.i_threads > 1 && h->param.i_cabac_init_idc == -1 )
h->param.i_cabac_init_idc = 0;
if( h->param.analyse.i_me_method < X264_ME_DIA ||
h->param.analyse.i_me_method > X264_ME_ESA )
......@@ -524,9 +541,6 @@ x264_t *x264_encoder_open ( x264_param_t *param )
if( x264_ratecontrol_new( h ) < 0 )
return NULL;
h->i_last_intra_size = 0;
h->i_last_inter_size = 0;
/* stat */
for( i_slice = 0; i_slice < 5; i_slice++ )
{
......@@ -550,6 +564,10 @@ x264_t *x264_encoder_open ( x264_param_t *param )
param->cpu&X264_CPU_3DNOW ? "3DNow! " : "",
param->cpu&X264_CPU_ALTIVEC ? "Altivec " : "" );
h->thread[0] = h;
for( i = 1; i < param->i_threads; i++ )
h->thread[i] = x264_malloc( sizeof(x264_t) );
return h;
}
......@@ -859,27 +877,18 @@ static inline void x264_slice_init( x264_t *h, int i_nal_type, int i_slice_type,
x264_macroblock_slice_init( h );
}
static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_idc )
static int x264_slice_write( x264_t *h )
{
int i_skip;
int mb_xy;
int i;
/* Init stats */
h->stat.frame.i_hdr_bits =
h->stat.frame.i_itex_bits =
h->stat.frame.i_ptex_bits =
h->stat.frame.i_misc_bits =
h->stat.frame.i_intra_cost =
h->stat.frame.i_inter_cost = 0;
for( i = 0; i < 18; i++ )
h->stat.frame.i_mb_count[i] = 0;
memset( &h->stat.frame, 0, sizeof(h->stat.frame) );
/* Slice */
x264_nal_start( h, i_nal_type, i_nal_ref_idc );
x264_nal_start( h, h->i_nal_type, h->i_nal_ref_idc );
/* Slice header */
x264_slice_header_write( &h->out.bs, &h->sh, i_nal_ref_idc );
x264_slice_header_write( &h->out.bs, &h->sh, h->i_nal_ref_idc );
if( h->param.b_cabac )
{
/* alignment needed */
......@@ -897,7 +906,7 @@ static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_id
x264_visualize_init( h );
#endif
for( mb_xy = 0, i_skip = 0; mb_xy < h->sps->i_mb_width * h->sps->i_mb_height; mb_xy++ )
for( mb_xy = h->sh.i_first_mb, i_skip = 0; mb_xy < h->sh.i_last_mb; mb_xy++ )
{
const int i_mb_y = mb_xy / h->sps->i_mb_width;
const int i_mb_x = mb_xy % h->sps->i_mb_width;
......@@ -921,38 +930,24 @@ static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_id
TIMER_STOP( i_mtime_encode );
TIMER_START( i_mtime_write );
if( IS_SKIP( h->mb.i_type ) )
if( h->param.b_cabac )
{
if( h->param.b_cabac )
{
if( mb_xy > 0 )
{
/* not end_of_slice_flag */
x264_cabac_encode_terminal( &h->cabac, 0 );
}
if( mb_xy > h->sh.i_first_mb )
x264_cabac_encode_terminal( &h->cabac, 0 );
if( IS_SKIP( h->mb.i_type ) )
x264_cabac_mb_skip( h, 1 );
}
else
{
i_skip++;
}
}
else
{
if( h->param.b_cabac )
{
if( mb_xy > 0 )
{
/* not end_of_slice_flag */
x264_cabac_encode_terminal( &h->cabac, 0 );
}
if( h->sh.i_type != SLICE_TYPE_I )
{
x264_cabac_mb_skip( h, 0 );
}
x264_macroblock_write_cabac( h, &h->out.bs );
}
}
else
{
if( IS_SKIP( h->mb.i_type ) )
i_skip++;
else
{
if( h->sh.i_type != SLICE_TYPE_I )
......@@ -1024,6 +1019,68 @@ static inline void x264_slice_write( x264_t *h, int i_nal_type, int i_nal_ref_id
- h->stat.frame.i_itex_bits
- h->stat.frame.i_ptex_bits
- h->stat.frame.i_hdr_bits;
return 0;
}
#if HAVE_PTHREAD
/* pthread entry point.
 * pthread_create() requires a start routine of type void *(*)(void *);
 * x264_slice_write() takes an x264_t * and returns int, so calling it through
 * a cast function pointer would be undefined behaviour.  This adapter has the
 * required signature and discards the (currently always 0) result. */
static void *x264_slice_write_thread( void *arg )
{
    x264_slice_write( (x264_t *)arg );
    return NULL;
}
#endif

/* Encode the current frame as one slice per configured thread.
 *
 * With param.i_threads == 1 the whole frame is written by x264_slice_write()
 * on h itself, and the payload size of the last NAL in h->out is returned.
 *
 * Otherwise the macroblock rows are divided between the h->thread[] contexts:
 * context i covers rows [i*mb_height/threads, (i+1)*mb_height/threads).
 * Contexts 1..threads-1 are overwritten with a copy of h and pointed at their
 * own share of the output buffer; context 0 is left as it is (the encoder
 * sets thread[0] = h when it is opened, so slice 0 is written through h).
 * After all slices are written, the NAL descriptors and the integer counters
 * of stat.frame from contexts 1..threads-1 are merged back into h.
 *
 * Returns the sum of the payload sizes of the slice NALs that were written. */
static inline int x264_slices_write( x264_t *h )
{
    if( h->param.i_threads == 1 )
    {
        x264_slice_write( h );
        return h->out.nal[h->out.i_nal-1].i_payload;
    }
    else
    {
        int i_nal = h->out.i_nal;
        /* every context gets an equal share of the bitstream buffer */
        int i_bs_size = h->out.i_bitstream / h->param.i_threads;
        /* all entries in stat.frame are ints, so it can be summed as an int array */
        const int i_stat_count = sizeof(h->stat.frame) / sizeof(int);
        int i_frame_size;
        int i;

        /* duplicate contexts */
        for( i = 0; i < h->param.i_threads; i++ )
        {
            x264_t *t = h->thread[i];
            if( i > 0 )
            {
                memcpy( t, h, sizeof(x264_t) );
                t->out.p_bitstream += i*i_bs_size;
                bs_init( &t->out.bs, t->out.p_bitstream, i_bs_size );
            }
            /* slice boundaries always fall on whole macroblock rows */
            t->sh.i_first_mb = (i * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width;
            t->sh.i_last_mb = ((i+1) * h->sps->i_mb_height / h->param.i_threads) * h->sps->i_mb_width;
            /* each context writes its slice into its own NAL slot */
            t->out.i_nal = i_nal + i;
        }

        /* dispatch */
#if HAVE_PTHREAD
        {
            pthread_t handles[X264_SLICE_MAX];
            int b_spawned[X264_SLICE_MAX];

            for( i = 0; i < h->param.i_threads; i++ )
            {
                /* pthread_create() returns 0 on success */
                b_spawned[i] = !pthread_create( &handles[i], NULL, x264_slice_write_thread, h->thread[i] );
                /* If the thread could not be started, encode this slice in
                 * the calling thread so the frame is still complete and no
                 * uninitialised handle is joined below. */
                if( !b_spawned[i] )
                    x264_slice_write( h->thread[i] );
            }
            for( i = 0; i < h->param.i_threads; i++ )
            {
                if( b_spawned[i] )
                    pthread_join( handles[i], NULL );
            }
        }
#else
        for( i = 0; i < h->param.i_threads; i++ )
            x264_slice_write( h->thread[i] );
#endif

        /* merge contexts */
        i_frame_size = h->out.nal[i_nal].i_payload;
        for( i = 1; i < h->param.i_threads; i++ )
        {
            int j;
            x264_t *t = h->thread[i];
            h->out.nal[i_nal+i] = t->out.nal[i_nal+i];
            i_frame_size += t->out.nal[i_nal+i].i_payload;
            for( j = 0; j < i_stat_count; j++ )
                ((int*)&h->stat.frame)[j] += ((int*)&t->stat.frame)[j];
        }
        h->out.i_nal = i_nal + h->param.i_threads;
        return i_frame_size;
    }
}