Commit 10851d0e authored by Loren Merritt

multithreading (via slices)


git-svn-id: svn://svn.videolan.org/x264/trunk@240 df754926-b1dd-0310-bc7b-ec298dee348c
parent 36f6321d
...@@ -45,6 +45,7 @@ void x264_param_default( x264_param_t *param ) ...@@ -45,6 +45,7 @@ void x264_param_default( x264_param_t *param )
/* CPU autodetect */ /* CPU autodetect */
param->cpu = x264_cpu_detect(); param->cpu = x264_cpu_detect();
param->i_threads = 1;
/* Video properties */ /* Video properties */
param->i_csp = X264_CSP_I420; param->i_csp = X264_CSP_I420;
......
...@@ -121,6 +121,7 @@ typedef struct ...@@ -121,6 +121,7 @@ typedef struct
int i_type; int i_type;
int i_first_mb; int i_first_mb;
int i_last_mb;
int i_pps_id; int i_pps_id;
...@@ -195,6 +196,8 @@ static const int x264_scan8[16+2*4] = ...@@ -195,6 +196,8 @@ static const int x264_scan8[16+2*4] =
*/ */
#define X264_BFRAME_MAX 16 #define X264_BFRAME_MAX 16
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
typedef struct x264_ratecontrol_t x264_ratecontrol_t; typedef struct x264_ratecontrol_t x264_ratecontrol_t;
typedef struct x264_vlc_table_t x264_vlc_table_t; typedef struct x264_vlc_table_t x264_vlc_table_t;
...@@ -204,11 +207,13 @@ struct x264_t ...@@ -204,11 +207,13 @@ struct x264_t
/* encoder parameters */ /* encoder parameters */
x264_param_t param; x264_param_t param;
x264_t *thread[X264_SLICE_MAX];
/* bitstream output */ /* bitstream output */
struct struct
{ {
int i_nal; int i_nal;
x264_nal_t nal[5]; /* for now 5 is enough */ x264_nal_t nal[X264_NAL_MAX];
int i_bitstream; /* size of p_bitstream */ int i_bitstream; /* size of p_bitstream */
uint8_t *p_bitstream; /* will hold data for all nal */ uint8_t *p_bitstream; /* will hold data for all nal */
bs_t bs; bs_t bs;
...@@ -223,6 +228,10 @@ struct x264_t ...@@ -223,6 +228,10 @@ struct x264_t
int i_poc_lsb; /* decoding only */ int i_poc_lsb; /* decoding only */
int i_poc; /* decoding only */ int i_poc; /* decoding only */
int i_thread_num; /* threads only */
int i_nal_type; /* threads only */
int i_nal_ref_idc; /* threads only */
/* We use only one SPS and one PPS */ /* We use only one SPS and one PPS */
x264_sps_t sps_array[32]; x264_sps_t sps_array[32];
x264_sps_t *sps; x264_sps_t *sps;
...@@ -315,7 +324,10 @@ struct x264_t ...@@ -315,7 +324,10 @@ struct x264_t
int mv_min_fpel[2]; int mv_min_fpel[2];
int mv_max_fpel[2]; int mv_max_fpel[2];
/* neighboring MBs */
unsigned int i_neighbour; unsigned int i_neighbour;
int i_mb_type_top;
int i_mb_type_left;
/* mb table */ /* mb table */
int8_t *type; /* mb type */ int8_t *type; /* mb type */
......
...@@ -522,7 +522,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2 ...@@ -522,7 +522,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
} }
/* spatial */ /* spatial */
if( h->mb.i_mb_x > 0 ) if( h->mb.i_neighbour & MB_LEFT )
{ {
int i_mb_l = h->mb.i_mb_xy - 1; int i_mb_l = h->mb.i_mb_xy - 1;
/* skip MBs didn't go through the whole search process, so mvr is undefined */ /* skip MBs didn't go through the whole search process, so mvr is undefined */
...@@ -533,7 +533,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2 ...@@ -533,7 +533,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
i++; i++;
} }
} }
if( h->mb.i_mb_y > 0 ) if( h->mb.i_neighbour & MB_TOP )
{ {
int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride; int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( !IS_SKIP( h->mb.type[i_mb_t] ) ) if( !IS_SKIP( h->mb.type[i_mb_t] ) )
...@@ -543,7 +543,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2 ...@@ -543,7 +543,7 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
i++; i++;
} }
if( h->mb.i_mb_x > 0 && !IS_SKIP( h->mb.type[i_mb_t - 1] ) ) if( h->mb.i_neighbour & MB_TOPLEFT && !IS_SKIP( h->mb.type[i_mb_t - 1] ) )
{ {
mvc[i][0] = mvr[i_mb_t - 1][0]; mvc[i][0] = mvr[i_mb_t - 1][0];
mvc[i][1] = mvr[i_mb_t - 1][1]; mvc[i][1] = mvr[i_mb_t - 1][1];
...@@ -944,7 +944,6 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -944,7 +944,6 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.pic.i_stride[i] = i_stride; h->mb.pic.i_stride[i] = i_stride;
h->mb.pic.p_fenc[i] = &h->fenc->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )]; h->mb.pic.p_fenc[i] = &h->fenc->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
h->mb.pic.p_fdec[i] = &h->fdec->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )]; h->mb.pic.p_fdec[i] = &h->fdec->plane[i][ w * ( i_mb_x + i_mb_y * i_stride )];
for( j = 0; j < h->i_ref0; j++ ) for( j = 0; j < h->i_ref0; j++ )
...@@ -960,9 +959,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -960,9 +959,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
/* load cache */ /* load cache */
if( i_mb_y > 0 ) if( h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride )
{ {
i_top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_top_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
h->mb.i_mb_type_top =
i_top_type= h->mb.type[i_top_xy]; i_top_type= h->mb.type[i_top_xy];
h->mb.i_neighbour |= MB_TOP; h->mb.i_neighbour |= MB_TOP;
...@@ -987,6 +987,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -987,6 +987,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
else else
{ {
h->mb.i_mb_type_top = -1;
/* load intra4x4 */ /* load intra4x4 */
h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] = h->mb.cache.intra4x4_pred_mode[x264_scan8[0] - 8] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[1] - 8] = h->mb.cache.intra4x4_pred_mode[x264_scan8[1] - 8] =
...@@ -1005,9 +1007,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1005,9 +1007,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
if( i_mb_x > 0 ) if( i_mb_x > 0 && h->mb.i_mb_xy > h->sh.i_first_mb )
{ {
i_left_xy = h->mb.i_mb_xy - 1; i_left_xy = h->mb.i_mb_xy - 1;
h->mb.i_mb_type_left =
i_left_type= h->mb.type[i_left_xy]; i_left_type= h->mb.type[i_left_xy];
h->mb.i_neighbour |= MB_LEFT; h->mb.i_neighbour |= MB_LEFT;
...@@ -1032,6 +1035,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1032,6 +1035,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
else else
{ {
h->mb.i_mb_type_left = -1;
h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[0 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[2 ] - 1] =
h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] = h->mb.cache.intra4x4_pred_mode[x264_scan8[8 ] - 1] =
...@@ -1048,10 +1053,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1048,10 +1053,10 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80; h->mb.cache.non_zero_count[x264_scan8[16+4+2] - 1] = 0x80;
} }
if( i_mb_y > 0 && i_mb_x < h->sps->i_mb_width - 1 ) if( i_mb_x < h->sps->i_mb_width - 1 && h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride - 1 )
{
h->mb.i_neighbour |= MB_TOPRIGHT; h->mb.i_neighbour |= MB_TOPRIGHT;
} if( i_mb_x > 0 && h->mb.i_mb_xy >= h->sh.i_first_mb + h->mb.i_mb_stride + 1 )
h->mb.i_neighbour |= MB_TOPLEFT;
/* load ref/mv/mvd */ /* load ref/mv/mvd */
if( h->sh.i_type != SLICE_TYPE_I ) if( h->sh.i_type != SLICE_TYPE_I )
...@@ -1059,20 +1064,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1059,20 +1064,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
const int s8x8 = h->mb.i_b8_stride; const int s8x8 = h->mb.i_b8_stride;
const int s4x4 = h->mb.i_b4_stride; const int s4x4 = h->mb.i_b4_stride;
int i_top_left_xy = -1;
int i_top_right_xy = -1;
int i_list; int i_list;
if( h->mb.i_mb_y > 0 && h->mb.i_mb_x > 0 )
{
i_top_left_xy = i_top_xy - 1;
}
if( h->mb.i_mb_y > 0 && h->mb.i_mb_x < h->sps->i_mb_width - 1 )
{
i_top_right_xy = i_top_xy + 1;
}
for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ ) for( i_list = 0; i_list < (h->sh.i_type == SLICE_TYPE_B ? 2 : 1 ); i_list++ )
{ {
/* /*
...@@ -1081,7 +1074,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1081,7 +1074,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
h->mb.cache.ref[i_list][x264_scan8[13]+1] = -2; h->mb.cache.ref[i_list][x264_scan8[13]+1] = -2;
*/ */
if( i_top_left_xy >= 0 ) if( h->mb.i_neighbour & MB_TOPLEFT )
{ {
const int i8 = x264_scan8[0] - 1 - 1*8; const int i8 = x264_scan8[0] - 1 - 1*8;
const int ir = i_mb_8x8 - s8x8 - 1; const int ir = i_mb_8x8 - s8x8 - 1;
...@@ -1126,7 +1119,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y ) ...@@ -1126,7 +1119,7 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
} }
} }
if( i_top_right_xy >= 0 ) if( h->mb.i_neighbour & MB_TOPRIGHT )
{ {
const int i8 = x264_scan8[0] + 4 - 1*8; const int i8 = x264_scan8[0] + 4 - 1*8;
const int ir = i_mb_8x8 - s8x8 + 2; const int ir = i_mb_8x8 - s8x8 + 2;
......
...@@ -29,6 +29,7 @@ enum macroblock_position_e ...@@ -29,6 +29,7 @@ enum macroblock_position_e
MB_LEFT = 0x01, MB_LEFT = 0x01,
MB_TOP = 0x02, MB_TOP = 0x02,
MB_TOPRIGHT = 0x04, MB_TOPRIGHT = 0x04,
MB_TOPLEFT = 0x08,
MB_PRIVATE = 0x10, MB_PRIVATE = 0x10,
}; };
......
...@@ -10,6 +10,7 @@ echo " --help print this message" ...@@ -10,6 +10,7 @@ echo " --help print this message"
echo " --enable-avis-input enables avisynth input (win32 only)" echo " --enable-avis-input enables avisynth input (win32 only)"
echo " --enable-mp4-output enables mp4 output (using gpac)" echo " --enable-mp4-output enables mp4 output (using gpac)"
echo " --enable-vfw compiles the VfW frontend" echo " --enable-vfw compiles the VfW frontend"
echo " --enable-pthread enables multithreaded encoding"
echo " --enable-debug adds -g, doesn't strip" echo " --enable-debug adds -g, doesn't strip"
echo " --enable-visualize enables visualization (X11 only)" echo " --enable-visualize enables visualization (X11 only)"
echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS" echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS"
...@@ -20,6 +21,7 @@ fi ...@@ -20,6 +21,7 @@ fi
avis_input="no" avis_input="no"
mp4_output="no" mp4_output="no"
pthread="no"
debug="no" debug="no"
vfw="no" vfw="no"
vis="no" vis="no"
...@@ -127,6 +129,15 @@ for opt do ...@@ -127,6 +129,15 @@ for opt do
LDFLAGS="$LDFLAGS ${opt#--extra-ldflags=}" LDFLAGS="$LDFLAGS ${opt#--extra-ldflags=}"
VFW_LDFLAGS="${opt#--extra-ldflags=}" VFW_LDFLAGS="${opt#--extra-ldflags=}"
;; ;;
--enable-pthread)
CFLAGS="$CFLAGS -DHAVE_PTHREAD=1"
pthread="yes"
if [ $SYS = MINGW ]; then
LDFLAGS="$LDFLAGS -lpthreadGC2"
else
LDFLAGS="$LDFLAGS -lpthread"
fi
;;
--enable-debug) --enable-debug)
CFLAGS="$CFLAGS -g" CFLAGS="$CFLAGS -g"
debug="yes" debug="yes"
...@@ -179,6 +190,7 @@ echo "Platform: $ARCH" ...@@ -179,6 +190,7 @@ echo "Platform: $ARCH"
echo "System: $SYS" echo "System: $SYS"
echo "avis input: $avis_input" echo "avis input: $avis_input"
echo "mp4 output: $mp4_output" echo "mp4 output: $mp4_output"
echo "pthread: $pthread"
echo "vfw: $vfw" echo "vfw: $vfw"
echo "debug: $debug" echo "debug: $debug"
echo "visualize: $vis" echo "visualize: $vis"
......
...@@ -237,7 +237,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -237,7 +237,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
} }
/* Fast intra decision */ /* Fast intra decision */
if( h->mb.i_mb_xy > 4 ) if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 )
{ {
const unsigned int i_neighbour = h->mb.i_neighbour; const unsigned int i_neighbour = h->mb.i_neighbour;
if( ((i_neighbour&MB_LEFT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - 1] )) if( ((i_neighbour&MB_LEFT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - 1] ))
...@@ -245,7 +245,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) ...@@ -245,7 +245,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
|| (((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT)) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] )) || (((i_neighbour&(MB_TOP|MB_LEFT)) == (MB_TOP|MB_LEFT)) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride-1 ] ))
|| ((i_neighbour&MB_TOPRIGHT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] )) || ((i_neighbour&MB_TOPRIGHT) && IS_INTRA( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride+1 ] ))
|| (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] )) || (h->sh.i_type == SLICE_TYPE_P && IS_INTRA( h->fref0[0]->mb_type[h->mb.i_mb_xy] ))
|| (h->mb.i_mb_xy < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) ) || (h->mb.i_mb_xy - h->sh.i_first_mb < 3*(h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16])) )
{ /* intra is likely */ } { /* intra is likely */ }
else else
{ {
......
...@@ -83,11 +83,11 @@ static void x264_cabac_mb_type( x264_t *h ) ...@@ -83,11 +83,11 @@ static void x264_cabac_mb_type( x264_t *h )
if( h->sh.i_type == SLICE_TYPE_I ) if( h->sh.i_type == SLICE_TYPE_I )
{ {
int ctx = 0; int ctx = 0;
if( h->mb.i_mb_x > 0 && h->mb.type[h->mb.i_mb_xy - 1] != I_4x4 ) if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
{ {
ctx++; ctx++;
} }
if( h->mb.i_mb_y > 0 && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != I_4x4 ) if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != I_4x4 )
{ {
ctx++; ctx++;
} }
...@@ -136,11 +136,11 @@ static void x264_cabac_mb_type( x264_t *h ) ...@@ -136,11 +136,11 @@ static void x264_cabac_mb_type( x264_t *h )
else if( h->sh.i_type == SLICE_TYPE_B ) else if( h->sh.i_type == SLICE_TYPE_B )
{ {
int ctx = 0; int ctx = 0;
if( h->mb.i_mb_x > 0 && h->mb.type[h->mb.i_mb_xy - 1] != B_SKIP && h->mb.type[h->mb.i_mb_xy - 1] != B_DIRECT ) if( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
{ {
ctx++; ctx++;
} }
if( h->mb.i_mb_y > 0 && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != B_SKIP && h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] != B_DIRECT ) if( h->mb.i_mb_type_top >= 0 && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
{ {
ctx++; ctx++;
} }
...@@ -274,11 +274,11 @@ static void x264_cabac_mb_intra8x8_pred_mode( x264_t *h ) ...@@ -274,11 +274,11 @@ static void x264_cabac_mb_intra8x8_pred_mode( x264_t *h )
int ctx = 0; int ctx = 0;
/* No need to test for I4x4 or I_16x16 as cache_save handle that */ /* No need to test for I4x4 or I_16x16 as cache_save handle that */
if( h->mb.i_mb_x > 0 && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 ) if( (h->mb.i_neighbour & MB_LEFT) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - 1] != 0 )
{ {
ctx++; ctx++;
} }
if( h->mb.i_mb_y > 0 && h->mb.chroma_pred_mode[h->mb.i_mb_xy - h->mb.i_mb_stride] != 0 ) if( (h->mb.i_neighbour & MB_TOP) && h->mb.chroma_pred_mode[h->mb.i_mb_xy - h->mb.i_mb_stride] != 0 )
{ {
ctx++; ctx++;
} }
...@@ -312,12 +312,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h ) ...@@ -312,12 +312,12 @@ static void x264_cabac_mb_cbp_luma( x264_t *h )
if( x > 0 ) if( x > 0 )
i_mba_xy = h->mb.i_mb_xy; i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 ) else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1; i_mba_xy = h->mb.i_mb_xy - 1;
if( y > 0 ) if( y > 0 )
i_mbb_xy = h->mb.i_mb_xy; i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 ) else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
...@@ -351,12 +351,12 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h ) ...@@ -351,12 +351,12 @@ static void x264_cabac_mb_cbp_chroma( x264_t *h )
int ctx; int ctx;
/* No need to test for SKIP/PCM */ /* No need to test for SKIP/PCM */
if( h->mb.i_mb_x > 0 ) if( h->mb.i_neighbour & MB_LEFT )
{ {
cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3; cbp_a = (h->mb.cbp[h->mb.i_mb_xy - 1] >> 4)&0x3;
} }
if( h->mb.i_mb_y > 0 ) if( h->mb.i_neighbour & MB_TOP )
{ {
cbp_b = (h->mb.cbp[h->mb.i_mb_xy - h->mb.i_mb_stride] >> 4)&0x3; cbp_b = (h->mb.cbp[h->mb.i_mb_xy - h->mb.i_mb_stride] >> 4)&0x3;
} }
...@@ -388,7 +388,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h ) ...@@ -388,7 +388,7 @@ static void x264_cabac_mb_qp_delta( x264_t *h )
int ctx; int ctx;
/* No need to test for PCM / SKIP */ /* No need to test for PCM / SKIP */
if( i_mbn_xy >= 0 && h->mb.i_last_dqp != 0 && if( i_mbn_xy >= h->sh.i_first_mb && h->mb.i_last_dqp != 0 &&
( h->mb.type[i_mbn_xy] == I_16x16 || (h->mb.cbp[i_mbn_xy]&0x3f) ) ) ( h->mb.type[i_mbn_xy] == I_16x16 || (h->mb.cbp[i_mbn_xy]&0x3f) ) )
ctx = 1; ctx = 1;
else else
...@@ -410,11 +410,11 @@ void x264_cabac_mb_skip( x264_t *h, int b_skip ) ...@@ -410,11 +410,11 @@ void x264_cabac_mb_skip( x264_t *h, int b_skip )
{ {
int ctx = 0; int ctx = 0;
if( h->mb.i_mb_x > 0 && !IS_SKIP( h->mb.type[h->mb.i_mb_xy -1]) ) if( h->mb.i_mb_type_left >= 0 && !IS_SKIP( h->mb.i_mb_type_left ) )
{ {
ctx++; ctx++;
} }
if( h->mb.i_mb_y > 0 && !IS_SKIP( h->mb.type[h->mb.i_mb_xy -h->mb.i_mb_stride]) ) if( h->mb.i_mb_type_top >= 0 && !IS_SKIP( h->mb.i_mb_type_top ) )
{ {
ctx++; ctx++;
} }
...@@ -708,7 +708,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -708,7 +708,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( i_cat == 0 ) if( i_cat == 0 )
{ {
if( h->mb.i_mb_x > 0 ) if( h->mb.i_neighbour & MB_LEFT )
{ {
i_mba_xy = h->mb.i_mb_xy -1; i_mba_xy = h->mb.i_mb_xy -1;
if( h->mb.type[i_mba_xy] == I_16x16 ) if( h->mb.type[i_mba_xy] == I_16x16 )
...@@ -716,7 +716,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -716,7 +716,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
i_nza = h->mb.cbp[i_mba_xy]&0x100; i_nza = h->mb.cbp[i_mba_xy]&0x100;
} }
} }
if( h->mb.i_mb_y > 0 ) if( h->mb.i_neighbour & MB_TOP )
{ {
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( h->mb.type[i_mbb_xy] == I_16x16 ) if( h->mb.type[i_mbb_xy] == I_16x16 )
...@@ -732,12 +732,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -732,12 +732,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( x > 0 ) if( x > 0 )
i_mba_xy = h->mb.i_mb_xy; i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 ) else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy -1; i_mba_xy = h->mb.i_mb_xy -1;
if( y > 0 ) if( y > 0 )
i_mbb_xy = h->mb.i_mb_xy; i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 ) else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
/* no need to test for skip/pcm */ /* no need to test for skip/pcm */
...@@ -761,7 +761,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -761,7 +761,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
else if( i_cat == 3 ) else if( i_cat == 3 )
{ {
/* no need to test skip/pcm */ /* no need to test skip/pcm */
if( h->mb.i_mb_x > 0 ) if( h->mb.i_neighbour & MB_LEFT )
{ {
i_mba_xy = h->mb.i_mb_xy -1; i_mba_xy = h->mb.i_mb_xy -1;
if( h->mb.cbp[i_mba_xy]&0x30 ) if( h->mb.cbp[i_mba_xy]&0x30 )
...@@ -769,7 +769,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -769,7 +769,7 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
i_nza = h->mb.cbp[i_mba_xy]&( 0x02 << ( 8 + i_idx) ); i_nza = h->mb.cbp[i_mba_xy]&( 0x02 << ( 8 + i_idx) );
} }
} }
if( h->mb.i_mb_y > 0 ) if( h->mb.i_neighbour & MB_TOP )
{ {
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
if( h->mb.cbp[i_mbb_xy]&0x30 ) if( h->mb.cbp[i_mbb_xy]&0x30 )
...@@ -784,12 +784,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx ) ...@@ -784,12 +784,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( idxc == 1 || idxc == 3 ) if( idxc == 1 || idxc == 3 )
i_mba_xy = h->mb.i_mb_xy; i_mba_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_x > 0 ) else if( h->mb.i_neighbour & MB_LEFT )
i_mba_xy = h->mb.i_mb_xy - 1; i_mba_xy = h->mb.i_mb_xy - 1;
if( idxc == 2 || idxc == 3 ) if( idxc == 2 || idxc == 3 )
i_mbb_xy = h->mb.i_mb_xy; i_mbb_xy = h->mb.i_mb_xy;
else if( h->mb.i_mb_y > 0 ) else if( h->mb.i_neighbour & MB_TOP )
i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride; i_mbb_xy = h->mb.i_mb_xy - h->mb.i_mb_stride;
/* no need to test skip/pcm */ /* no need to test skip/pcm */
......
...@@ -24,9 +24,12 @@ ...@@ -24,9 +24,12 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include <math.h>