Commit 86145948 authored by Loren Merritt

SATD-based decision for 8x8 transform in inter-MBs.

Enable 8x8 intra.
CLI options: --8x8dct, --analyse i8x8.


git-svn-id: svn://svn.videolan.org/x264/trunk@251 df754926-b1dd-0310-bc7b-ec298dee348c
parent 6bf1398b
......@@ -105,7 +105,8 @@ void x264_param_default( x264_param_t *param )
/* */
param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8
| X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_me_method = X264_ME_HEX;
param->analyse.i_me_range = 16;
......
......@@ -398,8 +398,9 @@ struct x264_t
int16_t direct_mv[2][X264_SCAN8_SIZE][2];
int8_t direct_ref[2][X264_SCAN8_SIZE];
/* top and left neighbors. 1=>8x8, 0=>4x4 */
int8_t transform_size[2];
/* number of neighbors (top and left) that used 8x8 dct */
int i_neighbour_transform_size;
int b_transform_8x8_allowed;
} cache;
/* */
......@@ -437,6 +438,7 @@ struct x264_t
int i_misc_bits;
/* MB type counts */
int i_mb_count[19];
int i_mb_count_i;
int i_mb_count_p;
int i_mb_count_skip;
/* Estimated (SATD) cost as Intra/Predicted frame */
......
......@@ -135,22 +135,28 @@ static void idct4x4dc( int16_t d[4][4] )
}
}
static void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
static inline void pixel_sub_wxh( int16_t *diff, int i_size,
uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
int16_t d[4][4];
int16_t tmp[4][4];
int y, x;
int i;
for( y = 0; y < 4; y++ )
for( y = 0; y < i_size; y++ )
{
for( x = 0; x < 4; x++ )
for( x = 0; x < i_size; x++ )
{
d[y][x] = pix1[x] - pix2[x];
diff[x + y*i_size] = pix1[x] - pix2[x];
}
pix1 += i_pix1;
pix2 += i_pix2;
}
}
static void sub4x4_dct( int16_t dct[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
int16_t d[4][4];
int16_t tmp[4][4];
int i;
pixel_sub_wxh( (int16_t*)d, 4, pix1, i_pix1, pix2, i_pix2 );
for( i = 0; i < 4; i++ )
{
......@@ -289,17 +295,9 @@ static void add16x16_idct( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
int y, x, i;
int i;
for( y = 0; y < 8; y++ )
{
for( x = 0; x < 8; x++ )
{
dct[y][x] = pix1[x] - pix2[x];
}
pix1 += i_pix1;
pix2 += i_pix2;
}
pixel_sub_wxh( (int16_t*)dct, 8, pix1, i_pix1, pix2, i_pix2 );
#define SRC(x) dct[i][x]
for( i = 0; i < 8; i++ )
......
......@@ -140,11 +140,13 @@ int x264_mb_predict_non_zero_code( x264_t *h, int idx )
return i_ret & 0x7f;
}
int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type )
int x264_mb_transform_8x8_allowed( x264_t *h )
{
int i;
if( i_mb_type == P_8x8 || i_mb_type == B_8x8 )
if( IS_SKIP( h->mb.i_type ) )
return 0;
if( h->mb.i_type == P_8x8 || h->mb.i_type == B_8x8 )
{
int i;
for( i = 0; i < 4; i++ )
if( !IS_SUB8x8(h->mb.i_sub_partition[i])
|| ( h->mb.i_sub_partition[i] == D_DIRECT_8x8 && !h->sps->b_direct8x8_inference ) )
......@@ -152,7 +154,7 @@ int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type )
return 0;
}
}
if( i_mb_type == B_DIRECT && !h->sps->b_direct8x8_inference )
if( h->mb.i_type == B_DIRECT && !h->sps->b_direct8x8_inference )
return 0;
return 1;
......@@ -1178,10 +1180,9 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
if( h->param.analyse.b_transform_8x8 )
{
h->mb.cache.transform_size[0] = (h->mb.i_neighbour&MB_LEFT)
&& h->mb.mb_transform_size[i_left_xy];
h->mb.cache.transform_size[1] = (h->mb.i_neighbour&MB_TOP)
&& h->mb.mb_transform_size[i_top_xy];
h->mb.cache.i_neighbour_transform_size =
( i_left_type >= 0 && h->mb.mb_transform_size[i_left_xy] )
+ ( i_top_type >= 0 && h->mb.mb_transform_size[i_top_xy] );
}
/* load ref/mv/mvd */
......@@ -1352,8 +1353,6 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y )
}
}
// FIXME skip this if I_4x4 and I_8x8 are disabled?
// assumes MB_TOPRIGHT = MB_TOP<<1
h->mb.i_neighbour4[0] =
h->mb.i_neighbour8[0] = (h->mb.i_neighbour & (MB_TOP|MB_LEFT|MB_TOPLEFT))
| ((h->mb.i_neighbour & MB_TOP) ? MB_TOPRIGHT : 0);
......@@ -1389,6 +1388,8 @@ void x264_macroblock_cache_save( x264_t *h )
int i;
h->mb.type[i_mb_xy] = i_mb_type;
if( IS_SKIP( h->mb.i_type ) )
h->mb.qp[i_mb_xy] = h->mb.i_last_qp;
......
......@@ -201,7 +201,12 @@ void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[5][2
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx );
int x264_mb_predict_non_zero_code( x264_t *h, int idx );
int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type );
/* x264_mb_transform_8x8_allowed:
* returns 1 if the 8x8 transform is permitted for the current macroblock,
* 0 if not (e.g. some partition is smaller than 8x8, or at least
* might be, according to just partition type).
* doesn't check for intra or cbp */
int x264_mb_transform_8x8_allowed( x264_t *h );
void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale );
void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale );
......
......@@ -104,20 +104,25 @@ PIXEL_SSD_C( pixel_ssd_4x8, 4, 8 )
PIXEL_SSD_C( pixel_ssd_4x4, 4, 4 )
static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
static inline void pixel_sub_wxh( int16_t *diff, int i_size,
uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
int y, x;
for( y = 0; y < 4; y++ )
for( y = 0; y < i_size; y++ )
{
for( x = 0; x < 4; x++ )
for( x = 0; x < i_size; x++ )
{
diff[y][x] = pix1[x] - pix2[x];
diff[x + y*i_size] = pix1[x] - pix2[x];
}
pix1 += i_pix1;
pix2 += i_pix2;
}
}
/****************************************************************************
* pixel_satd_WxH: sum of 4x4 Hadamard transformed differences
****************************************************************************/
static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
{
int16_t tmp[4][4];
......@@ -131,7 +136,7 @@ static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2,
{
int d;
pixel_sub_4x4( diff, &pix1[x], i_pix1, &pix2[x], i_pix2 );
pixel_sub_wxh( (int16_t*)diff, 4, &pix1[x], i_pix1, &pix2[x], i_pix2 );
for( d = 0; d < 4; d++ )
{
......@@ -179,6 +184,83 @@ PIXEL_SATD_C( pixel_satd_4x8, 4, 8 )
PIXEL_SATD_C( pixel_satd_4x4, 4, 4 )
/****************************************************************************
* pixel_sa8d_WxH: sum of 8x8 Hadamard transformed differences
****************************************************************************/
/* SA8D_1D: one 8-point, 1-D Hadamard-style butterfly pass, done in three
 * add/subtract stages (a*, b*, then the final outputs).
 * The expansion site must define two helper macros beforehand:
 *   SRC(n)        - expression yielding input sample n (0..7)
 *   DST(n, value) - statement consuming output n (0..7)
 * All eight inputs are read into locals before any DST() is issued, so SRC
 * and DST may refer to the same storage (in-place transform). */
#define SA8D_1D {\
const int a0 = SRC(0) + SRC(4);\
const int a4 = SRC(0) - SRC(4);\
const int a1 = SRC(1) + SRC(5);\
const int a5 = SRC(1) - SRC(5);\
const int a2 = SRC(2) + SRC(6);\
const int a6 = SRC(2) - SRC(6);\
const int a3 = SRC(3) + SRC(7);\
const int a7 = SRC(3) - SRC(7);\
const int b0 = a0 + a2;\
const int b2 = a0 - a2;\
const int b1 = a1 + a3;\
const int b3 = a1 - a3;\
const int b4 = a4 + a6;\
const int b6 = a4 - a6;\
const int b5 = a5 + a7;\
const int b7 = a5 - a7;\
DST(0, b0 + b1);\
DST(1, b0 - b1);\
DST(2, b2 + b3);\
DST(3, b2 - b3);\
DST(4, b4 + b5);\
DST(5, b4 - b5);\
DST(6, b6 + b7);\
DST(7, b6 - b7);\
}
/* pixel_sa8d_wxh:
 * Walks an i_width x i_height area in 8x8 tiles.  For each tile the pixel
 * differences (pix1 - pix2) are transformed with SA8D_1D along rows (in
 * place) and then along columns, and the absolute values of the column-pass
 * outputs are accumulated.
 * i_pix1 / i_pix2 are the line strides of pix1 / pix2.
 * Returns the raw (unscaled) sum over all tiles. */
static inline int pixel_sa8d_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2,
                                  int i_width, int i_height )
{
    int16_t tile[8][8];
    int i_sum = 0;
    int col, row;

    for( row = 0; row < i_height; row += 8, pix1 += 8 * i_pix1, pix2 += 8 * i_pix2 )
    {
        for( col = 0; col < i_width; col += 8 )
        {
            int i;

            /* residual of the current 8x8 tile */
            pixel_sub_wxh( &tile[0][0], 8, &pix1[col], i_pix1, &pix2[col], i_pix2 );

            /* horizontal pass: transform every row in place */
#define SRC(x) tile[i][x]
#define DST(x,rhs) tile[i][x] = (rhs)
            for( i = 0; i < 8; i++ )
                SA8D_1D
#undef SRC
#undef DST

            /* vertical pass: transform every column, summing |output| */
#define SRC(x) tile[x][i]
#define DST(x,rhs) i_sum += abs(rhs)
            for( i = 0; i < 8; i++ )
                SA8D_1D
#undef SRC
#undef DST
        }
    }

    return i_sum;
}
/* PIXEL_SA8D_C( width, height ):
 * defines pixel_sa8d_<width>x<height>(), a fixed-size wrapper around
 * pixel_sa8d_wxh() that divides the raw sum by 4 with rounding
 * ((sum + 2) >> 2). */
#define PIXEL_SA8D_C( width, height ) \
static int pixel_sa8d_##width##x##height( uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2 ) \
{ \
return ( pixel_sa8d_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ) + 2 ) >> 2; \
}
/* instantiate the four block sizes that are multiples of 8x8 */
PIXEL_SA8D_C( 16, 16 )
PIXEL_SA8D_C( 16, 8 )
PIXEL_SA8D_C( 8, 16 )
PIXEL_SA8D_C( 8, 8 )
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
{
int x, y;
......@@ -291,6 +373,11 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->satd[PIXEL_4x8] = pixel_satd_4x8;
pixf->satd[PIXEL_4x4] = pixel_satd_4x4;
pixf->sa8d[PIXEL_16x16]= pixel_sa8d_16x16;
pixf->sa8d[PIXEL_16x8] = pixel_sa8d_16x8;
pixf->sa8d[PIXEL_8x16] = pixel_sa8d_8x16;
pixf->sa8d[PIXEL_8x8] = pixel_sa8d_8x8;
pixf->avg[PIXEL_16x16]= pixel_avg_16x16;
pixf->avg[PIXEL_16x8] = pixel_avg_16x8;
pixf->avg[PIXEL_8x16] = pixel_avg_8x16;
......
......@@ -24,9 +24,7 @@
#ifndef _PIXEL_H
#define _PIXEL_H 1
typedef int (*x264_pixel_sad_t) ( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_ssd_t) ( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_satd_t)( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_cmp_t) ( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_weight_t) ( uint8_t *, int, uint8_t *, int, int );
......@@ -65,9 +63,10 @@ static const int x264_size2pixel[5][5] = {
typedef struct
{
x264_pixel_sad_t sad[7];
x264_pixel_ssd_t ssd[7];
x264_pixel_satd_t satd[7];
x264_pixel_cmp_t sad[7];
x264_pixel_cmp_t ssd[7];
x264_pixel_cmp_t satd[7];
x264_pixel_cmp_t sa8d[4];
x264_pixel_avg_t avg[10];
x264_pixel_avg_weight_t avg_weight[10];
} x264_pixel_function_t;
......
......@@ -686,27 +686,27 @@ static void predict_8x8_dc_128( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_dc_left( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_LEFT
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
static void predict_8x8_dc_top( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_TOP
const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
PREDICT_8x8_DC(dc);
}
static void predict_8x8_dc( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOP
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
PREDICT_8x8_DC(dc);
}
static void predict_8x8_h( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_LEFT
#define ROW(y) ((uint32_t*)(src+y*i_stride))[0] =\
((uint32_t*)(src+y*i_stride))[1] = 0x01010101U * l##y
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
......@@ -729,8 +729,8 @@ static void predict_8x8_v( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_ddl( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_TOPRIGHT;
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
......@@ -749,9 +749,9 @@ static void predict_8x8_ddl( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_ddr( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_TOPLEFT;
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOPLEFT
SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
......@@ -771,9 +771,9 @@ static void predict_8x8_ddr( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_vr( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_TOPLEFT;
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOPLEFT
/* produce warning as l7 is unused */
SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
......@@ -800,9 +800,9 @@ static void predict_8x8_vr( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_hd( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_TOPLEFT;
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_LEFT
PREDICT_8x8_LOAD_TOPLEFT
/* produce warning as t7 is unused */
SRC(0,7)= (l6 + l7 + 1) >> 1;
SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
......@@ -829,8 +829,8 @@ static void predict_8x8_hd( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_vl( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_TOP;
PREDICT_8x8_LOAD_TOPRIGHT;
PREDICT_8x8_LOAD_TOP
PREDICT_8x8_LOAD_TOPRIGHT
SRC(0,0)= (t0 + t1 + 1) >> 1;
SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
......@@ -856,7 +856,7 @@ static void predict_8x8_vl( uint8_t *src, int i_stride, int i_neighbor )
}
static void predict_8x8_hu( uint8_t *src, int i_stride, int i_neighbor )
{
PREDICT_8x8_LOAD_LEFT;
PREDICT_8x8_LOAD_LEFT
SRC(0,0)= (l0 + l1 + 1) >> 1;
SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
......
......@@ -514,6 +514,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos
i_mode = predict_mode[i];
h->predict_8x8[i_mode]( p_dst_by, i_stride, h->mb.i_neighbour );
/* could use sa8d, but it doesn't seem worth the speed cost (without mmx at least) */
i_sad = h->pixf.satd[PIXEL_8x8]( p_dst_by, i_stride,
p_src_by, i_stride );
......@@ -533,7 +534,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *res, int i_cos
x264_macroblock_cache_intra8x8_pred( h, 2*x, 2*y, res->i_predict4x4[x][y] );
}
// res->i_sad_i8x8 += res->i_lambda * something; // FIXME
// FIXME some bias like in i4x4?
if( h->sh.i_type == SLICE_TYPE_B )
res->i_sad_i8x8 += res->i_lambda * i_mb_b_cost_table[I_8x8];
}
......@@ -1342,6 +1343,29 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16];
}
/* x264_mb_analyse_transform:
 * choose between the 4x4 and the 8x8 transform for the current macroblock.
 * The 8x8 transform is only considered when it is enabled in the analysis
 * parameters, the macroblock is not intra, and
 * x264_mb_transform_8x8_allowed() agrees; the outcome of that check is
 * recorded in h->mb.cache.b_transform_8x8_allowed.
 * When considered, h->mb.b_transform_8x8 is set iff the 16x16 sa8d cost of
 * plane 0 is strictly lower than its 16x16 satd cost. */
static inline void x264_mb_analyse_transform( x264_t *h )
{
    int i_cost_dct8, i_cost_dct4;

    if( !h->param.analyse.b_transform_8x8
        || IS_INTRA( h->mb.i_type )
        || !x264_mb_transform_8x8_allowed( h ) )
    {
        h->mb.cache.b_transform_8x8_allowed = 0;
        return;
    }

    /* FIXME only luma mc is needed */
    x264_mb_mc( h );

    /* 8x8 Hadamard cost first, then 4x4 Hadamard cost, on the same block */
    i_cost_dct8 = h->pixf.sa8d[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
                                             h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
    i_cost_dct4 = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
                                             h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );

    h->mb.b_transform_8x8 = i_cost_dct8 < i_cost_dct4;
    h->mb.cache.b_transform_8x8_allowed = 1;
}
/*****************************************************************************
* x264_macroblock_analyse:
*****************************************************************************/
......@@ -1552,6 +1576,11 @@ void x264_macroblock_analyse( x264_t *h )
i_intra_type = I_16x16;
i_intra_cost = analysis.i_sad_i16x16;
if( analysis.i_sad_i8x8 < i_intra_cost )
{
i_intra_type = I_8x8;
i_intra_cost = analysis.i_sad_i8x8;
}
if( analysis.i_sad_i4x4 < i_intra_cost )
{
i_intra_type = I_4x4;
......@@ -1740,6 +1769,11 @@ void x264_macroblock_analyse( x264_t *h )
h->mb.i_type = I_16x16;
i_cost = analysis.i_sad_i16x16;
}
if( analysis.i_sad_i8x8 < i_cost )
{
h->mb.i_type = I_8x8;
i_cost = analysis.i_sad_i8x8;
}
if( analysis.i_sad_i4x4 < i_cost )
{
h->mb.i_type = I_4x4;
......@@ -1749,7 +1783,6 @@ void x264_macroblock_analyse( x264_t *h )
}
/*-------------------- Update MB from the analysis ----------------------*/
h->mb.type[h->mb.i_mb_xy] = x264_mb_type_fix[h->mb.i_type];
switch( h->mb.i_type )
{
case I_4x4:
......@@ -1902,6 +1935,8 @@ void x264_macroblock_analyse( x264_t *h )
break;
}
}
x264_mb_analyse_transform( h );
}
#include "slicetype_decision.c"
......
......@@ -78,7 +78,7 @@ static inline void x264_cabac_mb_type_intra( x264_t *h, int i_mb_type,
static void x264_cabac_mb_type( x264_t *h )
{
const int i_mb_type = x264_mb_type_fix[h->mb.i_type];
const int i_mb_type = h->mb.i_type;
if( h->sh.i_type == SLICE_TYPE_I )
{
......@@ -557,9 +557,8 @@ static inline void x264_cabac_mb_sub_b_partition( x264_t *h, int i_sub )
static inline void x264_cabac_mb_transform_size( x264_t *h )
{
int ctx = ( h->mb.cache.transform_size[0] == 1 )
+ ( h->mb.cache.transform_size[1] == 1 );
x264_cabac_encode_decision( &h->cabac, 399 + ctx, h->mb.b_transform_8x8 );
int ctx = 399 + h->mb.cache.i_neighbour_transform_size;
x264_cabac_encode_decision( &h->cabac, ctx, h->mb.b_transform_8x8 );
}
static inline void x264_cabac_mb_ref( x264_t *h, int i_list, int idx )
......@@ -1154,8 +1153,7 @@ void x264_macroblock_write_cabac( x264_t *h, bs_t *s )
x264_cabac_mb_cbp_chroma( h );
}
if( h->pps->b_transform_8x8_mode && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type)
&& x264_mb_transform_8x8_allowed( h, i_mb_type ) )
if( h->mb.cache.b_transform_8x8_allowed && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type) )
{
x264_cabac_mb_transform_size( h );
}
......
......@@ -337,6 +337,29 @@ static void x264_sub_mb_mv_write_cavlc( x264_t *h, bs_t *s, int i_list )
}
}
/* x264_macroblock_luma_write_cavlc:
 * write the luma residual of the current macroblock as sixteen 4x4 lists,
 * skipping every 8x8 quadrant whose bit in h->mb.i_cbp_luma is clear.
 * If the macroblock uses the 8x8 transform, each coded quadrant's 64
 * coefficients are first de-interleaved into its four 4x4 lists (list i4
 * takes coefficients i4, i4+4, i4+8, ...) and the cached non-zero counts
 * are recomputed.  All shuffling is finished before anything is written. */
void x264_macroblock_luma_write_cavlc( x264_t *h, bs_t *s )
{
    int i8, i4, i;

    if( h->mb.b_transform_8x8 )
    {
        /* shuffle 8x8 dct coeffs into 4x4 lists */
        for( i8 = 0; i8 < 4; i8++ )
        {
            if( !( h->mb.i_cbp_luma & (1 << i8) ) )
                continue;

            for( i4 = 0; i4 < 4; i4++ )
            {
                const int idx = i4 + i8*4;

                for( i = 0; i < 16; i++ )
                    h->dct.block[idx].luma4x4[i] = h->dct.luma8x8[i8][i4 + i*4];

                h->mb.cache.non_zero_count[x264_scan8[idx]] =
                    array_non_zero_count( h->dct.block[idx].luma4x4, 16 );
            }
        }
    }

    /* emit the coded quadrants, four 4x4 blocks each */
    for( i8 = 0; i8 < 4; i8++ )
    {
        if( !( h->mb.i_cbp_luma & (1 << i8) ) )
            continue;

        for( i4 = 0; i4 < 4; i4++ )
            block_residual_write_cavlc( h, s, i4 + i8*4, h->dct.block[i4 + i8*4].luma4x4, 16 );
    }
}
/*****************************************************************************
* x264_macroblock_write:
*****************************************************************************/
......@@ -653,8 +676,7 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
}
/* transform size 8x8 flag */
if( h->pps->b_transform_8x8_mode && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type)
&& x264_mb_transform_8x8_allowed( h, i_mb_type ) )
if( h->mb.cache.b_transform_8x8_allowed && h->mb.i_cbp_luma && !IS_INTRA(i_mb_type) )
{
bs_write1( s, h->mb.b_transform_8x8 );
}
......@@ -667,39 +689,15 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
/* DC Luma */
block_residual_write_cavlc( h, s, BLOCK_INDEX_LUMA_DC , h->dct.luma16x16_dc, 16 );
/* AC Luma */
if( h->mb.i_cbp_luma != 0 )
{
/* AC Luma */
for( i = 0; i < 16; i++ )
{
block_residual_write_cavlc( h, s, i, h->dct.block[i].residual_ac, 15 );
}
}
}
else if( h->mb.i_cbp_luma != 0 || h->mb.i_cbp_chroma != 0 )
{
bs_write_se( s, h->mb.qp[h->mb.i_mb_xy] - h->mb.i_last_qp );
/* shuffle 8x8 dct coeffs into 4x4 lists */
if( h->mb.b_transform_8x8 )
{
int i4;
for( i4 = 0; i4 < 16; i4++ )
{
for( i = 0; i < 16; i++ )
h->dct.block[i4].luma4x4[i] = h->dct.luma8x8[i4>>2][(i4&3)+i*4];
h->mb.cache.non_zero_count[x264_scan8[i4]] =
array_non_zero_count( h->dct.block[i4].luma4x4, 16 );
}
}
for( i = 0; i < 16; i++ )
{
if( h->mb.i_cbp_luma & ( 1 << ( i / 4 ) ) )
{
block_residual_write_cavlc( h, s, i, h->dct.block[i].luma4x4, 16 );
}
}
x264_macroblock_luma_write_cavlc( h, s );
}
if( h->mb.i_cbp_chroma != 0 )
{
......@@ -707,12 +705,8 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[0], 4 );
block_residual_write_cavlc( h, s, BLOCK_INDEX_CHROMA_DC, h->dct.chroma_dc[1], 4 );
if( h->mb.i_cbp_chroma&0x02 ) /* Chroma AC residual present */
{
for( i = 0; i < 8; i++ )
{
block_residual_write_cavlc( h, s, 16 + i, h->dct.block[16+i].residual_ac, 15 );
}
}
}
if( IS_INTRA( i_mb_type ) )
......
......@@ -1321,9 +1321,10 @@ do_encode:
if( i_slice_type == SLICE_TYPE_P && !h->param.rc.b_stat_read
&& h->param.i_scenecut_threshold >= 0 )
{
int i_mb_i = h->stat.frame.i_mb_count[I_4x4] + h->stat.frame.i_mb_count[I_16x16];
int i_mb_p = h->stat.frame.i_mb_count[P_L0] + h->stat.frame.i_mb_count[P_8x8];
int i_mb_s = h->stat.frame.i_mb_count[P_SKIP];
const int *mbs = h->stat.frame.i_mb_count;