Commit 1ab45c8f authored by Loren Merritt

8x8 transform and 8x8 intra prediction.

(backend only, not yet used by mb analysis)


git-svn-id: svn://svn.videolan.org/x264/trunk@246 df754926-b1dd-0310-bc7b-ec298dee348c
parent e46db685
...@@ -48,17 +48,17 @@ BITS 64 ...@@ -48,17 +48,17 @@ BITS 64
SECTION .text SECTION .text
cglobal predict_8x8_v_mmx cglobal predict_8x8c_v_mmx
cglobal predict_16x16_v_mmx cglobal predict_16x16_v_mmx
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; ;
; void predict_8x8_v_mmx( uint8_t *src, int i_stride ) ; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
; ;
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
ALIGN 16 ALIGN 16
predict_8x8_v_mmx : predict_8x8c_v_mmx :
movsxd rcx, esi ; i_stride movsxd rcx, esi ; i_stride
sub rdi , rcx ; esi <-- line -1 sub rdi , rcx ; esi <-- line -1
......
...@@ -35,7 +35,7 @@ static int binCount = 0; ...@@ -35,7 +35,7 @@ static int binCount = 0;
#endif #endif
static const int x264_cabac_context_init_I[399][2] = static const int x264_cabac_context_init_I[460][2] =
{ {
/* 0 - 10 */ /* 0 - 10 */
{ 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 }, { 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 },
...@@ -181,10 +181,30 @@ static const int x264_cabac_context_init_I[399][2] = ...@@ -181,10 +181,30 @@ static const int x264_cabac_context_init_I[399][2] =
{ 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 }, { 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 },
{ 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 }, { 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 },
{ 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 }, { 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 },
{ 29, 39 }, { 19, 66 } { 29, 39 }, { 19, 66 },
/* 399 -> 435 */
{ 31, 21 }, { 31, 31 }, { 25, 50 },
{ -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 },
{ -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 },
{ -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 },
{ -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 },
{ 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 },
{ 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 },
{ -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 },
{ 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 },
{ 0, 68 }, { -9, 92 },
/* 436 -> 459 */
{ -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 },
{ -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 },
{ -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 },
{ -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 },
{ 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 },
{ 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }
}; };
static const int x264_cabac_context_init_PB[3][399][2] = static const int x264_cabac_context_init_PB[3][460][2] =
{ {
/* i_cabac_init_idc == 0 */ /* i_cabac_init_idc == 0 */
{ {
...@@ -321,7 +341,25 @@ static const int x264_cabac_context_init_PB[3][399][2] = ...@@ -321,7 +341,25 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 }, { 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 },
{ 11, 86 }, { 11, 86 },
/* 399 -> 435 */
{ 12, 40 }, { 11, 51 }, { 14, 59 },
{ -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 },
{ -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 },
{ -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 },
{ -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 },
{ 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 },
{ 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 },
{ -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 },
{ -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 },
{ -8, 66 }, { -8, 76 },
/* 436 -> 459 */
{ -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
{ -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 },
{ -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
{ -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 },
{ 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 },
{ 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 },
}, },
/* i_cabac_init_idc == 1 */ /* i_cabac_init_idc == 1 */
...@@ -459,6 +497,25 @@ static const int x264_cabac_context_init_PB[3][399][2] = ...@@ -459,6 +497,25 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 }, { 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 },
{ 11, 83 }, { 11, 83 },
/* 399 -> 435 */
{ 24, 32 }, { 21, 49 }, { 21, 54 },
{ -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
{ -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 8 },
{ -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
{ -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 },
{ 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
{ 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
{ -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 },
{ -2, 52 }, { -9, 57 }, { -6, 53 }, { -4, 65 },
{ -4, 67 }, { -7, 82 },
/* 436 -> 459 */
{ -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 },
{ -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 },
{ -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 },
{ -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 },
{ 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
{ 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
}, },
/* i_cabac_init_idc == 2 */ /* i_cabac_init_idc == 2 */
...@@ -595,6 +652,26 @@ static const int x264_cabac_context_init_PB[3][399][2] = ...@@ -595,6 +652,26 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 }, { 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 },
{ 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 }, { 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 },
{ 25, 61 }, { 25, 61 },
/* 399 -> 435 */
{ 21, 33 }, { 19, 50 }, { 17, 61 },
{ -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
{ -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
{ -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
{ -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
{ 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
{ 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
{ -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 },
{ -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 },
{ -6, 68 }, { -10, 79 },
/* 436 -> 459 */
{ -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
{ -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
{ -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
{ -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
{ 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
{ 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
} }
}; };
...@@ -720,7 +797,7 @@ static const int x264_cabac_entropy[128] = ...@@ -720,7 +797,7 @@ static const int x264_cabac_entropy[128] =
*****************************************************************************/ *****************************************************************************/
void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model ) void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
{ {
const int (*cabac_context_init)[399][2]; const int (*cabac_context_init)[460][2];
int i; int i;
if( i_slice_type == SLICE_TYPE_I ) if( i_slice_type == SLICE_TYPE_I )
...@@ -732,7 +809,7 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int ...@@ -732,7 +809,7 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
cabac_context_init = &x264_cabac_context_init_PB[i_model]; cabac_context_init = &x264_cabac_context_init_PB[i_model];
} }
for( i = 0; i < 399; i++ ) for( i = 0; i < 436; i++ )
{ {
int i_pre_state; int i_pre_state;
...@@ -865,7 +942,7 @@ void x264_cabac_model_update( x264_cabac_t *cb, int i_slice_type, int i_qp ) ...@@ -865,7 +942,7 @@ void x264_cabac_model_update( x264_cabac_t *cb, int i_slice_type, int i_qp )
i_cost = 0; /* fix8 */ i_cost = 0; /* fix8 */
for( i_ctx = 0; i_ctx < 399; i_ctx++ ) for( i_ctx = 0; i_ctx < 436; i_ctx++ )
{ {
int i_weight; int i_weight;
int i_model_state; int i_model_state;
......
...@@ -34,12 +34,13 @@ typedef struct ...@@ -34,12 +34,13 @@ typedef struct
} slice[3]; } slice[3];
/* context */ /* context */
/* states 436-459 are for interlacing, so are omitted for now */
struct struct
{ {
int i_state; int i_state;
int i_mps; int i_mps;
int i_count; int i_count;
} ctxstate[399]; } ctxstate[436];
/* state */ /* state */
int i_low; int i_low;
......
...@@ -104,7 +104,7 @@ void x264_param_default( x264_param_t *param ) ...@@ -104,7 +104,7 @@ void x264_param_default( x264_param_t *param )
param->i_log_level = X264_LOG_INFO; param->i_log_level = X264_LOG_INFO;
/* */ /* */
param->analyse.intra = X264_ANALYSE_I4x4; param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16; param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL; param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_me_method = X264_ME_HEX; param->analyse.i_me_method = X264_ME_HEX;
......
...@@ -289,6 +289,8 @@ struct x264_t ...@@ -289,6 +289,8 @@ struct x264_t
{ {
DECLARE_ALIGNED( int, luma16x16_dc[16], 16 ); DECLARE_ALIGNED( int, luma16x16_dc[16], 16 );
DECLARE_ALIGNED( int, chroma_dc[2][4], 16 ); DECLARE_ALIGNED( int, chroma_dc[2][4], 16 );
// FIXME merge with union
DECLARE_ALIGNED( int, luma8x8[4][64], 16 );
union union
{ {
DECLARE_ALIGNED( int, residual_ac[15], 16 ); DECLARE_ALIGNED( int, residual_ac[15], 16 );
...@@ -326,6 +328,8 @@ struct x264_t ...@@ -326,6 +328,8 @@ struct x264_t
/* neighboring MBs */ /* neighboring MBs */
unsigned int i_neighbour; unsigned int i_neighbour;
unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */
unsigned int i_neighbour4[16]; /* at the time the block is coded */
int i_mb_type_top; int i_mb_type_top;
int i_mb_type_left; int i_mb_type_left;
int i_mb_type_topleft; int i_mb_type_topleft;
...@@ -343,11 +347,13 @@ struct x264_t ...@@ -343,11 +347,13 @@ struct x264_t
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */ int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */ int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */ int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
/* current value */ /* current value */
int i_type; int i_type;
int i_partition; int i_partition;
int i_sub_partition[4]; int i_sub_partition[4];
int b_transform_8x8;
int i_cbp_luma; int i_cbp_luma;
int i_cbp_chroma; int i_cbp_chroma;
...@@ -373,7 +379,7 @@ struct x264_t ...@@ -373,7 +379,7 @@ struct x264_t
/* cache */ /* cache */
struct struct
{ {
/* real intra4x4_pred_mode if I_4X4, I_PRED_4x4_DC if mb available, -1 if not */ /* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
int intra4x4_pred_mode[X264_SCAN8_SIZE]; int intra4x4_pred_mode[X264_SCAN8_SIZE];
/* i_non_zero_count if availble else 0x80 */ /* i_non_zero_count if availble else 0x80 */
...@@ -391,6 +397,9 @@ struct x264_t ...@@ -391,6 +397,9 @@ struct x264_t
int16_t direct_mv[2][X264_SCAN8_SIZE][2]; int16_t direct_mv[2][X264_SCAN8_SIZE][2];
int8_t direct_ref[2][X264_SCAN8_SIZE]; int8_t direct_ref[2][X264_SCAN8_SIZE];
/* top and left neighbors. 1=>8x8, 0=>4x4 */
int8_t transform_size[2];
} cache; } cache;
/* */ /* */
...@@ -427,7 +436,7 @@ struct x264_t ...@@ -427,7 +436,7 @@ struct x264_t
/* ? */ /* ? */
int i_misc_bits; int i_misc_bits;
/* MB type counts */ /* MB type counts */
int i_mb_count[18]; int i_mb_count[19];
int i_mb_count_p; int i_mb_count_p;
int i_mb_count_skip; int i_mb_count_skip;
/* Estimated (SATD) cost as Intra/Predicted frame */ /* Estimated (SATD) cost as Intra/Predicted frame */
...@@ -449,13 +458,14 @@ struct x264_t ...@@ -449,13 +458,14 @@ struct x264_t
float f_psnr_mean_u[5]; float f_psnr_mean_u[5];
float f_psnr_mean_v[5]; float f_psnr_mean_v[5];
/* */ /* */
int64_t i_mb_count[5][18]; int64_t i_mb_count[5][19];
} stat; } stat;
/* CPU functions dependants */ /* CPU functions dependants */
x264_predict_t predict_16x16[4+3]; x264_predict_t predict_16x16[4+3];
x264_predict_t predict_8x8[4+3]; x264_predict_t predict_8x8c[4+3];
x264_predict8x8_t predict_8x8[9+3];
x264_predict_t predict_4x4[9+3]; x264_predict_t predict_4x4[9+3];
x264_pixel_function_t pixf; x264_pixel_function_t pixf;
......
...@@ -256,6 +256,136 @@ static void add16x16_idct( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] ) ...@@ -256,6 +256,136 @@ static void add16x16_idct( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
add8x8_idct( &p_dst[8*i_dst+8], i_dst, &dct[12] ); add8x8_idct( &p_dst[8*i_dst+8], i_dst, &dct[12] );
} }
/****************************************************************************
* 8x8 transform:
****************************************************************************/
/* One pass of the 8-point forward integer transform.
 *
 * Every row of src is transformed independently and the eight results for
 * row r are stored down column r of dst, i.e. the output is transposed
 * relative to the input. Calling the function twice (src -> tmp -> out)
 * therefore yields the 2-D transform in the original orientation.
 *
 * All intermediate values are computed in int and narrowed to int16_t only
 * when stored into dst. */
static inline void dct8_1d( int16_t src[8][8], int16_t dst[8][8] )
{
    int row;

    for( row = 0; row < 8; row++ )
    {
        const int16_t *in = src[row];
        int out[8];
        int k;

        /* sums and differences of the mirrored sample pairs (0,7) (1,6) (2,5) (3,4) */
        const int sum0 = in[0] + in[7];
        const int sum1 = in[1] + in[6];
        const int sum2 = in[2] + in[5];
        const int sum3 = in[3] + in[4];

        const int dif0 = in[0] - in[7];
        const int dif1 = in[1] - in[6];
        const int dif2 = in[2] - in[5];
        const int dif3 = in[3] - in[4];

        /* even half: built from the pair sums only */
        const int e0 = sum0 + sum3;
        const int e1 = sum1 + sum2;
        const int e2 = sum0 - sum3;
        const int e3 = sum1 - sum2;

        /* odd half: built from the pair differences; x + (x>>1) scales by 1.5 */
        const int o0 = dif1 + dif2 + (dif0 + (dif0>>1));
        const int o1 = dif0 - dif3 - (dif2 + (dif2>>1));
        const int o2 = dif0 + dif3 - (dif1 + (dif1>>1));
        const int o3 = dif1 - dif2 + (dif3 + (dif3>>1));

        out[0] = e0 + e1;
        out[1] = o0 + (o3>>2);
        out[2] = e2 + (e3>>1);
        out[3] = o1 + (o2>>2);
        out[4] = e0 - e1;
        out[5] = o2 - (o1>>2);
        out[6] = (e2>>1) - e3;
        out[7] = (o0>>2) - o3;

        /* transposed store: coefficient k of this row goes to dst[k][row] */
        for( k = 0; k < 8; k++ )
            dst[k][row] = out[k];
    }
}
/* Forward 8x8 transform of the difference between two 8x8 pixel blocks.
 *
 * dct     receives the 64 transformed coefficients
 * pix1    top-left pixel of the first block,  rows i_pix1 bytes apart
 * pix2    top-left pixel of the second block, rows i_pix2 bytes apart
 *
 * The residual pix1 - pix2 is formed first, then dct8_1d is applied twice;
 * each pass transposes, so the second pass restores the orientation. */
static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int16_t residual[8][8];
    int16_t pass1[8][8];
    int row, col;

    for( row = 0; row < 8; row++ )
    {
        const uint8_t *a = pix1 + row * i_pix1;
        const uint8_t *b = pix2 + row * i_pix2;

        for( col = 0; col < 8; col++ )
            residual[row][col] = a[col] - b[col];
    }

    dct8_1d( residual, pass1 );
    dct8_1d( pass1, dct );
}
/* Forward 8x8 transform of a 16x16 residual, handled as four 8x8 quadrants.
 *
 * Quadrants are processed in the order top-left, top-right, bottom-left,
 * bottom-right and stored in dct[0]..dct[3] respectively. pix1/pix2 and
 * their strides have the same meaning as in sub8x8_dct8. */
static void sub16x16_dct8( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int quad;

    for( quad = 0; quad < 4; quad++ )
    {
        const int x = 8 * ( quad & 1 );   /* 0 for left column, 8 for right */
        const int y = 8 * ( quad >> 1 );  /* 0 for top row,     8 for bottom */

        sub8x8_dct8( dct[quad],
                     pix1 + y * i_pix1 + x, i_pix1,
                     pix2 + y * i_pix2 + x, i_pix2 );
    }
}
/* One pass of the 8-point inverse integer transform.
 *
 * Every row of src is processed independently and the eight outputs for
 * row r are stored down column r of dst (transposed store), so two
 * successive passes produce the 2-D inverse in the original orientation.
 *
 * No rounding or scaling is done here; intermediates are int and are
 * narrowed to int16_t only when written to dst. */
static inline void idct8_1d( int16_t src[8][8], int16_t dst[8][8] )
{
    int row;

    for( row = 0; row < 8; row++ )
    {
        const int c0 = src[row][0];
        const int c1 = src[row][1];
        const int c2 = src[row][2];
        const int c3 = src[row][3];
        const int c4 = src[row][4];
        const int c5 = src[row][5];
        const int c6 = src[row][6];
        const int c7 = src[row][7];

        /* even coefficients (0, 2, 4, 6) */
        const int e_sum  = c0 + c4;
        const int e_dif  = c0 - c4;
        const int e_rotA = (c2>>1) - c6;
        const int e_rotB = (c6>>1) + c2;

        const int even0 = e_sum + e_rotB;
        const int even1 = e_dif + e_rotA;
        const int even2 = e_dif - e_rotA;
        const int even3 = e_sum - e_rotB;

        /* odd coefficients (1, 3, 5, 7) */
        const int o1 = -c3 + c5 - c7 - (c7>>1);
        const int o3 =  c1 + c7 - c3 - (c3>>1);
        const int o5 = -c1 + c7 + c5 + (c5>>1);
        const int o7 =  c3 + c5 + c1 + (c1>>1);

        const int odd1 = (o7>>2) + o1;
        const int odd3 = o3 + (o5>>2);
        const int odd5 = (o3>>2) - o5;
        const int odd7 = o7 - (o1>>2);

        /* combine: outputs k and 7-k share an even term and differ in the
         * sign of the odd term */
        dst[0][row] = even0 + odd7;
        dst[1][row] = even1 + odd5;
        dst[2][row] = even2 + odd3;
        dst[3][row] = even3 + odd1;
        dst[4][row] = even3 - odd1;
        dst[5][row] = even2 - odd3;
        dst[6][row] = even1 - odd5;
        dst[7][row] = even0 - odd7;
    }
}
/* Inverse 8x8 transform of dct, added onto an 8x8 block of pixels in place.
 *
 * p_dst   top-left pixel of the destination block, rows i_dst bytes apart
 * dct     the 64 coefficients to invert
 *
 * idct8_1d is applied twice (each pass transposes), the result is rounded
 * with (v + 32) >> 6, added to the existing pixel and passed through
 * clip_uint8 (defined elsewhere in the file) before being stored. */
static void add8x8_idct8( uint8_t *p_dst, int i_dst, int16_t dct[8][8] )
{
    int16_t pass1[8][8];
    int16_t residual[8][8];
    int row, col;

    idct8_1d( dct, pass1 );
    idct8_1d( pass1, residual );

    for( row = 0; row < 8; row++ )
    {
        uint8_t *line = p_dst + row * i_dst;

        for( col = 0; col < 8; col++ )
        {
            const int delta = ( residual[row][col] + 32 ) >> 6;
            line[col] = clip_uint8( line[col] + delta );
        }
    }
}
/* Inverse 8x8 transform of four coefficient blocks, added onto a 16x16
 * pixel area in place.
 *
 * dct[0]..dct[3] are applied to the top-left, top-right, bottom-left and
 * bottom-right 8x8 quadrants of p_dst, in that order. i_dst is the byte
 * distance between consecutive pixel rows. */
static void add16x16_idct8( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] )
{
    int quad;

    for( quad = 0; quad < 4; quad++ )
    {
        const int x = 8 * ( quad & 1 );   /* 0 for left column, 8 for right */
        const int y = 8 * ( quad >> 1 );  /* 0 for top row,     8 for bottom */

        add8x8_idct8( p_dst + y * i_dst + x, i_dst, dct[quad] );
    }
}
/**************************************************************************** /****************************************************************************
...@@ -269,8 +399,14 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) ...@@ -269,8 +399,14 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->sub8x8_dct = sub8x8_dct; dctf->sub8x8_dct = sub8x8_dct;
dctf->add8x8_idct = add8x8_idct; dctf->add8x8_idct = add8x8_idct;
dctf->sub16x16_dct = sub16x16_dct; dctf->sub16x16_dct = sub16x16_dct;
dctf->add16x16_idct = add16x16_idct; dctf->add16x16_idct = add16x16_idct;
dctf->sub8x8_dct8 = sub8x8_dct8;
dctf->add8x8_idct8 = add8x8_idct8;
dctf->sub16x16_dct8 = sub16x16_dct8;
dctf->add16x16_idct8 = add16x16_idct8;
dctf->dct4x4dc = dct4x4dc; dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc; dctf->idct4x4dc = idct4x4dc;
......
...@@ -35,6 +35,11 @@ typedef struct ...@@ -35,6 +35,11 @@ typedef struct
void (*sub16x16_dct) ( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 ); void (*sub16x16_dct) ( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add16x16_idct) ( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] ); void (*add16x16_idct) ( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
void (*sub8x8_dct8) ( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add8x8_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[8][8] );
void (*sub16x16_dct8) ( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add16x16_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] );
void (*dct4x4dc) ( int16_t d[4][4] ); void (*dct4x4dc) ( int16_t d[4][4] );
void (*idct4x4dc)( int16_t d[4][4] ); void (*idct4x4dc)( int16_t d[4][4] );
......
...@@ -644,6 +644,18 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) ...@@ -644,6 +644,18 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x; const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
int i_edge; int i_edge;
int i_dir; int i_dir;
const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
* entropy coding, but per 64 coeffs for the purpose of deblocking */
if( !h->param.b_cabac && b_8x8_transform )
{
uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy];
if( nnz[0] ) nnz[0] = 0x01010101;
if( nnz[1] ) nnz[1] = 0x01010101;
if( nnz[2] ) nnz[2] = 0x01010101;
if( nnz[3] ) nnz[3] = 0x01010101;
}
/* i_dir == 0 -> vertical edge /* i_dir == 0 -> vertical edge
* i_dir == 1 -> horizontal edge */ * i_dir == 1 -> horizontal edge */
...@@ -719,9 +731,12 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) ...@@ -719,9 +731,12 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
if( i_dir == 0 ) if( i_dir == 0 )
{ {
/* vertical edge */ /* vertical edge */
deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge], if( !b_8x8_transform || !(i_edge & 1) )
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1); {
if( (i_edge % 2) == 0 ) deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1);
}
if( !(i_edge & 1) )
{ {
/* U/V planes */ /* U/V planes */
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] + int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
...@@ -735,10 +750,13 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type ) ...@@ -735,10 +750,13 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
else else
{ {
/* horizontal edge */ /* horizontal edge */
deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x], if( !b_8x8_transform || !(i_edge & 1) )
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 ); {
deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 );
}
/* U/V planes */ /* U/V planes */
if( ( i_edge % 2 ) == 0 ) if( !(i_edge & 1) )
{ {
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +