Commit 1ab45c8f authored by Loren Merritt

8x8 transform and 8x8 intra prediction.

(backend only, not yet used by mb analysis)


git-svn-id: svn://svn.videolan.org/x264/trunk@246 df754926-b1dd-0310-bc7b-ec298dee348c
parent e46db685
......@@ -48,17 +48,17 @@ BITS 64
SECTION .text
cglobal predict_8x8_v_mmx
cglobal predict_8x8c_v_mmx
cglobal predict_16x16_v_mmx
;-----------------------------------------------------------------------------
;
; void predict_8x8_v_mmx( uint8_t *src, int i_stride )
; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_v_mmx :
predict_8x8c_v_mmx :
movsxd rcx, esi ; i_stride
sub rdi , rcx ; esi <-- line -1
......
......@@ -35,7 +35,7 @@ static int binCount = 0;
#endif
static const int x264_cabac_context_init_I[399][2] =
static const int x264_cabac_context_init_I[460][2] =
{
/* 0 - 10 */
{ 20, -15 }, { 2, 54 }, { 3, 74 }, { 20, -15 },
......@@ -181,10 +181,30 @@ static const int x264_cabac_context_init_I[399][2] =
{ 31, -7 }, { 35, -15 }, { 34, -3 }, { 34, 3 },
{ 36, -1 }, { 34, 5 }, { 32, 11 }, { 35, 5 },
{ 34, 12 }, { 39, 11 }, { 30, 29 }, { 34, 26 },
{ 29, 39 }, { 19, 66 }
{ 29, 39 }, { 19, 66 },
/* 399 -> 435 */
{ 31, 21 }, { 31, 31 }, { 25, 50 },
{ -17, 120 }, { -20, 112 }, { -18, 114 }, { -11, 85 },
{ -15, 92 }, { -14, 89 }, { -26, 71 }, { -15, 81 },
{ -14, 80 }, { 0, 68 }, { -14, 70 }, { -24, 56 },
{ -23, 68 }, { -24, 50 }, { -11, 74 }, { 23, -13 },
{ 26, -13 }, { 40, -15 }, { 49, -14 }, { 44, 3 },
{ 45, 6 }, { 44, 34 }, { 33, 54 }, { 19, 82 },
{ -3, 75 }, { -1, 23 }, { 1, 34 }, { 1, 43 },
{ 0, 54 }, { -2, 55 }, { 0, 61 }, { 1, 64 },
{ 0, 68 }, { -9, 92 },
/* 436 -> 459 */
{ -14, 106 }, { -13, 97 }, { -15, 90 }, { -12, 90 },
{ -18, 88 }, { -10, 73 }, { -9, 79 }, { -14, 86 },
{ -10, 73 }, { -10, 70 }, { -10, 69 }, { -5, 66 },
{ -9, 64 }, { -5, 58 }, { 2, 59 }, { 21, -10 },
{ 24, -11 }, { 28, -8 }, { 28, -1 }, { 29, 3 },
{ 29, 9 }, { 35, 20 }, { 29, 36 }, { 14, 67 }
};
static const int x264_cabac_context_init_PB[3][399][2] =
static const int x264_cabac_context_init_PB[3][460][2] =
{
/* i_cabac_init_idc == 0 */
{
......@@ -321,7 +341,25 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 23, 42 }, { 19, 57 }, { 22, 53 }, { 22, 61 },
{ 11, 86 },
/* 399 -> 435 */
{ 12, 40 }, { 11, 51 }, { 14, 59 },
{ -4, 79 }, { -7, 71 }, { -5, 69 }, { -9, 70 },
{ -8, 66 }, { -10, 68 }, { -19, 73 }, { -12, 69 },
{ -16, 70 }, { -15, 67 }, { -20, 62 }, { -19, 70 },
{ -16, 66 }, { -22, 65 }, { -20, 63 }, { 9, -2 },
{ 26, -9 }, { 33, -9 }, { 39, -7 }, { 41, -2 },
{ 45, 3 }, { 49, 9 }, { 45, 27 }, { 36, 59 },
{ -6, 66 }, { -7, 35 }, { -7, 42 }, { -8, 45 },
{ -5, 48 }, { -12, 56 }, { -6, 60 }, { -5, 62 },
{ -8, 66 }, { -8, 76 },
/* 436 -> 459 */
{ -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
{ -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 83 },
{ -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
{ -14, 66 }, { 0, 59 }, { 2, 59 }, { 21, -13 },
{ 33, -14 }, { 39, -7 }, { 46, -2 }, { 51, 2 },
{ 60, 6 }, { 61, 17 }, { 55, 34 }, { 42, 62 },
},
/* i_cabac_init_idc == 1 */
......@@ -459,6 +497,25 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 18, 50 }, { 12, 70 }, { 21, 54 }, { 14, 71 },
{ 11, 83 },
/* 399 -> 435 */
{ 24, 32 }, { 21, 49 }, { 21, 54 },
{ -5, 85 }, { -6, 81 }, { -10, 77 }, { -7, 81 },
{ -17, 80 }, { -18, 73 }, { -4, 74 }, { -10, 8 },
{ -9, 71 }, { -9, 67 }, { -1, 61 }, { -8, 66 },
{ -14, 66 }, { 0, 59 }, { 2, 59 }, { 17, -10 },
{ 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
{ 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
{ -5, 71 }, { 0, 24 }, { -1, 36 }, { -2, 42 },
{ -2, 52 }, { -9, 57 }, { -6, 53 }, { -4, 65 },
{ -4, 67 }, { -7, 82 },
/* 436 -> 459 */
{ -3, 81 }, { -3, 76 }, { -7, 72 }, { -6, 78 },
{ -12, 72 }, { -14, 68 }, { -3, 70 }, { -6, 76 },
{ -5, 66 }, { -5, 62 }, { 0, 57 }, { -4, 61 },
{ -9, 60 }, { 1, 54 }, { 2, 58 }, { 17, -10 },
{ 32, -13 }, { 42, -9 }, { 49, -5 }, { 53, 0 },
{ 64, 3 }, { 68, 10 }, { 66, 27 }, { 47, 57 },
},
/* i_cabac_init_idc == 2 */
......@@ -595,6 +652,26 @@ static const int x264_cabac_context_init_PB[3][399][2] =
{ 22, 42 }, { 16, 60 }, { 15, 52 }, { 14, 60 },
{ 3, 78 }, { -16, 123 }, { 21, 53 }, { 22, 56 },
{ 25, 61 },
/* 399 -> 435 */
{ 21, 33 }, { 19, 50 }, { 17, 61 },
{ -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
{ -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
{ -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
{ -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
{ 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
{ 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
{ -9, 71 }, { -7, 37 }, { -8, 44 }, { -11, 49 },
{ -10, 56 }, { -12, 59 }, { -8, 63 }, { -9, 67 },
{ -6, 68 }, { -10, 79 },
/* 436 -> 459 */
{ -3, 78 }, { -8, 74 }, { -9, 72 }, { -10, 72 },
{ -18, 75 }, { -12, 71 }, { -11, 63 }, { -5, 70 },
{ -17, 75 }, { -14, 72 }, { -16, 67 }, { -8, 53 },
{ -14, 59 }, { -9, 52 }, { -11, 68 }, { 9, -2 },
{ 30, -10 }, { 31, -4 }, { 33, -1 }, { 33, 7 },
{ 31, 12 }, { 37, 23 }, { 31, 38 }, { 20, 64 },
}
};
......@@ -720,7 +797,7 @@ static const int x264_cabac_entropy[128] =
*****************************************************************************/
void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int i_model )
{
const int (*cabac_context_init)[399][2];
const int (*cabac_context_init)[460][2];
int i;
if( i_slice_type == SLICE_TYPE_I )
......@@ -732,7 +809,7 @@ void x264_cabac_context_init( x264_cabac_t *cb, int i_slice_type, int i_qp, int
cabac_context_init = &x264_cabac_context_init_PB[i_model];
}
for( i = 0; i < 399; i++ )
for( i = 0; i < 436; i++ )
{
int i_pre_state;
......@@ -865,7 +942,7 @@ void x264_cabac_model_update( x264_cabac_t *cb, int i_slice_type, int i_qp )
i_cost = 0; /* fix8 */
for( i_ctx = 0; i_ctx < 399; i_ctx++ )
for( i_ctx = 0; i_ctx < 436; i_ctx++ )
{
int i_weight;
int i_model_state;
......
......@@ -34,12 +34,13 @@ typedef struct
} slice[3];
/* context */
/* states 436-459 are for interlacing, so are omitted for now */
struct
{
int i_state;
int i_mps;
int i_count;
} ctxstate[399];
} ctxstate[436];
/* state */
int i_low;
......
......@@ -104,7 +104,7 @@ void x264_param_default( x264_param_t *param )
param->i_log_level = X264_LOG_INFO;
/* */
param->analyse.intra = X264_ANALYSE_I4x4;
param->analyse.intra = X264_ANALYSE_I4x4 | X264_ANALYSE_I8x8;
param->analyse.inter = X264_ANALYSE_I4x4 | X264_ANALYSE_PSUB16x16 | X264_ANALYSE_BSUB16x16;
param->analyse.i_direct_mv_pred = X264_DIRECT_PRED_TEMPORAL;
param->analyse.i_me_method = X264_ME_HEX;
......
......@@ -289,6 +289,8 @@ struct x264_t
{
DECLARE_ALIGNED( int, luma16x16_dc[16], 16 );
DECLARE_ALIGNED( int, chroma_dc[2][4], 16 );
// FIXME merge with union
DECLARE_ALIGNED( int, luma8x8[4][64], 16 );
union
{
DECLARE_ALIGNED( int, residual_ac[15], 16 );
......@@ -326,6 +328,8 @@ struct x264_t
/* neighboring MBs */
unsigned int i_neighbour;
unsigned int i_neighbour8[4]; /* neighbours of each 8x8 or 4x4 block that are available */
unsigned int i_neighbour4[16]; /* at the time the block is coded */
int i_mb_type_top;
int i_mb_type_left;
int i_mb_type_topleft;
......@@ -343,11 +347,13 @@ struct x264_t
int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */
int16_t (*mvr[2][16])[2]; /* 16x16 mv for each possible ref */
int8_t *skipbp; /* block pattern for SKIP or DIRECT (sub)mbs. B-frames + cabac only */
int8_t *mb_transform_size; /* transform_size_8x8_flag of each mb */
/* current value */
int i_type;
int i_partition;
int i_sub_partition[4];
int b_transform_8x8;
int i_cbp_luma;
int i_cbp_chroma;
......@@ -373,7 +379,7 @@ struct x264_t
/* cache */
struct
{
/* real intra4x4_pred_mode if I_4X4, I_PRED_4x4_DC if mb available, -1 if not */
/* real intra4x4_pred_mode if I_4X4 or I_8X8, I_PRED_4x4_DC if mb available, -1 if not */
int intra4x4_pred_mode[X264_SCAN8_SIZE];
/* i_non_zero_count if availble else 0x80 */
......@@ -391,6 +397,9 @@ struct x264_t
int16_t direct_mv[2][X264_SCAN8_SIZE][2];
int8_t direct_ref[2][X264_SCAN8_SIZE];
/* top and left neighbors. 1=>8x8, 0=>4x4 */
int8_t transform_size[2];
} cache;
/* */
......@@ -427,7 +436,7 @@ struct x264_t
/* ? */
int i_misc_bits;
/* MB type counts */
int i_mb_count[18];
int i_mb_count[19];
int i_mb_count_p;
int i_mb_count_skip;
/* Estimated (SATD) cost as Intra/Predicted frame */
......@@ -449,13 +458,14 @@ struct x264_t
float f_psnr_mean_u[5];
float f_psnr_mean_v[5];
/* */
int64_t i_mb_count[5][18];
int64_t i_mb_count[5][19];
} stat;
/* CPU functions dependants */
x264_predict_t predict_16x16[4+3];
x264_predict_t predict_8x8[4+3];
x264_predict_t predict_8x8c[4+3];
x264_predict8x8_t predict_8x8[9+3];
x264_predict_t predict_4x4[9+3];
x264_pixel_function_t pixf;
......
......@@ -256,6 +256,136 @@ static void add16x16_idct( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] )
add8x8_idct( &p_dst[8*i_dst+8], i_dst, &dct[12] );
}
/****************************************************************************
* 8x8 transform:
****************************************************************************/
/* One 1-D pass of the 8-point forward transform.
 * Every row of src is transformed and its 8 results are stored down the
 * matching column of dst (row r of src -> column r of dst), i.e. the output
 * is transposed.  Two successive calls therefore cover both dimensions. */
static inline void dct8_1d( int16_t src[8][8], int16_t dst[8][8] )
{
    int row;

    for( row = 0; row < 8; row++ )
    {
        const int16_t *in = src[row];

        /* mirrored sample pairs (k, 7-k): sums feed the even outputs,
         * differences feed the odd outputs */
        const int sum0 = in[0] + in[7];
        const int sum1 = in[1] + in[6];
        const int sum2 = in[2] + in[5];
        const int sum3 = in[3] + in[4];
        const int diff0 = in[0] - in[7];
        const int diff1 = in[1] - in[6];
        const int diff2 = in[2] - in[5];
        const int diff3 = in[3] - in[4];

        /* even half */
        const int e0 = sum0 + sum3;
        const int e1 = sum1 + sum2;
        const int e2 = sum0 - sum3;
        const int e3 = sum1 - sum2;

        /* odd half; x + (x>>1) scales a term by 1.5 using shifts only */
        const int o0 = diff1 + diff2 + (diff0 + (diff0>>1));
        const int o1 = diff0 - diff3 - (diff2 + (diff2>>1));
        const int o2 = diff0 + diff3 - (diff1 + (diff1>>1));
        const int o3 = diff1 - diff2 + (diff3 + (diff3>>1));

        /* even-indexed outputs */
        dst[0][row] = e0 + e1;
        dst[4][row] = e0 - e1;
        dst[2][row] = e2 + (e3>>1);
        dst[6][row] = (e2>>1) - e3;

        /* odd-indexed outputs */
        dst[1][row] = o0 + (o3>>2);
        dst[3][row] = o1 + (o2>>2);
        dst[5][row] = o2 - (o1>>2);
        dst[7][row] = (o0>>2) - o3;
    }
}
/* Compute the 8x8 residual pix1 - pix2 and store its 2-D transform in dct.
 * i_pix1 / i_pix2 are the strides (in bytes) between consecutive rows of
 * pix1 / pix2. */
static void sub8x8_dct8( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int16_t residual[8][8];
    int16_t pass1[8][8];
    int row, col;

    /* per-pixel difference, advancing both sources one row per iteration */
    for( row = 0; row < 8; row++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        for( col = 0; col < 8; col++ )
        {
            residual[row][col] = pix1[col] - pix2[col];
        }
    }

    /* each 1-D pass writes its output transposed, so running it twice
     * transforms along both dimensions */
    dct8_1d( residual, pass1 );
    dct8_1d( pass1, dct );
}
/* Transform the residual of a 16x16 area as four 8x8 blocks.
 * dct[0..3] receive the top-left, top-right, bottom-left and bottom-right
 * 8x8 quadrants, in that order. */
static void sub16x16_dct8( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int blk;

    for( blk = 0; blk < 4; blk++ )
    {
        /* bit 0 of the block index selects the column, bit 1 the row */
        const int x = 8 * (blk & 1);
        const int y = 8 * (blk >> 1);

        sub8x8_dct8( dct[blk], &pix1[y*i_pix1 + x], i_pix1, &pix2[y*i_pix2 + x], i_pix2 );
    }
}
/* One 1-D pass of the 8-point inverse transform.
 * Every row of src is processed and its 8 results are stored down the
 * matching column of dst (row r of src -> column r of dst), i.e. the output
 * is transposed.  Two successive calls therefore cover both dimensions. */
static inline void idct8_1d( int16_t src[8][8], int16_t dst[8][8] )
{
    int row;

    for( row = 0; row < 8; row++ )
    {
        const int16_t *c = src[row];

        /* even-indexed coefficients (0, 2, 4, 6) */
        const int even_sum  = c[0] + c[4];
        const int even_diff = c[0] - c[4];
        const int rot_lo    = (c[2]>>1) - c[6];
        const int rot_hi    = (c[6]>>1) + c[2];

        const int e0 = even_sum  + rot_hi;
        const int e1 = even_diff + rot_lo;
        const int e2 = even_diff - rot_lo;
        const int e3 = even_sum  - rot_hi;

        /* odd-indexed coefficients (1, 3, 5, 7) */
        const int t1 = -c[3] + c[5] - c[7] - (c[7]>>1);
        const int t3 =  c[1] + c[7] - c[3] - (c[3]>>1);
        const int t5 = -c[1] + c[7] + c[5] + (c[5]>>1);
        const int t7 =  c[3] + c[5] + c[1] + (c[1]>>1);

        const int o0 = t7 - (t1>>2);
        const int o1 = (t3>>2) - t5;
        const int o2 = t3 + (t5>>2);
        const int o3 = (t7>>2) + t1;

        /* final butterfly: outputs k and 7-k share the same even/odd pair */
        dst[0][row] = e0 + o0;
        dst[1][row] = e1 + o1;
        dst[2][row] = e2 + o2;
        dst[3][row] = e3 + o3;
        dst[4][row] = e3 - o3;
        dst[5][row] = e2 - o2;
        dst[6][row] = e1 - o1;
        dst[7][row] = e0 - o0;
    }
}
/* Inverse-transform the 8x8 coefficient block dct and add the result to the
 * 8x8 pixel area at p_dst (row stride i_dst), clipping through clip_uint8. */
static void add8x8_idct8( uint8_t *p_dst, int i_dst, int16_t dct[8][8] )
{
    int16_t pass1[8][8];
    int16_t resid[8][8];
    int row, col;

    /* each 1-D pass writes its output transposed, so running it twice
     * covers both dimensions */
    idct8_1d( dct, pass1 );
    idct8_1d( pass1, resid );

    for( row = 0; row < 8; row++, p_dst += i_dst )
    {
        for( col = 0; col < 8; col++ )
        {
            /* +32 then >>6: divide by 64 with rounding */
            const int delta = (resid[row][col] + 32) >> 6;
            p_dst[col] = clip_uint8( p_dst[col] + delta );
        }
    }
}
/* Inverse-transform four 8x8 coefficient blocks and add them to a 16x16
 * pixel area.  dct[0..3] map to the top-left, top-right, bottom-left and
 * bottom-right 8x8 quadrants, in that order. */
static void add16x16_idct8( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] )
{
    int blk;

    for( blk = 0; blk < 4; blk++ )
    {
        /* bit 0 of the block index selects the column, bit 1 the row */
        const int x = 8 * (blk & 1);
        const int y = 8 * (blk >> 1);

        add8x8_idct8( &p_dst[y*i_dst + x], i_dst, dct[blk] );
    }
}
/****************************************************************************
......@@ -269,8 +399,14 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->sub8x8_dct = sub8x8_dct;
dctf->add8x8_idct = add8x8_idct;
dctf->sub16x16_dct = sub16x16_dct;
dctf->add16x16_idct = add16x16_idct;
dctf->sub16x16_dct = sub16x16_dct;
dctf->add16x16_idct = add16x16_idct;
dctf->sub8x8_dct8 = sub8x8_dct8;
dctf->add8x8_idct8 = add8x8_idct8;
dctf->sub16x16_dct8 = sub16x16_dct8;
dctf->add16x16_idct8 = add16x16_idct8;
dctf->dct4x4dc = dct4x4dc;
dctf->idct4x4dc = idct4x4dc;
......
......@@ -35,6 +35,11 @@ typedef struct
void (*sub16x16_dct) ( int16_t dct[16][4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add16x16_idct) ( uint8_t *p_dst, int i_dst, int16_t dct[16][4][4] );
void (*sub8x8_dct8) ( int16_t dct[8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add8x8_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[8][8] );
void (*sub16x16_dct8) ( int16_t dct[4][8][8], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 );
void (*add16x16_idct8) ( uint8_t *p_dst, int i_dst, int16_t dct[4][8][8] );
void (*dct4x4dc) ( int16_t d[4][4] );
void (*idct4x4dc)( int16_t d[4][4] );
......
......@@ -644,6 +644,18 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
const int mb_4x4 = 4 * s4x4 * mb_y + 4 * mb_x;
int i_edge;
int i_dir;
const int b_8x8_transform = h->mb.mb_transform_size[mb_xy];
/* cavlc + 8x8 transform stores nnz per 16 coeffs for the purpose of
* entropy coding, but per 64 coeffs for the purpose of deblocking */
if( !h->param.b_cabac && b_8x8_transform )
{
uint32_t *nnz = (uint32_t*)h->mb.non_zero_count[mb_xy];
if( nnz[0] ) nnz[0] = 0x01010101;
if( nnz[1] ) nnz[1] = 0x01010101;
if( nnz[2] ) nnz[2] = 0x01010101;
if( nnz[3] ) nnz[3] = 0x01010101;
}
/* i_dir == 0 -> vertical edge
* i_dir == 1 -> horizontal edge */
......@@ -719,9 +731,12 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
if( i_dir == 0 )
{
/* vertical edge */
deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1);
if( (i_edge % 2) == 0 )
if( !b_8x8_transform || !(i_edge & 1) )
{
deblocking_filter_edgev( h, &h->fdec->plane[0][16 * mb_y * h->fdec->i_stride[0]+ 16 * mb_x + 4 * i_edge],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1);
}
if( !(i_edge & 1) )
{
/* U/V planes */
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
......@@ -735,10 +750,13 @@ void x264_frame_deblocking_filter( x264_t *h, int i_slice_type )
else
{
/* horizontal edge */
deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 );
if( !b_8x8_transform || !(i_edge & 1) )
{
deblocking_filter_edgeh( h, &h->fdec->plane[0][(16*mb_y + 4 * i_edge) * h->fdec->i_stride[0]+ 16 * mb_x],
h->fdec->i_stride[0], bS, (i_qp+i_qpn+1) >> 1 );
}
/* U/V planes */
if( ( i_edge % 2 ) == 0 )
if( !(i_edge & 1) )
{
int i_qpc = ( i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )] +
i_chroma_qp_table[x264_clip3( i_qpn + h->pps->i_chroma_qp_index_offset, 0, 51 )] + 1 ) >> 1;
......
......@@ -56,17 +56,17 @@ SECTION .data
SECTION .text
cglobal predict_8x8_v_mmx
cglobal predict_8x8c_v_mmx
cglobal predict_16x16_v_mmx
;-----------------------------------------------------------------------------
;
; void predict_8x8_v_mmx( uint8_t *src, int i_stride )
; void predict_8x8c_v_mmx( uint8_t *src, int i_stride )
;
;-----------------------------------------------------------------------------
ALIGN 16
predict_8x8_v_mmx :
predict_8x8c_v_mmx :
;push edi
;push esi
......
......@@ -177,7 +177,7 @@ static void predict_16x16_v( uint8_t *src, int i_stride )
/****************************************************************************
* 8x8 prediction for intra chroma block DC, H, V, P
****************************************************************************/
static void predict_8x8_dc_128( uint8_t *src, int i_stride )
static void predict_8x8c_dc_128( uint8_t *src, int i_stride )
{
int y;
......@@ -191,7 +191,7 @@ static void predict_8x8_dc_128( uint8_t *src, int i_stride )
src += i_stride;
}
}
static void predict_8x8_dc_left( uint8_t *src, int i_stride )
static void predict_8x8c_dc_left( uint8_t *src, int i_stride )
{
int y;
uint32_t dc0 = 0, dc1 = 0;
......@@ -222,7 +222,7 @@ static void predict_8x8_dc_left( uint8_t *src, int i_stride )
}
}
static void predict_8x8_dc_top( uint8_t *src, int i_stride )
static void predict_8x8c_dc_top( uint8_t *src, int i_stride )
{
int y, x;
uint32_t dc0 = 0, dc1 = 0;
......@@ -244,7 +244,7 @@ static void predict_8x8_dc_top( uint8_t *src, int i_stride )
src += i_stride;
}
}
static void predict_8x8_dc( uint8_t *src, int i_stride )
static void predict_8x8c_dc( uint8_t *src, int i_stride )
{
int y;
int s0 = 0, s1 = 0, s2 = 0, s3 = 0;
......@@ -291,7 +291,7 @@ static void predict_8x8_dc( uint8_t *src, int i_stride )
}
}
static void predict_8x8_h( uint8_t *src, int i_stride )
static void predict_8x8c_h( uint8_t *src, int i_stride )
{
int i;
......@@ -307,10 +307,10 @@ static void predict_8x8_h( uint8_t *src, int i_stride )
}
}
extern void predict_8x8_v_mmx( uint8_t *src, int i_stride );
extern void predict_8x8c_v_mmx( uint8_t *src, int i_stride );
#if 0
static void predict_8x8_v( uint8_t *src, int i_stride )
static void predict_8x8c_v( uint8_t *src, int i_stride )
{
int i;
......@@ -326,7 +326,7 @@ static void predict_8x8_v( uint8_t *src, int i_stride )
/****************************************************************************
* 4x4 prediction for intra luma block DC, H, V, P
* 4x4 prediction for intra luma block
****************************************************************************/
static void predict_4x4_dc_128( uint8_t *src, int i_stride )
{
......@@ -422,14 +422,14 @@ void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] )
pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128;
}
void x264_predict_8x8_init_mmxext( x264_predict_t pf[7] )
void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] )
{
pf[I_PRED_CHROMA_V ] = predict_8x8_v_mmx;
pf[I_PRED_CHROMA_H ] = predict_8x8_h;
pf[I_PRED_CHROMA_DC] = predict_8x8_dc;
pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8_dc_left;
pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8_dc_top;
pf[I_PRED_CHROMA_DC_128 ]= predict_8x8_dc_128;
pf[I_PRED_CHROMA_V ] = predict_8x8c_v_mmx;
pf[I_PRED_CHROMA_H ] = predict_8x8c_h;
pf[I_PRED_CHROMA_DC] = predict_8x8c_dc;
pf[I_PRED_CHROMA_DC_LEFT]= predict_8x8c_dc_left;
pf[I_PRED_CHROMA_DC_TOP ]= predict_8x8c_dc_top;
pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128;
}
void x264_predict_4x4_init_mmxext( x264_predict_t pf[12] )
......
......@@ -25,7 +25,7 @@
#define _I386_PREDICT_H 1
void x264_predict_16x16_init_mmxext ( x264_predict_t pf[7] );
void x264_predict_8x8_init_mmxext ( x264_predict_t pf[7] );
void x264_predict_8x8c_init_mmxext ( x264_predict_t pf[7] );
void x264_predict_4x4_init_mmxext ( x264_predict_t pf[12] );
#endif
......@@ -54,23 +54,71 @@ static const int dequant_mf[6][4][4] =
{ {18, 23, 18, 23}, {23, 29, 23, 29}, {18, 23, 18, 23}, {23, 29, 23, 29} }
};
#if 0
static const int i_chroma_qp_table[52] =
static const int dequant8_mf[6][8][8] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
39, 39
{
{20, 19, 25, 19, 20, 19, 25, 19},
{19, 18, 24, 18, 19, 18, 24, 18},
{25, 24, 32, 24, 25, 24, 32, 24},
{19, 18, 24, 18, 19, 18, 24, 18},
{20, 19, 25, 19, 20, 19, 25, 19},
{19, 18, 24, 18, 19, 18, 24, 18},
{25, 24, 32, 24, 25, 24, 32, 24},
{19, 18, 24, 18, 19, 18, 24, 18}
}, {
{22, 21, 28, 21, 22, 21, 28, 21},
{21, 19, 26, 19, 21, 19, 26, 19},
{28, 26, 35, 26, 28, 26, 35, 26},
{21, 19, 26, 19, 21, 19, 26, 19},
{22, 21, 28, 21, 22, 21, 28, 21},
{21, 19, 26, 19, 21, 19, 26, 19},
{28, 26, 35, 26, 28, 26, 35, 26},
{21, 19, 26, 19, 21, 19, 26, 19}
}, {
{26, 24, 33, 24, 26, 24, 33, 24},
{24, 23, 31, 23, 24, 23, 31, 23},
{33, 31, 42, 31, 33, 31, 42, 31},
{24, 23, 31, 23, 24, 23, 31, 23},
{26, 24, 33, 24, 26, 24, 33, 24},
{24, 23, 31, 23, 24, 23, 31, 23},
{33, 31, 42, 31, 33, 31, 42, 31},
{24, 23, 31, 23, 24, 23, 31, 23}
}, {
{28, 26, 35, 26, 28, 26, 35, 26},
{26, 25, 33, 25, 26, 25, 33, 25},
{35, 33, 45, 33, 35, 33, 45, 33},
{26, 25, 33, 25, 26, 25, 33, 25},
{28, 26, 35, 26, 28, 26, 35, 26},
{26, 25, 33, 25, 26, 25, 33, 25},
{35, 33, 45, 33, 35, 33, 45, 33},
{26, 25, 33, 25, 26, 25, 33, 25}
}, {
{32, 30, 40, 30, 32, 30, 40, 30},
{30, 28, 38, 28, 30, 28, 38, 28},
{40, 38, 51, 38, 40, 38, 51, 38},
{30, 28, 38, 28, 30, 28, 38, 28},
{32, 30, 40, 30, 32, 30, 40, 30},
{30, 28, 38, 28, 30, 28, 38, 28},
{40, 38, 51, 38, 40, 38, 51, 38},
{30, 28, 38, 28, 30, 28, 38, 28}
}, {
{36, 34, 46, 34, 36, 34, 46, 34},
{34, 32, 43, 32, 34, 32, 43, 32},
{46, 43, 58, 43, 46, 43, 58, 43},
{34, 32, 43, 32, 34, 32, 43, 32},
{36, 34, 46, 34, 36, 34, 46, 34},
{34, 32, 43, 32, 34, 32, 43, 32},
{46, 43, 58, 43, 46, 43, 58, 43},
{34, 32, 43, 32, 34, 32, 43, 32}
}
};
#endif
int x264_mb_predict_intra4x4_mode( x264_t *h, int idx )
{
const int ma = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 1];
const int mb = h->mb.cache.intra4x4_pred_mode[x264_scan8[idx] - 8];
const int m = X264_MIN( ma, mb );
const int m = X264_MIN( x264_mb_pred_mode4x4_fix(ma),
x264_mb_pred_mode4x4_fix(mb) );
if( m < 0 )
return I_PRED_4x4_DC;
......@@ -92,6 +140,24 @@ int x264_mb_predict_non_zero_code( x264_t *h, int idx )
return i_ret & 0x7f;
}
int x264_mb_transform_8x8_allowed( x264_t *h, int i_mb_type )
{
int i;
if( i_mb_type == P_8x8 || i_mb_type == B_8x8 )