Commit 6b4ad5f5 authored by Fiona Glaser

Fix and enable I_PCM macroblock support

In RD mode, always consider PCM as a possible macroblock mode.
Fix bitstream writing for PCM blocks in CAVLC and CABAC, and make a few other minor changes so that PCM works.
PCM macroblocks improve compression at very low QPs (1-5) and in lossless mode.
parent 05d7fb66
......@@ -50,6 +50,7 @@
#define X264_THREAD_MAX 128
#define X264_SLICE_MAX 4
#define X264_NAL_MAX (4 + X264_SLICE_MAX)
#define X264_PCM_COST (386*8)
// number of pixels (per thread) in progress at any given time.
// 16 for the macroblock in progress + 3 for deblocking + 3 for motion compensation filter + 2 for extra safety
......
......@@ -1399,7 +1399,7 @@ void x264_macroblock_cache_save( x264_t *h )
if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
h->mb.i_qp = h->mb.i_last_qp;
h->mb.qp[i_mb_xy] = h->mb.i_qp;
h->mb.qp[i_mb_xy] = i_mb_type != I_PCM ? h->mb.i_qp : 0;
h->mb.i_last_dqp = h->mb.i_qp - h->mb.i_last_qp;
h->mb.i_last_qp = h->mb.i_qp;
......@@ -1418,7 +1418,10 @@ void x264_macroblock_cache_save( x264_t *h )
if( i_mb_type == I_PCM )
{
h->mb.i_cbp_chroma = 2;
h->mb.i_cbp_luma = 0xf;
h->mb.cbp[i_mb_xy] = 0x72f; /* all set */
h->mb.b_transform_8x8 = 0;
for( i = 0; i < 16 + 2*4; i++ )
non_zero_count[i] = 16;
}
......
......@@ -55,7 +55,7 @@ static const uint8_t x264_pred_i4x4_neighbors[12] =
/* XXX mb_type isn't the one written in the bitstream -> only internal usage */
#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 )
#define IS_INTRA(type) ( (type) == I_4x4 || (type) == I_8x8 || (type) == I_16x16 || (type) == I_PCM )
#define IS_SKIP(type) ( (type) == P_SKIP || (type) == B_SKIP )
#define IS_DIRECT(type) ( (type) == B_DIRECT )
enum mb_class_e
......
......@@ -96,6 +96,8 @@ typedef struct
int i_satd_i4x4;
int i_predict4x4[16];
int i_satd_pcm;
/* Chroma part */
int i_satd_i8x8chroma;
int i_satd_i8x8chroma_dir[4];
......@@ -223,6 +225,9 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
a->i_satd_i4x4 =
a->i_satd_i8x8chroma = COST_MAX;
/* non-RD PCM decision is inaccurate, so don't do it */
a->i_satd_pcm = a->b_mbrd ? ((uint64_t)X264_PCM_COST*a->i_lambda2 + 128) >> 8 : COST_MAX;
a->b_fast_intra = 0;
h->mb.i_skip_intra =
h->mb.b_lossless ? 0 :
......@@ -2066,15 +2071,12 @@ void x264_macroblock_analyse( x264_t *h )
i_cost = analysis.i_satd_i16x16;
h->mb.i_type = I_16x16;
if( analysis.i_satd_i4x4 < i_cost )
{
i_cost = analysis.i_satd_i4x4;
h->mb.i_type = I_4x4;
}
if( analysis.i_satd_i8x8 < i_cost )
h->mb.i_type = I_8x8;
COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 );
COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 );
if( analysis.i_satd_pcm < i_cost )
h->mb.i_type = I_PCM;
if( h->mb.i_subpel_refine >= 7 )
else if( h->mb.i_subpel_refine >= 7 )
x264_intra_rd_refine( h, &analysis );
}
else if( h->sh.i_type == SLICE_TYPE_P )
......@@ -2285,6 +2287,7 @@ void x264_macroblock_analyse( x264_t *h )
i_intra_cost = analysis.i_satd_i16x16;
COPY2_IF_LT( i_intra_cost, analysis.i_satd_i8x8, i_intra_type, I_8x8 );
COPY2_IF_LT( i_intra_cost, analysis.i_satd_i4x4, i_intra_type, I_4x4 );
COPY2_IF_LT( i_intra_cost, analysis.i_satd_pcm, i_intra_type, I_PCM );
COPY2_IF_LT( i_cost, i_intra_cost, i_type, i_intra_type );
if( i_intra_cost == COST_MAX )
......@@ -2295,7 +2298,7 @@ void x264_macroblock_analyse( x264_t *h )
h->stat.frame.i_inter_cost += i_cost;
h->stat.frame.i_mbs_analysed++;
if( h->mb.i_subpel_refine >= 7 )
if( h->mb.i_subpel_refine >= 7 && h->mb.i_type != I_PCM )
{
if( IS_INTRA( h->mb.i_type ) )
{
......@@ -2566,11 +2569,12 @@ void x264_macroblock_analyse( x264_t *h )
COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );
COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM );
h->mb.i_type = i_type;
h->mb.i_partition = i_partition;
if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) )
if( analysis.b_mbrd && h->mb.i_subpel_refine >= 7 && IS_INTRA( i_type ) && i_type != I_PCM )
x264_intra_rd_refine( h, &analysis );
else if( h->param.analyse.b_bidir_me )
refine_bidir( h, &analysis );
......@@ -2612,6 +2616,9 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a )
x264_mb_analyse_intra_chroma( h, a );
break;
case I_PCM:
break;
case P_L0:
switch( h->mb.i_partition )
{
......
......@@ -543,14 +543,12 @@ static int x264_cabac_mb_cbf_ctxidxinc( x264_t *h, int i_cat, int i_idx )
if( h->mb.i_neighbour & MB_LEFT )
{
i_mba_xy = h->mb.i_mb_xy - 1;
if( h->mb.i_mb_type_left == I_16x16 )
i_nza = h->mb.cbp[i_mba_xy] & 0x100;
i_nza = h->mb.cbp[i_mba_xy] & 0x100;
}
if( h->mb.i_neighbour & MB_TOP )
{
i_mbb_xy = h->mb.i_mb_top_xy;
if( h->mb.i_mb_type_top == I_16x16 )
i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
i_nzb = h->mb.cbp[i_mbb_xy] & 0x100;
}
}
else if( i_cat == DCT_LUMA_AC || i_cat == DCT_LUMA_4x4 )
......@@ -785,36 +783,35 @@ void x264_macroblock_write_cabac( x264_t *h, x264_cabac_t *cb )
/* Write the MB type */
x264_cabac_mb_type( h, cb );
/* PCM special block type UNTESTED */
#ifndef RDO_SKIP_BS
if( i_mb_type == I_PCM )
{
#ifdef RDO_SKIP_BS
cb->f8_bits_encoded += (384*8) << 8;
#else
if( cb->p + 385 >= cb->p_end )
return; //FIXME throw an error
/* Luma */
for( i = 0; i < 16; i++ )
{
memcpy( cb->p, h->fenc->plane[0] + i*h->mb.pic.i_stride[0], 16 );
cb->p += 16;
}
/* Cb */
i_mb_pos_tex = x264_cabac_pos( cb );
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
memcpy( cb->p, h->mb.pic.p_fenc[0], 256 );
cb->p += 256;
for( i = 0; i < 8; i++ )
{
memcpy( cb->p, h->fenc->plane[1] + i*h->mb.pic.i_stride[1], 8 );
cb->p += 8;
}
/* Cr */
memcpy( cb->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
cb->p += 64;
for( i = 0; i < 8; i++ )
{
memcpy( cb->p, h->fenc->plane[2] + i*h->mb.pic.i_stride[2], 8 );
cb->p += 8;
}
x264_cabac_encode_init( cb, cb->p, cb->p_end );
#endif
memcpy( cb->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
cb->p += 64;
cb->i_low = 0;
cb->i_range = 0x01FE;
cb->i_queue = -1;
cb->i_bytes_outstanding = 0;
/* if PCM is chosen, we need to store reconstructed frame data */
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
h->stat.frame.i_itex_bits += x264_cabac_pos( cb ) - i_mb_pos_tex;
return;
}
#endif
if( IS_INTRA( i_mb_type ) )
{
......
......@@ -339,44 +339,39 @@ void x264_macroblock_write_cavlc( x264_t *h, bs_t *s )
bs_write1( s, h->mb.b_interlaced );
}
/* Write:
- type
- prediction
- mv */
if( i_mb_type == I_PCM )
#ifndef RDO_SKIP_BS
if( i_mb_type == I_PCM)
{
/* Untested */
bs_write_ue( s, i_mb_i_offset + 25 );
i_mb_pos_tex = bs_pos( s );
h->stat.frame.i_hdr_bits += i_mb_pos_tex - i_mb_pos_start;
#ifdef RDO_SKIP_BS
s->i_bits_encoded += 384*8;
#else
bs_align_0( s );
/* Luma */
for( i = 0; i < 16*16; i++ )
{
const int x = 16 * h->mb.i_mb_x + (i % 16);
const int y = 16 * h->mb.i_mb_y + (i / 16);
bs_write( s, 8, h->fenc->plane[0][y*h->mb.pic.i_stride[0]+x] );
}
/* Cb */
for( i = 0; i < 8*8; i++ )
{
const int x = 8 * h->mb.i_mb_x + (i % 8);
const int y = 8 * h->mb.i_mb_y + (i / 8);
bs_write( s, 8, h->fenc->plane[1][y*h->mb.pic.i_stride[1]+x] );
}
/* Cr */
for( i = 0; i < 8*8; i++ )
{
const int x = 8 * h->mb.i_mb_x + (i % 8);
const int y = 8 * h->mb.i_mb_y + (i / 8);
bs_write( s, 8, h->fenc->plane[2][y*h->mb.pic.i_stride[2]+x] );
}
#endif
memcpy( s->p, h->mb.pic.p_fenc[0], 256 );
s->p += 256;
for( i = 0; i < 8; i++ )
memcpy( s->p + i*8, h->mb.pic.p_fenc[1] + i*FENC_STRIDE, 8 );
s->p += 64;
for( i = 0; i < 8; i++ )
memcpy( s->p + i*8, h->mb.pic.p_fenc[2] + i*FENC_STRIDE, 8 );
s->p += 64;
/* if PCM is chosen, we need to store reconstructed frame data */
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc[0], FENC_STRIDE, 16 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, 8 );
h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, 8 );
h->stat.frame.i_itex_bits += bs_pos(s) - i_mb_pos_tex;
return;
}
else if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
#endif
/* Write:
- type
- prediction
- mv */
if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
{
int di = i_mb_type == I_8x8 ? 4 : 1;
bs_write_ue( s, i_mb_i_offset + 0 );
......
......@@ -1841,10 +1841,11 @@ void x264_encoder_close ( x264_t *h )
const int64_t *i_mb_count = h->stat.i_mb_count[SLICE_TYPE_I];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_I] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
"mb I I16..4: %4.1f%% %4.1f%% %4.1f%%\n",
"mb I I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%%\n",
i_mb_count[I_16x16]/ i_count,
i_mb_count[I_8x8] / i_count,
i_mb_count[I_4x4] / i_count );
i_mb_count[I_4x4] / i_count,
i_mb_count[I_PCM] / i_count );
}
if( h->stat.i_slice_count[SLICE_TYPE_P] > 0 )
{
......@@ -1852,10 +1853,11 @@ void x264_encoder_close ( x264_t *h )
const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_P];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_P] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
"mb P I16..4: %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n",
"mb P I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% P16..4: %4.1f%% %4.1f%% %4.1f%% %4.1f%% %4.1f%% skip:%4.1f%%\n",
i_mb_count[I_16x16]/ i_count,
i_mb_count[I_8x8] / i_count,
i_mb_count[I_4x4] / i_count,
i_mb_count[I_PCM] / i_count,
i_mb_size[PIXEL_16x16] / (i_count*4),
(i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4),
i_mb_size[PIXEL_8x8] / (i_count*4),
......@@ -1869,10 +1871,11 @@ void x264_encoder_close ( x264_t *h )
const int64_t *i_mb_size = h->stat.i_mb_count_size[SLICE_TYPE_B];
const double i_count = h->stat.i_slice_count[SLICE_TYPE_B] * h->mb.i_mb_count / 100.0;
x264_log( h, X264_LOG_INFO,
"mb B I16..4: %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n",
"mb B I16..4..PCM: %4.1f%% %4.1f%% %4.1f%% %4.1f%% B16..8: %4.1f%% %4.1f%% %4.1f%% direct:%4.1f%% skip:%4.1f%%\n",
i_mb_count[I_16x16] / i_count,
i_mb_count[I_8x8] / i_count,
i_mb_count[I_4x4] / i_count,
i_mb_count[I_PCM] / i_count,
i_mb_size[PIXEL_16x16] / (i_count*4),
(i_mb_size[PIXEL_16x8] + i_mb_size[PIXEL_8x16]) / (i_count*4),
i_mb_size[PIXEL_8x8] / (i_count*4),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment