Commit a9e86d24 authored by Fiona Glaser

Replace High 4:4:4 profile lossless with High 4:4:4 Predictive.

This improves lossless compression by about 4-25% depending on source.
The benefit is generally higher for intra-only compression.
Also add support for 8x8dct and i8x8 blocks in lossless mode; this improves compression very slightly.
In rare cases 8x8dct can hurt compression in lossless mode, but it's usually helpful, albeit marginally.
Note that 8x8dct is only available with CABAC, as it is never useful with CAVLC.
High 4:4:4 Predictive replaced the previous profile in a 2007 revision to the H.264 standard.
The only known compliant decoder for this profile is the latest version of CoreAVC.
As I write this, even the JM reference software does not decode this profile correctly.
Hopefully this commit will spur wider support, since it makes x264 (to my knowledge) the first compliant encoder.
parent adccf49a
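
For context on what the new lossless path does: with transform bypass, the prediction residual is zigzag-scanned and entropy-coded directly, with no DCT and no quantization, and reconstruction is bit-exact. A minimal stand-alone sketch of the 4x4 case (hypothetical helper mirroring the zigzagf.sub_4x4/sub_8x8 hooks used below, not the committed code itself):

    #include <stdint.h>
    #include <string.h>

    /* Lossless 4x4 coding sketch: scan the raw pixel residual (source minus
     * prediction) in zigzag order, then finish reconstruction by copying the
     * source block, since lossless decoding reproduces it exactly. */
    static void lossless_sub_4x4( int16_t level[16],
                                  const uint8_t *src, int i_src,
                                  uint8_t *dst, int i_dst )
    {
        /* raster indices in standard 4x4 frame zigzag order
         * (same order as the ZIGZAG4_FRAME table in this diff) */
        static const uint8_t zigzag[16] =
            { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 };
        int i, y;
        for( i = 0; i < 16; i++ )
        {
            int x  = zigzag[i] & 3;
            int yy = zigzag[i] >> 2;
            level[i] = src[yy*i_src+x] - dst[yy*i_dst+x];
        }
        for( y = 0; y < 4; y++ )
            memcpy( dst + y*i_dst, src + y*i_src, 4 );
    }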
@@ -476,6 +476,8 @@ struct x264_t
/* pointer over mb of the frame to be compressed */
uint8_t *p_fenc[3];
/* pointer to the actual source frame, not a block copy */
uint8_t *p_fenc_plane[3];
/* pointer over mb of the frame to be reconstructed */
uint8_t *p_fdec[3];
@@ -460,45 +460,62 @@ void x264_dct_init_weights( void )
// gcc pessimizes multi-dimensional arrays here, even with constant indices
#define ZIG(i,y,x) level[i] = dct[0][x*8+y];
#define ZIGZAG8_FRAME\
ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)\
ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)\
ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)\
ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)\
ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)\
ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)\
ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)\
ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)\
ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)\
ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)\
ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)\
ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)\
ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)\
ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)\
#define ZIGZAG8_FIELD\
ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)\
ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)\
ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)\
ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)\
ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)\
ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)\
ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)\
ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)\
ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)\
ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)\
ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)\
ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)\
ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)\
ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)\
ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)\
ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
#define ZIGZAG4_FRAME\
ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)\
ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)\
ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)\
ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
#define ZIGZAG4_FIELD\
ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)\
ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)\
ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)\
ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
static void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
{
ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,4,0) ZIG(11,3,1)
ZIG(12,2,2) ZIG(13,1,3) ZIG(14,0,4) ZIG(15,0,5)
ZIG(16,1,4) ZIG(17,2,3) ZIG(18,3,2) ZIG(19,4,1)
ZIG(20,5,0) ZIG(21,6,0) ZIG(22,5,1) ZIG(23,4,2)
ZIG(24,3,3) ZIG(25,2,4) ZIG(26,1,5) ZIG(27,0,6)
ZIG(28,0,7) ZIG(29,1,6) ZIG(30,2,5) ZIG(31,3,4)
ZIG(32,4,3) ZIG(33,5,2) ZIG(34,6,1) ZIG(35,7,0)
ZIG(36,7,1) ZIG(37,6,2) ZIG(38,5,3) ZIG(39,4,4)
ZIG(40,3,5) ZIG(41,2,6) ZIG(42,1,7) ZIG(43,2,7)
ZIG(44,3,6) ZIG(45,4,5) ZIG(46,5,4) ZIG(47,6,3)
ZIG(48,7,2) ZIG(49,7,3) ZIG(50,6,4) ZIG(51,5,5)
ZIG(52,4,6) ZIG(53,3,7) ZIG(54,4,7) ZIG(55,5,6)
ZIG(56,6,5) ZIG(57,7,4) ZIG(58,7,5) ZIG(59,6,6)
ZIG(60,5,7) ZIG(61,6,7) ZIG(62,7,6) ZIG(63,7,7)
ZIGZAG8_FRAME
}
static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
{
ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,2,0) ZIG( 3,0,1)
ZIG( 4,1,1) ZIG( 5,3,0) ZIG( 6,4,0) ZIG( 7,2,1)
ZIG( 8,0,2) ZIG( 9,3,1) ZIG(10,5,0) ZIG(11,6,0)
ZIG(12,7,0) ZIG(13,4,1) ZIG(14,1,2) ZIG(15,0,3)
ZIG(16,2,2) ZIG(17,5,1) ZIG(18,6,1) ZIG(19,7,1)
ZIG(20,3,2) ZIG(21,1,3) ZIG(22,0,4) ZIG(23,2,3)
ZIG(24,4,2) ZIG(25,5,2) ZIG(26,6,2) ZIG(27,7,2)
ZIG(28,3,3) ZIG(29,1,4) ZIG(30,0,5) ZIG(31,2,4)
ZIG(32,4,3) ZIG(33,5,3) ZIG(34,6,3) ZIG(35,7,3)
ZIG(36,3,4) ZIG(37,1,5) ZIG(38,0,6) ZIG(39,2,5)
ZIG(40,4,4) ZIG(41,5,4) ZIG(42,6,4) ZIG(43,7,4)
ZIG(44,3,5) ZIG(45,1,6) ZIG(46,2,6) ZIG(47,4,5)
ZIG(48,5,5) ZIG(49,6,5) ZIG(50,7,5) ZIG(51,3,6)
ZIG(52,0,7) ZIG(53,1,7) ZIG(54,4,6) ZIG(55,5,6)
ZIG(56,6,6) ZIG(57,7,6) ZIG(58,2,7) ZIG(59,3,7)
ZIG(60,4,7) ZIG(61,5,7) ZIG(62,6,7) ZIG(63,7,7)
ZIGZAG8_FIELD
}
#undef ZIG
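
Each scan function above is just one of these macro tables expanded in place. For illustration, an equivalent loop form of zigzag_scan_4x4_frame (a sketch; the commit keeps the unrolled ZIG form because, per the comment above, gcc generates worse code for constant multi-dimensional indexing like dct[x][y] than for the flattened dct[0][x*4+y]):

    #include <stdint.h>

    /* Loop form of the 4x4 frame zigzag scan; order matches ZIGZAG4_FRAME. */
    static void zigzag_scan_4x4_frame_loop( int16_t level[16], int16_t dct[4][4] )
    {
        static const uint8_t scan[16][2] = { /* {y,x} pairs */
            {0,0},{0,1},{1,0},{2,0},{1,1},{0,2},{0,3},{1,2},
            {2,1},{3,0},{3,1},{2,2},{1,3},{2,3},{3,2},{3,3} };
        int i;
        for( i = 0; i < 16; i++ )
            level[i] = dct[0][ scan[i][1]*4 + scan[i][0] ]; /* flat x*4+y */
    }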
@@ -506,10 +523,7 @@ static void zigzag_scan_8x8_field( int16_t level[64], int16_t dct[8][8] )
static void zigzag_scan_4x4_frame( int16_t level[16], int16_t dct[4][4] )
{
ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
ZIGZAG4_FRAME
}
static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
@@ -531,26 +545,40 @@ static void zigzag_scan_4x4_field( int16_t level[16], int16_t dct[4][4] )
*(uint32_t*)(p_dst+0*FDEC_STRIDE) = *(uint32_t*)(p_src+0*FENC_STRIDE);\
*(uint32_t*)(p_dst+1*FDEC_STRIDE) = *(uint32_t*)(p_src+1*FENC_STRIDE);\
*(uint32_t*)(p_dst+2*FDEC_STRIDE) = *(uint32_t*)(p_src+2*FENC_STRIDE);\
*(uint32_t*)(p_dst+3*FDEC_STRIDE) = *(uint32_t*)(p_src+3*FENC_STRIDE);\
*(uint32_t*)(p_dst+3*FDEC_STRIDE) = *(uint32_t*)(p_src+3*FENC_STRIDE);
#define COPY8x8\
*(uint64_t*)(p_dst+0*FDEC_STRIDE) = *(uint64_t*)(p_src+0*FENC_STRIDE);\
*(uint64_t*)(p_dst+1*FDEC_STRIDE) = *(uint64_t*)(p_src+1*FENC_STRIDE);\
*(uint64_t*)(p_dst+2*FDEC_STRIDE) = *(uint64_t*)(p_src+2*FENC_STRIDE);\
*(uint64_t*)(p_dst+3*FDEC_STRIDE) = *(uint64_t*)(p_src+3*FENC_STRIDE);\
*(uint64_t*)(p_dst+4*FDEC_STRIDE) = *(uint64_t*)(p_src+4*FENC_STRIDE);\
*(uint64_t*)(p_dst+5*FDEC_STRIDE) = *(uint64_t*)(p_src+5*FENC_STRIDE);\
*(uint64_t*)(p_dst+6*FDEC_STRIDE) = *(uint64_t*)(p_src+6*FENC_STRIDE);\
*(uint64_t*)(p_dst+7*FDEC_STRIDE) = *(uint64_t*)(p_src+7*FENC_STRIDE);
static void zigzag_sub_4x4_frame( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
{
ZIG( 0,0,0) ZIG( 1,0,1) ZIG( 2,1,0) ZIG( 3,2,0)
ZIG( 4,1,1) ZIG( 5,0,2) ZIG( 6,0,3) ZIG( 7,1,2)
ZIG( 8,2,1) ZIG( 9,3,0) ZIG(10,3,1) ZIG(11,2,2)
ZIG(12,1,3) ZIG(13,2,3) ZIG(14,3,2) ZIG(15,3,3)
ZIGZAG4_FRAME
COPY4x4
}
static void zigzag_sub_4x4_field( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst )
{
ZIG( 0,0,0) ZIG( 1,1,0) ZIG( 2,0,1) ZIG( 3,2,0)
ZIG( 4,3,0) ZIG( 5,1,1) ZIG( 6,2,1) ZIG( 7,3,1)
ZIG( 8,0,2) ZIG( 9,1,2) ZIG(10,2,2) ZIG(11,3,2)
ZIG(12,0,3) ZIG(13,1,3) ZIG(14,2,3) ZIG(15,3,3)
ZIGZAG4_FIELD
COPY4x4
}
static void zigzag_sub_8x8_frame( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
{
ZIGZAG8_FRAME
COPY8x8
}
static void zigzag_sub_8x8_field( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst )
{
ZIGZAG8_FIELD
COPY8x8
}
#undef ZIG
#undef COPY4x4
@@ -560,6 +588,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
{
pf->scan_8x8 = zigzag_scan_8x8_field;
pf->scan_4x4 = zigzag_scan_4x4_field;
pf->sub_8x8 = zigzag_sub_8x8_field;
pf->sub_4x4 = zigzag_sub_4x4_field;
#ifdef HAVE_MMX
if( cpu&X264_CPU_MMXEXT )
@@ -575,6 +604,7 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf, int b_interlaced )
{
pf->scan_8x8 = zigzag_scan_8x8_frame;
pf->scan_4x4 = zigzag_scan_4x4_frame;
pf->sub_8x8 = zigzag_sub_8x8_frame;
pf->sub_4x4 = zigzag_sub_4x4_frame;
#ifdef HAVE_MMX
if( cpu&X264_CPU_MMX )
@@ -118,6 +118,7 @@ typedef struct
{
void (*scan_8x8)( int16_t level[64], int16_t dct[8][8] );
void (*scan_4x4)( int16_t level[16], int16_t dct[4][4] );
void (*sub_8x8)( int16_t level[64], const uint8_t *p_src, uint8_t *p_dst );
void (*sub_4x4)( int16_t level[16], const uint8_t *p_src, uint8_t *p_dst );
} x264_zigzag_function_t;
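
A usage sketch for the extended dispatch table (assumes x264's internal headers; x264_zigzag_init fills in frame/field and SIMD-accelerated variants as appropriate):

    /* Hypothetical wrapper: code one 8x8 block losslessly via the new hook.
     * fenc/fdec point into the per-macroblock caches, which use x264's fixed
     * FENC_STRIDE and FDEC_STRIDE; the hook scans the pixel residual into
     * level[] and copies the source block into the reconstruction. */
    static void code_lossless_8x8( x264_zigzag_function_t *zf,
                                   const uint8_t *fenc, uint8_t *fdec,
                                   int16_t level[64] )
    {
        zf->sub_8x8( level, fenc, fdec );
        /* level[] then goes to CABAC residual coding (8x8dct is CABAC-only) */
    }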
@@ -945,8 +945,9 @@ static void ALWAYS_INLINE x264_macroblock_load_pic_pointers( x264_t *h, int i_mb
if( h->mb.b_interlaced )
ref_pix_offset[1] += (1-2*(i_mb_y&1)) * i_stride;
h->mb.pic.i_stride[i] = i_stride2;
h->mb.pic.p_fenc_plane[i] = &h->fenc->plane[i][i_pix_offset];
h->mc.copy[i?PIXEL_8x8:PIXEL_16x16]( h->mb.pic.p_fenc[i], FENC_STRIDE,
&h->fenc->plane[i][i_pix_offset], i_stride2, w );
h->mb.pic.p_fenc_plane[i], i_stride2, w );
memcpy( &h->mb.pic.p_fdec[i][-1-FDEC_STRIDE], intra_fdec-1, w*3/2+1 );
if( h->mb.b_interlaced )
{
@@ -339,6 +339,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
pf->avg[PIXEL_2x4] = pixel_avg_2x4;
pf->avg[PIXEL_2x2] = pixel_avg_2x2;
pf->copy_16x16_unaligned = mc_copy_w16;
pf->copy[PIXEL_16x16] = mc_copy_w16;
pf->copy[PIXEL_8x8] = mc_copy_w8;
pf->copy[PIXEL_4x4] = mc_copy_w4;
@@ -49,6 +49,7 @@ typedef struct
/* only 16x16, 8x8, and 4x4 defined */
void (*copy[7])( uint8_t *dst, int, uint8_t *src, int, int i_height );
void (*copy_16x16_unaligned)( uint8_t *dst, int, uint8_t *src, int, int i_height );
void (*plane_copy)( uint8_t *dst, int i_dst,
uint8_t *src, int i_src, int w, int h);
@@ -28,11 +28,12 @@ enum profile_e
{
PROFILE_BASELINE = 66,
PROFILE_MAIN = 77,
PROFILE_EXTENTED = 88,
PROFILE_EXTENDED = 88,
PROFILE_HIGH = 100,
PROFILE_HIGH10 = 110,
PROFILE_HIGH422 = 122,
PROFILE_HIGH444 = 144
PROFILE_HIGH444 = 144,
PROFILE_HIGH444_PREDICTIVE = 244,
};
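
profile_idc 244 is what High 4:4:4 Predictive signals in the SPS, replacing the withdrawn value 144. A hedged sketch of the selection an encoder would now make (hypothetical helper; the actual SPS logic is outside this excerpt):

    /* Hypothetical: lossless (transform-bypass) streams must now be
     * declared as High 4:4:4 Predictive rather than the removed profile. */
    static enum profile_e sps_profile_for( int b_lossless )
    {
        return b_lossless ? PROFILE_HIGH444_PREDICTIVE : PROFILE_HIGH;
    }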
enum cqm4_e
@@ -231,6 +231,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
if( !(cpu&X264_CPU_MMX) )
return;
pf->copy_16x16_unaligned = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
@@ -569,8 +569,13 @@ static void x264_mb_analyse_intra_chroma( x264_t *h, x264_mb_analysis_t *a )
int i_mode = predict_mode[i];
/* we do the prediction */
h->predict_8x8c[i_mode]( p_dstc[0] );
h->predict_8x8c[i_mode]( p_dstc[1] );
if( h->mb.b_lossless )
x264_predict_lossless_8x8_chroma( h, i_mode );
else
{
h->predict_8x8c[i_mode]( p_dstc[0] );
h->predict_8x8c[i_mode]( p_dstc[1] );
}
/* we calculate the cost */
i_satd = h->pixf.mbcmp[PIXEL_8x8]( p_dstc[0], FDEC_STRIDE,
@@ -596,7 +601,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
int i, idx;
int i_max;
int predict_mode[9];
int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16;
int b_merged_satd = !!h->pixf.intra_mbcmp_x3_16x16 && !h->mb.b_lossless;
/*---------------- Try all mode and calculate their score ---------------*/
@@ -621,7 +626,11 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
{
int i_satd;
int i_mode = predict_mode[i];
h->predict_16x16[i_mode]( p_dst );
if( h->mb.b_lossless )
x264_predict_lossless_16x16( h, i_mode );
else
h->predict_16x16[i_mode]( p_dst );
i_satd = h->pixf.mbcmp[PIXEL_16x16]( p_dst, FDEC_STRIDE, p_src, FENC_STRIDE ) +
a->i_lambda * bs_size_ue( x264_mb_pred_mode16x16_fix[i_mode] );
@@ -681,7 +690,10 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
int i_satd;
int i_mode = predict_mode[i];
h->predict_8x8[i_mode]( p_dst_by, edge );
if( h->mb.b_lossless )
x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
else
h->predict_8x8[i_mode]( p_dst_by, edge );
i_satd = sa8d( p_dst_by, FDEC_STRIDE, p_src_by, FENC_STRIDE )
+ a->i_lambda * (i_pred_mode == x264_mb_pred_mode4x4_fix(i_mode) ? 1 : 4);
@@ -763,8 +775,10 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
{
int i_satd;
int i_mode = predict_mode[i];
h->predict_4x4[i_mode]( p_dst_by );
if( h->mb.b_lossless )
x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
else
h->predict_4x4[i_mode]( p_dst_by );
i_satd = h->pixf.mbcmp[PIXEL_4x4]( p_dst_by, FDEC_STRIDE,
p_src_by, FENC_STRIDE )
@@ -876,7 +890,10 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
for( i = 0; i < i_max; i++ )
{
i_mode = predict_mode[i];
h->predict_4x4[i_mode]( p_dst_by );
if( h->mb.b_lossless )
x264_predict_lossless_4x4( h, p_dst_by, idx, i_mode );
else
h->predict_4x4[i_mode]( p_dst_by );
i_satd = x264_rd_cost_i4x4( h, a->i_lambda2, idx, i_mode );
if( i_best > i_satd )
@@ -928,7 +945,10 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
i_mode = predict_mode[i];
if( a->i_satd_i8x8_dir[i_mode][idx] > i_thresh )
continue;
h->predict_8x8[i_mode]( p_dst_by, edge );
if( h->mb.b_lossless )
x264_predict_lossless_8x8( h, p_dst_by, idx, i_mode, edge );
else
h->predict_8x8[i_mode]( p_dst_by, edge );
i_satd = x264_rd_cost_i8x8( h, a->i_lambda2, idx, i_mode );
if( i_best > i_satd )
@@ -980,8 +1000,13 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
for( i = 0; i < i_max; i++ )
{
i_mode = predict_mode[i];
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
if( h->mb.b_lossless )
x264_predict_lossless_8x8_chroma( h, i_mode );
else
{
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
}
/* if we've already found a mode that needs no residual, then
* probably any mode with a residual will be worse.
* so avoid dct on the remaining modes to improve speed. */
@@ -2035,7 +2060,7 @@ static void refine_bidir( x264_t *h, x264_mb_analysis_t *a )
static inline void x264_mb_analyse_transform( x264_t *h )
{
if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 )
if( x264_mb_transform_8x8_allowed( h ) && h->param.analyse.b_transform_8x8 && !h->mb.b_lossless )
{
int i_cost4, i_cost8;
/* Only luma MC is really needed, but the full MC is re-used in macroblock_encode. */
@@ -403,7 +403,6 @@ static int x264_validate_parameters( x264_t *h )
h->param.rc.i_rc_method = X264_RC_CQP;
h->param.rc.f_ip_factor = 1;
h->param.rc.f_pb_factor = 1;
h->param.analyse.b_transform_8x8 = 0;
h->param.analyse.b_psnr = 0;
h->param.analyse.b_ssim = 0;
h->param.analyse.i_chroma_qp_offset = 0;
@@ -411,6 +410,9 @@ static int x264_validate_parameters( x264_t *h )
h->param.analyse.b_fast_pskip = 0;
h->param.analyse.i_noise_reduction = 0;
h->param.analyse.f_psy_rd = 0;
/* 8x8dct is useless in CAVLC lossless: CAVLC has no 8x8 coefficient coding and
* splits each 8x8 block into four interleaved 4x4 scans, so nothing is gained */
if( !h->param.b_cabac )
h->param.analyse.b_transform_8x8 = 0;
}
if( h->param.rc.i_rc_method == X264_RC_CQP )
{
@@ -133,6 +133,12 @@ void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qp )
uint8_t *p_dst = &h->mb.pic.p_fdec[0][x+y*FDEC_STRIDE];
DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
if( h->mb.b_lossless )
{
h->zigzagf.sub_8x8( h->dct.luma8x8[idx], p_src, p_dst );
return;
}
h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );
x264_quant_8x8( h, dct8x8, i_qp, 1, idx );
@@ -322,6 +328,74 @@ static void x264_macroblock_encode_pskip( x264_t *h )
x264_macroblock_encode_skip( h );
}
/*****************************************************************************
* Intra prediction for predictive lossless mode.
*****************************************************************************/
/* In transform-bypass (lossless) mode, the 2007 spec defines vertical and horizontal intra
* prediction as sample-wise DPCM: each pixel is predicted from its reconstructed top or left
* neighbor. Since lossless reconstruction is identical to the source, the whole prediction
* block is simply the source plane shifted by one pixel, which is what the mc.copy calls
* below implement. */
/* Note that this shortcut (mc.copy instead of actual pixel prediction) assumes that the edge
* pixels of the reconstructed frame are the same as those of the source frame, so it only
* works if the neighboring blocks are losslessly coded too. In practice, this means lossless
* mode cannot be mixed with lossy mode within a frame. */
/* This could be resolved by explicitly copying the edge pixels after the mc.copy, but that
* isn't necessary unless we decide to allow mixing lossless and lossy compression. */
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode )
{
int stride = h->fenc->i_stride[1] << h->mb.b_interlaced;
if( i_mode == I_PRED_CHROMA_V )
{
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc_plane[1]-stride, stride, 8 );
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc_plane[2]-stride, stride, 8 );
}
else if( i_mode == I_PRED_CHROMA_H )
{
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc_plane[1]-1, stride, 8 );
h->mc.copy[PIXEL_8x8]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc_plane[2]-1, stride, 8 );
}
else
{
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
}
}
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode )
{
int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + block_idx_x[idx]*4 + block_idx_y[idx]*4 * stride;
if( i_mode == I_PRED_4x4_V )
h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-stride, stride, 4 );
else if( i_mode == I_PRED_4x4_H )
h->mc.copy[PIXEL_4x4]( p_dst, FDEC_STRIDE, p_src-1, stride, 4 );
else
h->predict_4x4[i_mode]( p_dst );
}
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] )
{
int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
uint8_t *p_src = h->mb.pic.p_fenc_plane[0] + (idx&1)*8 + (idx>>1)*8*stride;
if( i_mode == I_PRED_8x8_V )
h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-stride, stride, 8 );
else if( i_mode == I_PRED_8x8_H )
h->mc.copy[PIXEL_8x8]( p_dst, FDEC_STRIDE, p_src-1, stride, 8 );
else
h->predict_8x8[i_mode]( p_dst, edge );
}
void x264_predict_lossless_16x16( x264_t *h, int i_mode )
{
int stride = h->fenc->i_stride[0] << h->mb.b_interlaced;
if( i_mode == I_PRED_16x16_V )
h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-stride, stride, 16 );
else if( i_mode == I_PRED_16x16_H )
h->mc.copy_16x16_unaligned( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.p_fenc_plane[0]-1, stride, 16 );
else
h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
}
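
As noted above, V/H prediction under transform bypass is sample-wise DPCM. A sketch of the vertical 16x16 case (illustrative loop; the functions above implement it with a single mc.copy):

    #include <stdint.h>

    /* Vertical DPCM under transform bypass: pixel (x,y) is predicted from
     * the reconstructed pixel (x,y-1); with reconstruction == source, the
     * prediction block is the source plane shifted up one row, i.e.
     * mc.copy( dst, FDEC_STRIDE, src_plane - stride, stride, 16 ). */
    static void predict_v_dpcm_16x16( uint8_t *dst, int i_dst,
                                      const uint8_t *src_plane, int i_src )
    {
        int x, y;
        for( y = 0; y < 16; y++ )
            for( x = 0; x < 16; x++ )
                dst[y*i_dst + x] = src_plane[(y-1)*i_src + x];
    }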
/*****************************************************************************
* x264_macroblock_encode:
*****************************************************************************/
@@ -370,8 +444,11 @@ void x264_macroblock_encode( x264_t *h )
{
const int i_mode = h->mb.i_intra16x16_pred_mode;
h->mb.b_transform_8x8 = 0;
/* do the right prediction */
h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
if( h->mb.b_lossless )
x264_predict_lossless_16x16( h, i_mode );
else
h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0] );
/* encode the 16x16 macroblock */
x264_mb_encode_i16x16( h, i_qp );
@@ -392,9 +469,13 @@ void x264_macroblock_encode( x264_t *h )
{
uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * FDEC_STRIDE];
int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
x264_predict_8x8_filter( p_dst, edge, h->mb.i_neighbour8[i], x264_pred_i4x4_neighbors[i_mode] );
h->predict_8x8[i_mode]( p_dst, edge );
if( h->mb.b_lossless )
x264_predict_lossless_8x8( h, p_dst, i, i_mode, edge );
else
h->predict_8x8[i_mode]( p_dst, edge );
x264_mb_encode_i8x8( h, i, i_qp );
}
for( i = 0; i < 4; i++ )
@@ -420,7 +501,10 @@ void x264_macroblock_encode( x264_t *h )
/* emulate missing topright samples */
*(uint32_t*) &p_dst[4-FDEC_STRIDE] = p_dst[3-FDEC_STRIDE] * 0x01010101U;
h->predict_4x4[i_mode]( p_dst );
if( h->mb.b_lossless )
x264_predict_lossless_4x4( h, p_dst, i, i_mode );
else
h->predict_4x4[i_mode]( p_dst );
x264_mb_encode_i4x4( h, i, i_qp );
}
}
@@ -435,12 +519,23 @@ void x264_macroblock_encode( x264_t *h )
if( h->mb.b_lossless )
{
for( i4x4 = 0; i4x4 < 16; i4x4++ )
{
h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
}
if( h->mb.b_transform_8x8 )
for( i8x8 = 0; i8x8 < 4; i8x8++ )
{
int x = 8*(i8x8&1);
int y = 8*(i8x8>>1);
h->zigzagf.sub_8x8( h->dct.luma8x8[i8x8],
h->mb.pic.p_fenc[0]+x+y*FENC_STRIDE,
h->mb.pic.p_fdec[0]+x+y*FDEC_STRIDE );
nnz8x8[i8x8] = array_non_zero( h->dct.luma8x8[i8x8] );
}
else
for( i4x4 = 0; i4x4 < 16; i4x4++ )
{
h->zigzagf.sub_4x4( h->dct.luma4x4[i4x4],
h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4x4],
h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4x4] );
}
}
else if( h->mb.b_transform_8x8 )
{
@@ -531,8 +626,13 @@ void x264_macroblock_encode( x264_t *h )
if( IS_INTRA( h->mb.i_type ) )
{
const int i_mode = h->mb.i_chroma_pred_mode;
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
if( h->mb.b_lossless )
x264_predict_lossless_8x8_chroma( h, i_mode );
else
{
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1] );
h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2] );
}
}
/* encode the 8x8 blocks */
@@ -756,12 +856,20 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
if( h->mb.b_lossless )
{
int i4;
for( i4 = i8*4; i4 < i8*4+4; i4++ )
if( h->mb.b_transform_8x8 )
{
h->zigzagf.sub_8x8( h->dct.luma8x8[i8], p_fenc, p_fdec );
nnz8x8 = array_non_zero( h->dct.luma8x8[i8] );
}
else
{
h->zigzagf.sub_4x4( h->dct.luma4x4[i4],
h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4],
h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4] );
nnz8x8 |= array_non_zero( h->dct.luma4x4[i4] );
for( i4 = i8*4; i4 < i8*4+4; i4++ )
{
h->zigzagf.sub_4x4( h->dct.luma4x4[i4],
h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4],
h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4] );
nnz8x8 |= array_non_zero( h->dct.luma4x4[i4] );
}
}
for( ch = 0; ch < 2; ch++ )
{
@@ -38,6 +38,11 @@ static inline int x264_macroblock_probe_pskip( x264_t *h )
static inline int x264_macroblock_probe_bskip( x264_t *h )
{ return x264_macroblock_probe_skip( h, 1 ); }
void x264_predict_lossless_8x8_chroma( x264_t *h, int i_mode );
void x264_predict_lossless_4x4( x264_t *h, uint8_t *p_dst, int idx, int i_mode );
void x264_predict_lossless_8x8( x264_t *h, uint8_t *p_dst, int idx, int i_mode, uint8_t edge[33] );
void x264_predict_lossless_16x16( x264_t *h, int i_mode );