Commit 1eb8b071 authored by Fiona Glaser

Improve subme7 at low QPs and add subme7 support in lossless mode

parent 01d7deaf
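
The low-QP gain comes from the rdo.c changes below: subpartition RD costs now keep 8 extra fractional bits instead of rounding them away, which is why the cost variables widen to uint64_t and a larger sentinel, COST_MAX64, is introduced. The lossless half of the change is the new b_lossless path in x264_macroblock_encode_p8x8, which zigzag-scans the raw pixel differences instead of running the transform/quantization path. A minimal sketch of the cost arithmetic (not the committed code; ssd, f8_bits and lambda2 are stand-ins for values x264 computes elsewhere):

    #include <stdint.h>

    /* lambda2 and the CABAC bit count both carry 8 fractional bits,
     * so their product carries 16. */
    uint64_t rd_cost_old( uint32_t ssd, uint64_t f8_bits, uint32_t lambda2 )
    {
        /* old behaviour: drop all 16 fractional bits; at low QPs many
         * nearly-identical candidates round to the same integer cost */
        return ssd + ( ( f8_bits * lambda2 + 32768 ) >> 16 );
    }

    uint64_t rd_cost_new( uint32_t ssd, uint64_t f8_bits, uint32_t lambda2 )
    {
        /* new behaviour: keep 8 fractional bits and scale the SSD term
         * up to match, so costs need 64 bits (hence COST_MAX64) */
        return ( (uint64_t)ssd << 8 ) + ( ( f8_bits * lambda2 + 128 ) >> 8 );
    }
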
@@ -782,7 +782,8 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
     uint8_t *p_dst = h->mb.pic.p_fdec[0];
     int i, j, idx, x, y;
-    int i_max, i_satd, i_best, i_mode, i_thresh;
+    int i_max, i_mode, i_thresh;
+    uint64_t i_satd, i_best;
     int i_pred_mode;
     int predict_mode[9];
     h->mb.i_skip_intra = 0;
@@ -810,7 +811,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
         for( idx = 0; idx < 16; idx++ )
         {
             uint8_t *p_dst_by = p_dst + block_idx_xy_fdec[idx];
-            i_best = COST_MAX;
+            i_best = COST_MAX64;
             i_pred_mode = x264_mb_predict_intra4x4_mode( h, idx );
@@ -860,7 +861,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a )
             int j;
             i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8;
-            i_best = COST_MAX;
+            i_best = COST_MAX64;
             i_pred_mode = x264_mb_predict_intra4x4_mode( h, 4*idx );
             x = idx&1;
             y = idx>>1;
...
...@@ -410,7 +410,6 @@ static int x264_validate_parameters( x264_t *h ) ...@@ -410,7 +410,6 @@ static int x264_validate_parameters( x264_t *h )
h->param.analyse.i_trellis = 0; h->param.analyse.i_trellis = 0;
h->param.analyse.b_fast_pskip = 0; h->param.analyse.b_fast_pskip = 0;
h->param.analyse.i_noise_reduction = 0; h->param.analyse.i_noise_reduction = 0;
h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 6 );
} }
if( h->param.rc.i_rc_method == X264_RC_CQP ) if( h->param.rc.i_rc_method == X264_RC_CQP )
{ {
......
@@ -747,11 +747,31 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
     uint8_t *p_fenc = h->mb.pic.p_fenc[0] + (i8&1)*8 + (i8>>1)*8*FENC_STRIDE;
     uint8_t *p_fdec = h->mb.pic.p_fdec[0] + (i8&1)*8 + (i8>>1)*8*FDEC_STRIDE;
     int b_decimate = h->sh.i_type == SLICE_TYPE_B || h->param.analyse.b_dct_decimate;
-    int nnz8x8;
+    int nnz8x8 = 0;
     int ch;
     x264_mb_mc_8x8( h, i8 );
+    if( h->mb.b_lossless )
+    {
+        int i4;
+        for( i4 = i8*4; i4 < i8*4+4; i4++ )
+        {
+            h->zigzagf.sub_4x4( h->dct.luma4x4[i4],
+                                h->mb.pic.p_fenc[0]+block_idx_xy_fenc[i4],
+                                h->mb.pic.p_fdec[0]+block_idx_xy_fdec[i4] );
+            nnz8x8 |= array_non_zero( h->dct.luma4x4[i4] );
+        }
+        for( ch = 0; ch < 2; ch++ )
+        {
+            p_fenc = h->mb.pic.p_fenc[1+ch] + (i8&1)*4 + (i8>>1)*4*FENC_STRIDE;
+            p_fdec = h->mb.pic.p_fdec[1+ch] + (i8&1)*4 + (i8>>1)*4*FDEC_STRIDE;
+            h->zigzagf.sub_4x4( h->dct.luma4x4[16+i8+ch*4], p_fenc, p_fdec );
+            h->dct.luma4x4[16+i8+ch*4][0] = 0;
+        }
+    }
+    else
+    {
     if( h->mb.b_transform_8x8 )
     {
         DECLARE_ALIGNED_16( int16_t dct8x8[8][8] );
@@ -816,7 +836,7 @@ void x264_macroblock_encode_p8x8( x264_t *h, int i8 )
             h->dctf.add4x4_idct( p_fdec, dct4x4 );
         }
     }
+    }
     h->mb.i_cbp_luma &= ~(1 << i8);
     h->mb.i_cbp_luma |= nnz8x8 << i8;
     h->mb.i_cbp_chroma = 0x02;
...
@@ -913,7 +913,7 @@ int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight
 { \
     if( satd <= bsatd * SATD_THRESH )\
     { \
-        int cost; \
+        uint64_t cost; \
         *(uint32_t*)cache_mv = *(uint32_t*)cache_mv2 = pack16to32_mask(mx,my); \
         cost = x264_rd_cost_part( h, i_lambda2, i8, m->i_pixel ); \
         COPY4_IF_LT( bcost, cost, bmx, mx, bmy, my, dir, do_dir?mdir:dir ); \
@@ -934,7 +934,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 )
     const int i_pixel = m->i_pixel;
     DECLARE_ALIGNED_16( uint8_t pix[16*16] );
-    int bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX;
+    uint64_t bcost = m->i_pixel == PIXEL_16x16 ? m->cost : COST_MAX64;
     int bmx = m->mv[0];
     int bmy = m->mv[1];
     int omx = bmx;
...
@@ -25,6 +25,7 @@
 #define X264_ME_H
 #define COST_MAX (1<<28)
+#define COST_MAX64 (1ULL<<60)
 typedef struct
 {
@@ -54,7 +55,7 @@ static inline void x264_me_search( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], i
 void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
 void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8 );
 int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
-int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
+uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
 extern uint16_t *x264_cost_mv_fpel[52][4];
...
@@ -101,9 +101,11 @@ static int x264_rd_cost_mb( x264_t *h, int i_lambda2 )
     return i_ssd + i_bits;
 }
-int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel )
+/* subpartition RD functions use 8 bits more precision to avoid large rounding errors at low QPs */
+uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel )
 {
-    int i_ssd, i_bits;
+    uint64_t i_ssd, i_bits;
     if( i_pixel == PIXEL_16x16 )
     {
@@ -128,19 +130,19 @@ int x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel )
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
         x264_partition_size_cabac( h, &cabac_tmp, i8, i_pixel );
-        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16;
+        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
     {
-        i_bits = ( x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2 + 128 ) >> 8;
+        i_bits = x264_partition_size_cavlc( h, i8, i_pixel ) * i_lambda2;
     }
-    return i_ssd + i_bits;
+    return (i_ssd<<8) + i_bits;
 }
-int x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
+uint64_t x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
 {
-    int i_ssd, i_bits;
+    uint64_t i_ssd, i_bits;
     x264_mb_encode_i8x8( h, i8, h->mb.i_qp );
     i_ssd = ssd_plane( h, PIXEL_8x8, 0, (i8&1)*8, (i8>>1)*8 );
@@ -150,19 +152,19 @@ int x264_rd_cost_i8x8( x264_t *h, int i_lambda2, int i8, int i_mode )
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
         x264_partition_i8x8_size_cabac( h, &cabac_tmp, i8, i_mode );
-        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16;
+        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
     {
-        i_bits = ( x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2 + 128 ) >> 8;
+        i_bits = x264_partition_i8x8_size_cavlc( h, i8, i_mode ) * i_lambda2;
     }
-    return i_ssd + i_bits;
+    return (i_ssd<<8) + i_bits;
 }
-int x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
+uint64_t x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
 {
-    int i_ssd, i_bits;
+    uint64_t i_ssd, i_bits;
     x264_mb_encode_i4x4( h, i4, h->mb.i_qp );
     i_ssd = ssd_plane( h, PIXEL_4x4, 0, block_idx_x[i4]*4, block_idx_y[i4]*4 );
@@ -172,19 +174,19 @@ int x264_rd_cost_i4x4( x264_t *h, int i_lambda2, int i4, int i_mode )
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
         x264_partition_i4x4_size_cabac( h, &cabac_tmp, i4, i_mode );
-        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16;
+        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
     {
-        i_bits = ( x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2 + 128 ) >> 8;
+        i_bits = x264_partition_i4x4_size_cavlc( h, i4, i_mode ) * i_lambda2;
     }
-    return i_ssd + i_bits;
+    return (i_ssd<<8) + i_bits;
 }
-int x264_rd_cost_i8x8_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
+uint64_t x264_rd_cost_i8x8_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
 {
-    int i_ssd, i_bits;
+    uint64_t i_ssd, i_bits;
     if( b_dct )
         x264_mb_encode_8x8_chroma( h, 0, h->mb.i_chroma_qp );
@@ -198,14 +200,14 @@ int x264_rd_cost_i8x8_chroma( x264_t *h, int i_lambda2, int i_mode, int b_dct )
         x264_cabac_t cabac_tmp;
         COPY_CABAC;
         x264_i8x8_chroma_size_cabac( h, &cabac_tmp );
-        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 32768 ) >> 16;
+        i_bits = ( (uint64_t)cabac_tmp.f8_bits_encoded * i_lambda2 + 128 ) >> 8;
     }
     else
     {
-        i_bits = ( x264_i8x8_chroma_size_cavlc( h ) * i_lambda2 + 128 ) >> 8;
+        i_bits = x264_i8x8_chroma_size_cavlc( h ) * i_lambda2;
     }
-    return i_ssd + i_bits;
+    return (i_ssd<<8) + i_bits;
 }
 /****************************************************************************
  * Trellis RD quantization
...