Commit 834eac28 authored by Loren Merritt

Add: implicit weighted prediction for B-frames.

Slightly optimize x264_mb_mc_01xywh.
Fix an error in B16x8 cost.


git-svn-id: svn://svn.videolan.org/x264/trunk@134 df754926-b1dd-0310-bc7b-ec298dee348c
parent 47706e75
......@@ -68,4 +68,9 @@ static inline uint8_t x264_mc_clip1( int x )
return x264_mc_clip1_table[x+80];
}
/* Saturate x to the range [0, 255].
 * Values below 0 become 0, values above 255 become 255, everything else is
 * returned unchanged.  Plain comparisons are used so the result does not
 * depend on how the compiler right-shifts negative numbers, and so that
 * INT_MIN is handled without negating it. */
static inline uint8_t x264_clip_uint8( int x )
{
    /* any bit outside the low 8 means x is either negative or > 255 */
    if( x & ~255 )
        return x < 0 ? 0 : 255;
    return x;
}
#endif
......@@ -342,6 +342,9 @@ struct x264_t
int i_last_qp; /* last qp */
int i_last_dqp; /* last delta qp */
/* B_direct and weighted prediction */
int dist_scale_factor[16][16];
int bipred_weight[16][16];
} mb;
/* rate control encoding only */
......
......@@ -373,19 +373,8 @@ static int x264_mb_predict_mv_direct16x16_temporal( x264_t *h )
}
else
{
int poc0 = h->fref0[i_ref]->i_poc;
int poc1 = h->fref1[0]->i_poc;
int td = x264_clip3( poc1 - poc0, -128, 127 );
int dist_scale_factor;
const int dist_scale_factor = h->mb.dist_scale_factor[i_ref][0];
int x4, y4;
if( td == 0 /* || pic0 is a long-term ref */ )
dist_scale_factor = 256;
else
{
int tb = x264_clip3( h->fdec->i_poc - poc0, -128, 127 );
int tx = (16384 + (abs(td) >> 1)) / td;
dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );
}
x264_macroblock_cache_ref( h, x8, y8, 2, 2, 0, i_ref );
......@@ -623,15 +612,34 @@ static inline void x264_mb_mc_01xywh( x264_t *h, int x, int y, int width, int he
h->mc.mc_luma( h->mb.pic.p_fref[1][i_ref1], h->mb.pic.i_stride[0],
tmp, 16, mvx1 + 4*4*x, mvy1 + 4*4*y, 4*width, 4*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
if( h->param.analyse.b_weighted_bipred )
{
const int i_ref0 = h->mb.cache.ref[0][i8];
const int weight = h->mb.bipred_weight[i_ref0][i_ref1];
h->pixf.avg_weight[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16, weight );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16, weight );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg_weight[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16, weight );
}
else
{
h->pixf.avg[i_mode]( &h->mb.pic.p_fdec[0][4*y *h->mb.pic.i_stride[0]+4*x], h->mb.pic.i_stride[0], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][4][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[1][2*y*h->mb.pic.i_stride[1]+2*x], h->mb.pic.i_stride[1], tmp, 16 );
h->mc.mc_chroma( &h->mb.pic.p_fref[1][i_ref1][5][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2],
tmp, 16, mvx1, mvy1, 2*width, 2*height );
h->pixf.avg[i_mode+3]( &h->mb.pic.p_fdec[2][2*y*h->mb.pic.i_stride[2]+2*x], h->mb.pic.i_stride[2], tmp, 16 );
}
}
static void x264_mb_mc_direct8x8( x264_t *h, int x, int y )
......@@ -1372,3 +1380,35 @@ void x264_macroblock_cache_save( x264_t *h )
}
}
}
/* Fill h->mb.dist_scale_factor and h->mb.bipred_weight for every
 * (list0 reference, list1 reference) pair of the current frame.
 *
 * dist_scale_factor is derived from the POC distances between the two
 * references and the frame being encoded (h->fdec); it defaults to 256
 * when both references share the same POC.
 *
 * bipred_weight is the value handed to the pixf.avg_weight functions as
 * their weight argument.  It is 64 - (dist_scale_factor >> 2) when implicit
 * weighting is enabled and usable, and the neutral weight 32 otherwise.
 * Implicit weighting is not usable when the two references have the same
 * POC (td == 0) or when the scaled factor falls outside [-64, 128]; in
 * both cases the plain 32/32 average must be used. */
void x264_macroblock_bipred_init( x264_t *h )
{
    int i_ref0, i_ref1;

    for( i_ref0 = 0; i_ref0 < h->i_ref0; i_ref0++ )
    {
        const int poc0 = h->fref0[i_ref0]->i_poc;

        for( i_ref1 = 0; i_ref1 < h->i_ref1; i_ref1++ )
        {
            const int poc1 = h->fref1[i_ref1]->i_poc;
            const int td = x264_clip3( poc1 - poc0, -128, 127 );
            int dist_scale_factor = 256;
            int weight = 32;

            if( td != 0 /* && pic0 is not a long-term ref */ )
            {
                const int tb = x264_clip3( h->fdec->i_poc - poc0, -128, 127 );
                const int tx = (16384 + (abs(td) >> 1)) / td;
                dist_scale_factor = x264_clip3( (tb * tx + 32) >> 6, -1024, 1023 );

                if( h->param.analyse.b_weighted_bipred )
                {
                    /* only derive a weight from a real temporal distance;
                     * the td == 0 default of 256 would otherwise turn into
                     * a weight of 0 instead of the neutral 32 */
                    const int scaled = dist_scale_factor >> 2;
                    if( scaled >= -64 && scaled <= 128 )
                        weight = 64 - scaled;
                }
            }

            h->mb.dist_scale_factor[i_ref0][i_ref1] = dist_scale_factor;
            h->mb.bipred_weight[i_ref0][i_ref1] = weight;
        }
    }
}
......@@ -154,6 +154,8 @@ void x264_macroblock_cache_load( x264_t *h, int i_mb_x, int i_mb_y );
void x264_macroblock_cache_save( x264_t *h );
void x264_macroblock_cache_end( x264_t *h );
void x264_macroblock_bipred_init( x264_t *h );
void x264_mb_dequant_4x4_dc( int16_t dct[4][4], int i_qscale );
void x264_mb_dequant_2x2_dc( int16_t dct[2][2], int i_qscale );
void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale );
......
......@@ -32,6 +32,7 @@
#include "x264.h"
#include "pixel.h"
#include "clip1.h"
#ifdef HAVE_MMXEXT
# include "i386/pixel.h"
......@@ -174,6 +175,60 @@ PIXEL_AVG_C( pixel_avg_8x8, 8, 8 )
PIXEL_AVG_C( pixel_avg_8x4, 8, 4 )
PIXEL_AVG_C( pixel_avg_4x8, 4, 8 )
PIXEL_AVG_C( pixel_avg_4x4, 4, 4 )
PIXEL_AVG_C( pixel_avg_4x2, 4, 2 )
PIXEL_AVG_C( pixel_avg_2x4, 2, 4 )
PIXEL_AVG_C( pixel_avg_2x2, 2, 2 )
/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
#define op_scale2(x) dst[x] = x264_clip_uint8( (dst[x]*i_weight1 + src[x]*i_weight2 + (1<<5)) >> 6 )
/* Blend src into dst in place, row by row:
 *   dst = clip( (dst*i_weight1 + src*(64 - i_weight1) + 32) >> 6 )
 * i_dst and i_src are the distances between consecutive rows of each
 * buffer.  width selects how many pixels of each row are processed. */
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height, int i_weight1 )
{
    const int i_weight2 = 64 - i_weight1;
    /* rows are 2, 4 or 8 pixels wide; any other width covers 16 pixels */
    const int i_cols = ( width == 2 || width == 4 || width == 8 ) ? width : 16;
    int row, col;

    for( row = 0; row < height; row++ )
    {
        for( col = 0; col < i_cols; col++ )
            op_scale2( col );
        dst += i_dst;
        src += i_src;
    }
}
/* Generate a fixed-size wrapper pixel_avg_weight_<width>x<height>() that
 * forwards to pixel_avg_weight_wxh() with width and height passed as
 * constants.  The wrappers share one five-argument signature:
 * (pix1, stride of pix1, pix2, stride of pix2, weight). */
#define PIXEL_AVG_WEIGHT_C( width, height ) \
static void pixel_avg_weight_##width##x##height( \
uint8_t *pix1, int i_stride_pix1, \
uint8_t *pix2, int i_stride_pix2, int i_weight1 ) \
{ \
pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height, i_weight1 ); \
}
/* One wrapper per supported block size. */
PIXEL_AVG_WEIGHT_C(16,16)
PIXEL_AVG_WEIGHT_C(16,8)
PIXEL_AVG_WEIGHT_C(8,16)
PIXEL_AVG_WEIGHT_C(8,8)
PIXEL_AVG_WEIGHT_C(8,4)
PIXEL_AVG_WEIGHT_C(4,8)
PIXEL_AVG_WEIGHT_C(4,4)
PIXEL_AVG_WEIGHT_C(4,2)
PIXEL_AVG_WEIGHT_C(2,4)
PIXEL_AVG_WEIGHT_C(2,2)
/* Both helper macros are only needed to build the functions above. */
#undef op_scale2
#undef PIXEL_AVG_WEIGHT_C
/****************************************************************************
* x264_pixel_init:
......@@ -203,6 +258,21 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->avg[PIXEL_8x4] = pixel_avg_8x4;
pixf->avg[PIXEL_4x8] = pixel_avg_4x8;
pixf->avg[PIXEL_4x4] = pixel_avg_4x4;
pixf->avg[PIXEL_4x2] = pixel_avg_4x2;
pixf->avg[PIXEL_2x4] = pixel_avg_2x4;
pixf->avg[PIXEL_2x2] = pixel_avg_2x2;
pixf->avg_weight[PIXEL_16x16]= pixel_avg_weight_16x16;
pixf->avg_weight[PIXEL_16x8] = pixel_avg_weight_16x8;
pixf->avg_weight[PIXEL_8x16] = pixel_avg_weight_8x16;
pixf->avg_weight[PIXEL_8x8] = pixel_avg_weight_8x8;
pixf->avg_weight[PIXEL_8x4] = pixel_avg_weight_8x4;
pixf->avg_weight[PIXEL_4x8] = pixel_avg_weight_4x8;
pixf->avg_weight[PIXEL_4x4] = pixel_avg_weight_4x4;
pixf->avg_weight[PIXEL_4x2] = pixel_avg_weight_4x2;
pixf->avg_weight[PIXEL_2x4] = pixel_avg_weight_2x4;
pixf->avg_weight[PIXEL_2x2] = pixel_avg_weight_2x2;
#ifdef HAVE_MMXEXT
if( cpu&X264_CPU_MMXEXT )
{
......
......@@ -27,6 +27,7 @@
/* Function pointer types for the per-block-size pixel routines.
 * The arguments are presumably two (pixel pointer, stride) pairs, which is
 * how the satd and avg pointers are called elsewhere in this change. */
typedef int (*x264_pixel_sad_t) ( uint8_t *, int, uint8_t *, int );
typedef int (*x264_pixel_satd_t)( uint8_t *, int, uint8_t *, int );
typedef void (*x264_pixel_avg_t) ( uint8_t *, int, uint8_t *, int );
/* Like x264_pixel_avg_t with one extra trailing int: the weight passed
 * through to the weighted averaging functions. */
typedef void (*x264_pixel_avg_weight_t) ( uint8_t *, int, uint8_t *, int, int );
enum
{
......@@ -37,6 +38,9 @@ enum
PIXEL_8x4 = 4,
PIXEL_4x8 = 5,
PIXEL_4x4 = 6,
PIXEL_4x2 = 7,
PIXEL_2x4 = 8,
PIXEL_2x2 = 9,
};
static const struct {
......@@ -62,7 +66,8 @@ typedef struct
{
x264_pixel_sad_t sad[7];
x264_pixel_satd_t satd[7];
x264_pixel_avg_t avg[7];
x264_pixel_avg_t avg[10];
x264_pixel_avg_weight_t avg_weight[10];
} x264_pixel_function_t;
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
......
......@@ -816,6 +816,15 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a )
a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT];
}
/* Average src2 into pix1 for the given block size index.
 * Uses the weighted average with the bipred weight of the current
 * (l0 ref, l1 ref) pair when b_weighted_bipred is set, and the plain
 * average otherwise.
 * Expects `h` (x264_t *) and `a` (x264_mb_analysis_t *) to be in scope at
 * the point of use.
 * Wrapped in do { } while( 0 ) so that `WEIGHTED_AVG( ... );` is a single
 * statement and stays safe inside an unbraced if/else. */
#define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \
    do \
    { \
        if( h->param.analyse.b_weighted_bipred ) \
            h->pixf.avg_weight[size]( (pix1), (stride1), (src2), (stride2), \
                    h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref] ); \
        else \
            h->pixf.avg[size]( (pix1), (stride1), (src2), (stride2) ); \
    } while( 0 )
static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
{
uint8_t pix1[16*16], pix2[16*16];
......@@ -917,7 +926,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a )
16, 16 );
}
h->pixf.avg[PIXEL_16x16]( pix1, 16, src2, stride2 );
WEIGHTED_AVG( PIXEL_16x16, pix1, 16, src2, stride2 );
a->i_cost16x16bi = h->pixf.satd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], pix1, 16 )
+ a->i_lambda * ( bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, a->l0.i_ref )
......@@ -1041,7 +1050,7 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a )
/* FIXME: ref cost */
}
h->pixf.avg[PIXEL_8x8]( pix[0], 8, pix[1], 8 );
WEIGHTED_AVG( PIXEL_8x8, pix[0], 8, pix[1], 8 );
i_part_cost_bi += h->pixf.satd[PIXEL_8x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 )
+ a->i_lambda * i_sub_mb_b_cost_table[D_BI_8x8];
a->l0.me8x8[i].cost += a->i_lambda * i_sub_mb_b_cost_table[D_L0_8x8];
......@@ -1089,7 +1098,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
h->mb.pic.p_fref[1][a->l1.i_ref][3]
} };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
uint8_t pix[2][8*8];
uint8_t pix[2][16*8];
int i_ref_stride = h->mb.pic.i_stride[0];
int mvc[2][2];
int i, l;
......@@ -1125,14 +1134,14 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a )
x264_me_search( h, m, mvc, 2 );
/* BI mode */
h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
m->mv[0], m->mv[1], 8, 8 );
h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 16,
m->mv[0], m->mv[1], 16, 8 );
/* FIXME: ref cost */
i_part_cost_bi += m->cost_mv;
}
h->pixf.avg[PIXEL_16x8]( pix[0], 8, pix[1], 8 );
i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
WEIGHTED_AVG( PIXEL_16x8, pix[0], 16, pix[1], 16 );
i_part_cost_bi += h->pixf.satd[PIXEL_16x8]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 16 );
i_part_cost = a->l0.me16x8[i].cost;
a->i_mb_partition16x8[i] = D_L0_8x8; /* not actually 8x8, only the L0 matters */
......@@ -1173,7 +1182,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
h->mb.pic.p_fref[1][a->l1.i_ref][3]
} };
uint8_t *p_fenc = h->mb.pic.p_fenc[0];
uint8_t pix[2][8*8];
uint8_t pix[2][8*16];
int i_ref_stride = h->mb.pic.i_stride[0];
int mvc[2][2];
int i, l;
......@@ -1209,12 +1218,12 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a )
/* BI mode */
h->mc.mc_luma( m->p_fref, m->i_stride, pix[l], 8,
m->mv[0], m->mv[1], 8, 8 );
m->mv[0], m->mv[1], 8, 16 );
/* FIXME: ref cost */
i_part_cost_bi += m->cost_mv;
}
h->pixf.avg[PIXEL_8x16]( pix[0], 8, pix[1], 8 );
WEIGHTED_AVG( PIXEL_8x16, pix[0], 8, pix[1], 8 );
i_part_cost_bi += h->pixf.satd[PIXEL_8x16]( p_fenc_i, h->mb.pic.i_stride[0], pix[0], 8 );
i_part_cost = a->l0.me8x16[i].cost;
......
......@@ -1083,6 +1083,9 @@ do_encode:
/* build ref list 0/1 */
x264_reference_build_list( h, h->fdec->i_poc );
if( i_slice_type == SLICE_TYPE_B )
x264_macroblock_bipred_init( h );
/* increase frame num but only once for B frame */
if( i_slice_type != SLICE_TYPE_B || h->sh.i_type != SLICE_TYPE_B )
{
......
......@@ -292,7 +292,7 @@ void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *
pps->i_num_ref_idx_l1_active = 1;
pps->b_weighted_pred = 0;
pps->b_weighted_bipred = 0;
pps->b_weighted_bipred = param->analyse.b_weighted_bipred ? 2 : 0;
pps->i_pic_init_qp = 26;
pps->i_pic_init_qs = 26;
......
......@@ -137,6 +137,7 @@ static void Help( x264_param_t *defaults )
" - none, all\n"
" --direct <string> Direct MV prediction mode [\"temporal\"]\n"
" - none, spatial, temporal\n"
" -w, --weightb Weighted prediction for B-frames\n"
" -m, --subme <integer> Subpixel motion estimation quality: 1=fast, 5=best. [%d]\n"
"\n"
" --level <integer> Specify IDC level\n"
......@@ -239,6 +240,7 @@ static int Parse( int argc, char **argv,
{ "output", required_argument, NULL, 'o' },
{ "analyse", required_argument, NULL, 'A' },
{ "direct", required_argument, NULL, OPT_DIRECT },
{ "weightb", no_argument, NULL, 'w' },
{ "subme", required_argument, NULL, 'm' },
{ "level", required_argument, NULL, OPT_LEVEL },
{ "rcsens", required_argument, NULL, OPT_RCSENS },
......@@ -260,7 +262,7 @@ static int Parse( int argc, char **argv,
int c;
c = getopt_long( argc, argv, "hi:I:b:r:cxB:q:nf:o:s:A:m:p:v",
c = getopt_long( argc, argv, "hi:I:b:r:cxB:q:nf:o:s:A:m:p:vw",
long_options, &long_options_index);
if( c == -1 )
......@@ -385,6 +387,9 @@ static int Parse( int argc, char **argv,
else
param->analyse.i_direct_mv_pred = atoi( optarg );
break;
case 'w':
param->analyse.b_weighted_bipred = 1;
break;
case 'm':
param->analyse.i_subpel_refine = atoi(optarg);
break;
......
......@@ -26,7 +26,7 @@
#include <stdarg.h>
#define X264_BUILD 0x0011
#define X264_BUILD 0x0012
/* x264_t:
* opaque handler for decoder and encoder */
......@@ -134,11 +134,11 @@ typedef struct
unsigned int inter; /* inter flags */
int i_direct_mv_pred; /* spatial vs temporal mv prediction */
int i_subpel_refine; /* subpixel motion estimation quality */
int i_mv_range; /* maximum length of a mv (in pixels) */
int b_weighted_bipred; /* implicit weighting for B-frames */
int b_psnr; /* Do we compute PSNR stats (save a few % of cpu) */
} analyse;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment