Commit fe5f0a47 authored by Fiona Glaser's avatar Fiona Glaser

Faster qpel-RD

3-4% faster qpel-RD; avoid re-checking bmv/pmv during the hex search.
parent d17e81e2
......@@ -942,18 +942,23 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
}
#undef COST_MV_SATD
#define COST_MV_SATD( mx, my, dst ) \
#define COST_MV_SATD( mx, my, dst, avoid_mvp ) \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4 ); \
dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[mx] + p_cost_mvy[my]; \
COPY1_IF_LT( bsatd, dst ); \
if( !avoid_pmv || !(mx == pmx && my == pmy) ) \
{ \
int stride = 16; \
uint8_t *src = h->mc.get_ref( pix, &stride, m->p_fref, m->i_stride[0], mx, my, bw*4, bh*4 ); \
dst = h->pixf.mbcmp_unaligned[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \
+ p_cost_mvx[mx] + p_cost_mvy[my]; \
COPY1_IF_LT( bsatd, dst ); \
} \
else \
dst = COST_MAX; \
}
#define COST_MV_RD( mx, my, satd, do_dir, mdir ) \
{ \
if( satd <= bsatd * SATD_THRESH )\
if( satd <= bsatd * SATD_THRESH ) \
{ \
uint64_t cost; \
*(uint32_t*)cache_mv = *(uint32_t*)cache_mv2 = pack16to32_mask(mx,my); \
......@@ -991,7 +996,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
pmy = m->mvp[1];
p_cost_mvx = m->p_cost_mv - pmx;
p_cost_mvy = m->p_cost_mv - pmy;
COST_MV_SATD( bmx, bmy, bsatd );
COST_MV_SATD( bmx, bmy, bsatd, 0 );
COST_MV_RD( bmx, bmy, 0, 0, 0 );
/* check the predicted mv */
......@@ -999,16 +1004,24 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
&& pmx >= h->mb.mv_min_spel[0] && pmx <= h->mb.mv_max_spel[0]
&& pmy >= h->mb.mv_min_spel[1] && pmy <= h->mb.mv_max_spel[1] )
{
COST_MV_SATD( pmx, pmy, satd );
COST_MV_SATD( pmx, pmy, satd, 0 );
COST_MV_RD( pmx, pmy, satd, 0,0 );
/* The hex motion search is guaranteed to not repeat the center candidate,
* so if pmv is chosen, set the "MV to avoid checking" to bmv instead. */
if( bmx == pmx && bmy == pmy )
{
pmx = m->mv[0];
pmy = m->mv[1];
}
}
/* subpel hex search, same pattern as ME HEX. */
dir = -2;
omx = bmx;
omy = bmy;
for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j] );
for( j=0; j<6; j++ ) COST_MV_SATD( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1 );
for( j=0; j<6; j++ ) COST_MV_RD ( omx + hex2[j+1][0], omy + hex2[j+1][1], satds[j], 1,j );
if( dir != -2 )
{
/* half hexagon, not overlapping the previous iteration */
......@@ -1021,7 +1034,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
dir = -2;
omx = bmx;
omy = bmy;
for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j] );
for( j=0; j<3; j++ ) COST_MV_SATD( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1 );
for( j=0; j<3; j++ ) COST_MV_RD ( omx + hex2[odir+j][0], omy + hex2[odir+j][1], satds[j], 1, odir-1+j );
if( dir == -2 )
break;
......@@ -1031,7 +1044,7 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
/* square refine, same as pattern as ME HEX. */
omx = bmx;
omy = bmy;
for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy + square1[i][1], satds[i] );
for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i][0], omy + square1[i][1], satds[i], 1 );
for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i][0], omy + square1[i][1], satds[i], 0,0 );
bmy = x264_clip3( bmy, h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
......@@ -1039,6 +1052,6 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i8, int
m->mv[0] = bmx;
m->mv[1] = bmy;
x264_macroblock_cache_mv ( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx, bmy) );
x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx - pmx, bmy - pmy) );
x264_macroblock_cache_mvd( h, 2*(i8&1), i8&2, bw, bh, i_list, pack16to32_mask(bmx - m->mvp[0], bmy - m->mvp[1]) );
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment