Commit 2ec69418 authored by Fiona Glaser

Faster predictor checking with subme<3

Fix a typo that made an early-skip less effective.
Avoid a relatively unpredictable branch.
Slightly changed output due to the typo-fix.
~50 cycles faster on Core i7.
parent d026397b
......
@@ -250,24 +250,22 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
 if( i_mvc > 0 )
 {
 ALIGNED_ARRAY_8( int16_t, mvc_fpel,[16],[2] );
-x264_predictor_roundclip( mvc_fpel, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+x264_predictor_roundclip( mvc_fpel+2, mvc, i_mvc, mv_x_min, mv_x_max, mv_y_min, mv_y_max );
+M32( mvc_fpel[1] ) = pmv;
 bcost <<= 4;
 for( int i = 1; i <= i_mvc; i++ )
 {
-if( M32( mvc_fpel[i-1] ) && (pmv != M32( mvc[i-1] )) )
+if( M32( mvc_fpel[i+1] ) && (pmv != M32( mvc_fpel[i+1] )) )
 {
-int mx = mvc_fpel[i-1][0];
-int my = mvc_fpel[i-1][1];
+int mx = mvc_fpel[i+1][0];
+int my = mvc_fpel[i+1][1];
 int cost = h->pixf.fpelcmp[i_pixel]( p_fenc, FENC_STRIDE, &p_fref_w[my*stride+mx], stride ) + BITS_MVD( mx, my );
 cost = (cost << 4) + i;
 COPY1_IF_LT( bcost, cost );
 }
 }
-if( bcost&15 )
-{
-bmx = mvc_fpel[(bcost&15)-1][0];
-bmy = mvc_fpel[(bcost&15)-1][1];
-}
+bmx = mvc_fpel[(bcost&15)+1][0];
+bmy = mvc_fpel[(bcost&15)+1][1];
 bcost >>= 4;
 }
 }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment