Commit fa3b8139 authored by Fiona Glaser

Fix a nondeterminism with threads and subme>7

Also add a few more checks to eliminate the need for spel_border.
parent 4304c427
......@@ -309,8 +309,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
int i_fmv_range = 4 * h->param.analyse.i_mv_range;
// limit motion search to a slightly smaller range than the theoretical limit,
// since the search may go a few iterations past its given range
int i_fpel_border = 5; // umh unconditional radius
int i_spel_border = 8; // 1.5 for subpel_satd, 1.5 for subpel_rd, 2 for bime, round up
int i_fpel_border = 6; // umh: 1 for diamond, 2 for octagon, 2 for hpel
/* Calculate max allowed MV range */
#define CLIP_FMV(mv) x264_clip3( mv, -i_fmv_range, i_fmv_range-1 )
......@@ -348,7 +347,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp )
h->mb.mv_min[1] = 4*( -16*mb_y - 24 );
h->mb.mv_max[1] = 4*( 16*( mb_height - mb_y - 1 ) + 24 );
h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], X264_MAX(4*(-512+i_spel_border), -i_fmv_range), i_fmv_range );
h->mb.mv_min_spel[1] = x264_clip3( h->mb.mv_min[1], -i_fmv_range, i_fmv_range );
h->mb.mv_max_spel[1] = CLIP_FMV( h->mb.mv_max[1] );
h->mb.mv_max_spel[1] = X264_MIN( h->mb.mv_max_spel[1], thread_mvy_range*4 );
h->mb.mv_min_fpel[1] = (h->mb.mv_min_spel[1]>>2) + i_fpel_border;
......
......@@ -535,7 +535,7 @@ me_hex2:
}
}
} while( ++i <= i_me_range/4 );
if( bmy <= mv_y_max )
if( bmy <= mv_y_max && bmy >= mv_y_min )
goto me_hex2;
break;
}
......@@ -718,8 +718,6 @@ me_hex2:
int qpel = subpel_iterations[h->mb.i_subpel_refine][3];
refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 );
}
else if( m->mv[1] > h->mb.mv_max_spel[1] )
m->mv[1] = h->mb.mv_max_spel[1];
}
#undef COST_MV
......@@ -790,8 +788,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
/* try the subpel component of the predicted mv */
if( hpel_iters && h->mb.i_subpel_refine < 3 )
{
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0]+2, h->mb.mv_max_spel[0]-2 );
int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1]+2, h->mb.mv_max_spel[1]-2 );
if( (mx-bmx)|(my-bmy) )
COST_MV_SAD( mx, my );
}
......@@ -818,9 +816,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
if( !b_refine_qpel )
{
/* check for mvrange */
if( bmy > h->mb.mv_max_spel[1] )
bmy = h->mb.mv_max_spel[1];
bcost = COST_MAX;
COST_MV_SATD( bmx, bmy, -1 );
}
......@@ -844,6 +839,8 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
bdir = -1;
for( i = qpel_iters; i > 0; i-- )
{
if( bmy <= h->mb.mv_min_spel[1] || bmy >= h->mb.mv_max_spel[1] )
break;
odir = bdir;
omx = bmx;
omy = bmy;
......@@ -855,14 +852,6 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
break;
}
/* check for mvrange */
if( bmy > h->mb.mv_max_spel[1] )
{
bmy = h->mb.mv_max_spel[1];
bcost = COST_MAX;
COST_MV_SATD( bmx, bmy, -1 );
}
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
......@@ -970,8 +959,8 @@ static void ALWAYS_INLINE x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_m
/* each byte of visited represents 8 possible m1y positions, so a 4D array isn't needed */
DECLARE_ALIGNED_16( uint8_t visited[8][8][8] );
if( bm0y > h->mb.mv_max_spel[1] - 8 ||
bm1y > h->mb.mv_max_spel[1] - 8 )
if( bm0y < h->mb.mv_min_spel[1] + 8 || bm1y < h->mb.mv_min_spel[1] + 8 ||
bm0y > h->mb.mv_max_spel[1] - 8 || bm1y > h->mb.mv_max_spel[1] - 8 )
return;
h->mc.memzero_aligned( visited, sizeof(visited) );
......@@ -1096,6 +1085,10 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
}
}
if( bmy < h->mb.mv_min_spel[1] + 3 ||
bmy > h->mb.mv_max_spel[1] - 3 )
return;
/* subpel hex search, same pattern as ME HEX. */
dir = -2;
omx = bmx;
......@@ -1109,8 +1102,8 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
for( i = 1; i < 10; i++ )
{
const int odir = mod6m1[dir+1];
if( bmy > h->mb.mv_max_spel[1] - 2 ||
bmy < h->mb.mv_min_spel[1] - 2 )
if( bmy < h->mb.mv_min_spel[1] + 3 ||
bmy > h->mb.mv_max_spel[1] - 3 )
break;
dir = -2;
omx = bmx;
......@@ -1128,7 +1121,6 @@ void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int
for( i=0; i<8; i++ ) COST_MV_SATD( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 1 );
for( i=0; i<8; i++ ) COST_MV_RD ( omx + square1[i+1][0], omy + square1[i+1][1], satds[i], 0,0 );
bmy = x264_clip3( bmy, h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
m->cost = bcost;
m->mv[0] = bmx;
m->mv[1] = bmy;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment