Commit c9d2c1c8, authored by Henrik Gramner, committed by Anton Mitrofanov
Browse files

analyse: Reduce the size of the cost_mv arrays

Use a dynamic size depending on the MV range. Reduces memory consumption by
up to a few megabytes.

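Drop a related old miscompilation check since it may otherwise cause an
out-of-bounds memory access: the check reads cost_mv[X264_LOOKAHEAD_QP][2013],
and with the dynamically sized arrays that index lies past the end of the
table whenever 2*4*mv_range is smaller than 2013.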

Also remove an unused extern variable declaration.
parent d46a5a46
......@@ -264,29 +264,31 @@ static uint16_t x264_cost_i4x4_mode[(QP_MAX+2)*32];
static int init_costs( x264_t *h, float *logs, int qp )
{
int lambda = x264_lambda_tab[qp];
if( h->cost_mv[qp] )
return 0;
int mv_range = h->param.analyse.i_mv_range;
int lambda = x264_lambda_tab[qp];
/* factor of 4 from qpel, 2 from sign, and 2 because mv can be opposite from mvp */
CHECKED_MALLOC( h->cost_mv[qp], (4*4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv[qp] += 2*4*2048;
for( int i = 0; i <= 2*4*2048; i++ )
CHECKED_MALLOC( h->cost_mv[qp], (4*4*mv_range + 1) * sizeof(uint16_t) );
h->cost_mv[qp] += 2*4*mv_range;
for( int i = 0; i <= 2*4*mv_range; i++ )
{
h->cost_mv[qp][-i] =
h->cost_mv[qp][i] = X264_MIN( lambda * logs[i] + .5f, (1<<16)-1 );
h->cost_mv[qp][i] = X264_MIN( (int)(lambda * logs[i] + .5f), UINT16_MAX );
}
x264_pthread_mutex_lock( &cost_ref_mutex );
for( int i = 0; i < 3; i++ )
for( int j = 0; j < 33; j++ )
x264_cost_ref[qp][i][j] = X264_MIN( i ? lambda * bs_size_te( i, j ) : 0, (1<<16)-1 );
x264_cost_ref[qp][i][j] = i ? X264_MIN( lambda * bs_size_te( i, j ), UINT16_MAX ) : 0;
x264_pthread_mutex_unlock( &cost_ref_mutex );
if( h->param.analyse.i_me_method >= X264_ME_ESA && !h->cost_mv_fpel[qp][0] )
{
for( int j = 0; j < 4; j++ )
{
CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*2048 + 1) * sizeof(uint16_t) );
h->cost_mv_fpel[qp][j] += 2*2048;
for( int i = -2*2048; i < 2*2048; i++ )
CHECKED_MALLOC( h->cost_mv_fpel[qp][j], (4*mv_range + 1) * sizeof(uint16_t) );
h->cost_mv_fpel[qp][j] += 2*mv_range;
for( int i = -2*mv_range; i < 2*mv_range; i++ )
h->cost_mv_fpel[qp][j][i] = h->cost_mv[qp][i*4+j];
}
}
......@@ -300,12 +302,13 @@ fail:
int x264_analyse_init_costs( x264_t *h )
{
float *logs = x264_malloc( (2*4*2048+1) * sizeof(float) );
int mv_range = h->param.analyse.i_mv_range;
float *logs = x264_malloc( (2*4*mv_range+1) * sizeof(float) );
if( !logs )
return -1;
logs[0] = 0.718f;
for( int i = 1; i <= 2*4*2048; i++ )
for( int i = 1; i <= 2*4*mv_range; i++ )
logs[i] = log2f( i+1 ) * 2.0f + 1.718f;
for( int qp = X264_MIN( h->param.rc.i_qp_min, QP_MAX_SPEC ); qp <= h->param.rc.i_qp_max; qp++ )
......@@ -324,13 +327,14 @@ fail:
void x264_analyse_free_costs( x264_t *h )
{
int mv_range = h->param.analyse.i_mv_range;
for( int i = 0; i < QP_MAX+1; i++ )
{
if( h->cost_mv[i] )
x264_free( h->cost_mv[i] - 2*4*2048 );
x264_free( h->cost_mv[i] - 2*4*mv_range );
if( h->cost_mv_fpel[i][0] )
for( int j = 0; j < 4; j++ )
x264_free( h->cost_mv_fpel[i][j] - 2*2048 );
x264_free( h->cost_mv_fpel[i][j] - 2*mv_range );
}
}
......
......@@ -1593,14 +1593,6 @@ x264_t *x264_encoder_open( x264_param_t *param )
if( x264_analyse_init_costs( h ) )
goto fail;
static const uint16_t cost_mv_correct[7] = { 24, 47, 95, 189, 379, 757, 1515 };
/* Checks for known miscompilation issues. */
if( h->cost_mv[X264_LOOKAHEAD_QP][2013] != cost_mv_correct[BIT_DEPTH-8] )
{
x264_log( h, X264_LOG_ERROR, "MV cost test failed: x264 has been miscompiled!\n" );
goto fail;
}
/* Must be volatile or else GCC will optimize it out. */
volatile int temp = 392;
if( x264_clz( temp ) != 23 )
......
......@@ -66,8 +66,6 @@ void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_wei
void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
extern uint16_t *x264_cost_mv_fpel[QP_MAX+1][4];
#define COPY1_IF_LT(x,y)\
if( (y) < (x) )\
(x) = (y);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment