Commit 8ade5036 authored by Fiona Glaser

Fix SSIM calculation with sliced threads

parent 03bf7da6
......@@ -830,6 +830,7 @@ struct x264_t
/* Metrics */
int64_t i_ssd[3];
double f_ssim;
int i_ssim_cnt;
} frame;
/* Cumulated stats */
......
......@@ -619,7 +619,7 @@ static float ssim_end4( int sum0[5][4], int sum1[5][4], int width )
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
pixel *pix1, int stride1,
pixel *pix2, int stride2,
int width, int height, void *buf )
int width, int height, void *buf, int *cnt )
{
int z = 0;
float ssim = 0.0;
......@@ -638,6 +638,7 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
for( int x = 0; x < width-1; x += 4 )
ssim += pf->ssim_end4( sum0+x, sum1+x, X264_MIN(4,width-x-1) );
}
*cnt = (height-1) * (width-1);
return ssim;
}
......
......@@ -125,7 +125,7 @@ typedef struct
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height );
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf );
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf, int *cnt );
int x264_field_vsad( x264_t *h, int mb_x, int mb_y );
#endif
......@@ -1819,6 +1819,7 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
if( h->param.analyse.b_ssim )
{
int ssim_cnt;
x264_emms();
/* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks,
* and overlap by 4 */
......@@ -1827,7 +1828,8 @@ static void x264_fdec_filter_row( x264_t *h, int mb_y, int b_inloop )
x264_pixel_ssim_wxh( &h->pixf,
h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0],
h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0],
h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer );
h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer, &ssim_cnt );
h->stat.frame.i_ssim_cnt += ssim_cnt;
}
}
}
......@@ -2446,6 +2448,7 @@ static int x264_threaded_slices_write( x264_t *h )
for( int j = 0; j < 3; j++ )
h->stat.frame.i_ssd[j] += t->stat.frame.i_ssd[j];
h->stat.frame.f_ssim += t->stat.frame.f_ssim;
h->stat.frame.i_ssim_cnt += t->stat.frame.i_ssim_cnt;
}
return 0;
......@@ -3126,7 +3129,7 @@ static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current,
if( h->param.analyse.b_ssim )
{
double ssim_y = h->stat.frame.f_ssim
/ (((h->param.i_width-6)>>2) * ((h->param.i_height-6)>>2));
/ h->stat.frame.i_ssim_cnt;
h->stat.f_ssim_mean_y[h->sh.i_type] += ssim_y * dur;
snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message),
" SSIM Y:%.5f", ssim_y );
......
......@@ -488,12 +488,13 @@ static int check_pixel( int cpu_ref, int cpu_new )
if( pixel_asm.ssim_4x4x2_core != pixel_ref.ssim_4x4x2_core ||
pixel_asm.ssim_end4 != pixel_ref.ssim_end4 )
{
int cnt;
float res_c, res_a;
ALIGNED_16( int sums[5][4] ) = {{0}};
used_asm = ok = 1;
x264_emms();
res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 );
res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3 );
res_c = x264_pixel_ssim_wxh( &pixel_c, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt );
res_a = x264_pixel_ssim_wxh( &pixel_asm, pbuf1+2, 32, pbuf2+2, 32, 32, 28, pbuf3, &cnt );
if( fabs( res_c - res_a ) > 1e-6 )
{
ok = 0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment