Commit 1c7caa53 authored by Fiona Glaser

MBAFF: Improve handling of bottom row mod32 padding

Force skip on any MBs entirely outside the frame.
If an MB pair in the bottom row is chosen to be progressive, re-pad the bottom rows progressively.
parent 52b3d803
......@@ -464,6 +464,22 @@ void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame )
}
}
/* Re-pad the rows below the visible picture for one macroblock-wide column.
 *
 * For every plane of h->fenc, the last visible row of the column selected by
 * mb_x is replicated into each row between the visible height and the
 * macroblock-aligned height (h->mb.i_mb_height * 16).  Planes after the first
 * use halved height, padding and column width.
 *
 * mb_y is accepted but not read by this function.
 *
 * NOTE(review): the per-macroblock width of non-first planes is taken as 8
 * pixels here.  If that plane stores two interleaved chroma components at full
 * row width, the column offset and copy width would need to be 16 -- confirm
 * against the frame layout. */
void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y )
{
    int plane = 0;
    while( plane < h->fenc->i_plane )
    {
        /* 0 for the first plane, 1 for every other plane. */
        int halve = plane != 0;
        int row_stride = h->fenc->i_stride[plane];
        int visible_rows = h->param.i_height >> halve;
        int extra_rows = (h->mb.i_mb_height * 16 - h->param.i_height) >> halve;
        int col_width = 16 >> halve;
        /* First pixel of this macroblock column in the current plane. */
        pixel *column = h->fenc->plane[plane] + col_width * mb_x;

        /* Copy the last visible row into each padding row beneath it. */
        for( int row = 0; row < extra_rows; row++ )
            memcpy( column + (visible_rows + row)*row_stride,
                    column + (visible_rows-1)*row_stride,
                    col_width * sizeof(pixel) );

        plane++;
    }
}
/* threading */
void x264_frame_cond_broadcast( x264_frame_t *frame, int i_lines_completed )
{
......
......@@ -195,6 +195,7 @@ void x264_frame_expand_border( x264_t *h, x264_frame_t *frame, int mb_y
void x264_frame_expand_border_filtered( x264_t *h, x264_frame_t *frame, int mb_y, int b_end );
void x264_frame_expand_border_lowres( x264_frame_t *frame );
void x264_frame_expand_border_mod16( x264_t *h, x264_frame_t *frame );
void x264_expand_border_mbpair( x264_t *h, int mb_x, int mb_y );
void x264_frame_deblock_row( x264_t *h, int mb_y );
void x264_macroblock_deblock( x264_t *h );
......
......@@ -641,22 +641,27 @@ float x264_pixel_ssim_wxh( x264_pixel_function_t *pf,
return ssim;
}
/* Vertical sum of absolute differences over a 16-pixel-wide column.
 * For each row visited, adds abs(src[j] - src[j+stride]) for j = 0..15, then
 * advances src by one stride; the accumulated total is returned.
 *
 * NOTE(review): two signatures and two outer-loop headers appear back to back
 * below.  This looks like the before/after lines of the change rendered
 * without diff markers -- confirm against the real file.  The two-argument
 * form always visits 15 row pairs; the three-argument form visits
 * (height - 1) row pairs. */
int pixel_vsad( pixel *src, int stride )
int pixel_vsad( pixel *src, int stride, int height )
{
int score = 0;
/* Fixed-height form: rows 0..15. */
for( int i = 1; i < 16; i++, src += stride )
/* Variable-height form: rows 0..height-1. */
for( int i = 1; i < height; i++, src += stride )
for( int j = 0; j < 16; j++ )
score += abs(src[j] - src[j+stride]);
return score;
}
/* Compares a frame-order vertical SAD with a field-order one (every second
 * row, i.e. stride*2, starting from row 0 and from row 1) using h->pixf.vsad.
 * Returns 1 when the field score is strictly lower than the frame score,
 * otherwise 0.
 *
 * NOTE(review): two signatures and two sets of score computations appear
 * below.  This looks like the before/after lines of the change rendered
 * without diff markers -- confirm against the real file. */
int x264_field_vsad( x264_t *h, pixel *fenc, int stride )
int x264_field_vsad( x264_t *h, int mb_x, int mb_y )
{
int score_field, score_frame;
/* Form taking fenc/stride from the caller: frame score is summed from the
 * block at fenc and the block 16 rows below it; field score from the two
 * interleaved row sets. */
score_frame = h->pixf.vsad( fenc, stride );
score_frame += h->pixf.vsad( fenc+16*stride, stride );
score_field = h->pixf.vsad( fenc, stride*2 );
score_field += h->pixf.vsad( fenc+stride, stride*2 );
/* Form taking macroblock coordinates: address plane 0 at column 16*mb_x,
 * row 16*mb_y. */
int stride = h->fenc->i_stride[0];
pixel *fenc = h->fenc->plane[0] + 16 * (mb_x + mb_y * stride);
/* We don't want to analyze pixels outside the frame, as it gives inaccurate results. */
/* Rows remaining between mb_y*16 and the picture height, capped at 32. */
int mbpair_height = X264_MIN( h->param.i_height - mb_y * 16, 32 );
score_frame = h->pixf.vsad( fenc, stride, mbpair_height );
/* Each field covers half of the rows of the pair. */
score_field = h->pixf.vsad( fenc, stride*2, mbpair_height >> 1 );
score_field += h->pixf.vsad( fenc+stride, stride*2, mbpair_height >> 1 );
return (score_field < score_frame);
}
......
......@@ -82,7 +82,7 @@ typedef struct
x264_pixel_cmp_x3_t fpelcmp_x3[7];
x264_pixel_cmp_x4_t fpelcmp_x4[7];
x264_pixel_cmp_t sad_aligned[7]; /* Aligned SAD for mbcmp */
int (*vsad)( pixel *, int );
int (*vsad)( pixel *, int, int );
int (*var2_8x8)( pixel *, int, pixel *, int, int * );
uint64_t (*var[4])( pixel *pix, int stride );
......@@ -126,6 +126,6 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
void x264_pixel_ssd_nv12( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, uint64_t *ssd_u, uint64_t *ssd_v );
uint64_t x264_pixel_ssd_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height );
float x264_pixel_ssim_wxh( x264_pixel_function_t *pf, pixel *pix1, int i_pix1, pixel *pix2, int i_pix2, int i_width, int i_height, void *buf );
int x264_field_vsad( x264_t *h, pixel *fenc, int stride );
int x264_field_vsad( x264_t *h, int mb_x, int mb_y );
#endif
......@@ -138,8 +138,8 @@ float x264_pixel_ssim_end4_avx( int sum0[5][4], int sum1[5][4], int width );
int x264_pixel_var2_8x8_mmxext( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_sse2( pixel *, int, pixel *, int, int * );
int x264_pixel_var2_8x8_ssse3( uint8_t *, int, uint8_t *, int, int * );
int x264_pixel_vsad_mmxext( pixel *src, int stride );
int x264_pixel_vsad_sse2( pixel *src, int stride );
int x264_pixel_vsad_mmxext( pixel *src, int stride, int height );
int x264_pixel_vsad_sse2( pixel *src, int stride, int height );
#define DECL_ADS( size, suffix ) \
int x264_pixel_ads##size##_##suffix( int enc_dc[size], uint16_t *sums, int delta,\
......
......@@ -278,54 +278,60 @@ cglobal pixel_sad_8x16_sse2, 4,4
%ifndef ARCH_X86_64
INIT_MMX
cglobal pixel_vsad_mmxext, 2,3
mova m0, [r0+0]
cglobal pixel_vsad_mmxext, 3,3
mova m0, [r0]
mova m1, [r0+8]
mova m2, [r0+r1+0]
mova m2, [r0+r1]
mova m3, [r0+r1+8]
lea r0, [r0+r1*2]
psadbw m0, m2
psadbw m1, m3
mov r2d, 7
paddw m0, m1
sub r2d, 2
je .end
.loop:
mova m4, [r0+0]
mova m4, [r0]
mova m5, [r0+8]
mova m6, [r0+r1]
mova m7, [r0+r1+8]
lea r0, [r0+r1*2]
psadbw m2, m4
psadbw m3, m5
psadbw m4, m6
psadbw m5, m7
paddw m0, m2
paddw m1, m3
mova m2, [r0+r1+0]
mova m3, [r0+r1+8]
lea r0, [r0+r1*2]
psadbw m4, m2
psadbw m5, m3
paddw m0, m3
paddw m0, m4
paddw m1, m5
dec r2d
paddw m0, m5
mova m2, m6
mova m3, m7
sub r2d, 2
jg .loop
paddw m0, m1
.end:
movd eax, m0
RET
%endif
INIT_XMM
cglobal pixel_vsad_sse2, 2,2
mova m1, [r0]
%assign i 1
%rep 15
mova m2, [r0+r1*(i&1)]
%if i&1
cglobal pixel_vsad_sse2, 3,3
mova m0, [r0]
mova m1, [r0+r1]
lea r0, [r0+r1*2]
psadbw m0, m1
sub r2d, 2
je .end
.loop:
mova m2, [r0]
mova m3, [r0+r1]
lea r0, [r0+r1*2]
%endif
psadbw m1, m2
%if i>1
psadbw m2, m3
paddw m0, m1
%else
SWAP 0, 1
%endif
SWAP 1, 2
%assign i i+1
%endrep
paddw m0, m2
mova m1, m3
sub r2d, 2
jg .loop
.end:
movhlps m1, m0
paddw m0, m1
movd eax, m0
......
......@@ -2857,10 +2857,14 @@ intra_analysis:
}
else
{
int skip_invalid = h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1];
/* If the current macroblock is off the frame, just skip it. */
if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height && !skip_invalid )
b_skip = 1;
/* Fast P_SKIP detection */
if( h->param.analyse.b_fast_pskip )
else if( h->param.analyse.b_fast_pskip )
{
if( h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1] )
if( skip_invalid )
// FIXME don't need to check this if the reference frame is done
{}
else if( h->param.analyse.i_subpel_refine >= 3 )
......@@ -3187,7 +3191,10 @@ intra_analysis:
{
if( !h->mb.b_direct_auto_write )
x264_mb_mc( h );
if( analysis.i_mbrd )
/* If the current macroblock is off the frame, just skip it. */
if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height )
b_skip = 1;
else if( analysis.i_mbrd )
{
i_bskip_cost = ssd_mb( h );
/* 6 = minimum cavlc cost of a non-skipped MB */
......
......@@ -2063,10 +2063,10 @@ static int x264_slice_write( x264_t *h )
if( !(i_mb_y&1) )
{
/* FIXME: VSAD is fast but fairly poor at choosing the best interlace type. */
int stride = h->fenc->i_stride[0];
pixel *fenc = h->fenc->plane[0] + 16 * (i_mb_x + i_mb_y * stride);
h->mb.b_interlaced = x264_field_vsad( h, fenc, stride );
h->mb.b_interlaced = x264_field_vsad( h, i_mb_x, i_mb_y );
memcpy( &h->zigzagf, MB_INTERLACED ? &h->zigzagf_interlaced : &h->zigzagf_progressive, sizeof(h->zigzagf) );
if( !MB_INTERLACED && (i_mb_y+2) == h->mb.i_mb_height )
x264_expand_border_mbpair( h, i_mb_x, i_mb_y );
}
}
h->mb.field[mb_xy] = MB_INTERLACED;
......
......@@ -420,15 +420,19 @@ static int check_pixel( int cpu_ref, int cpu_new )
ok = 1; used_asm = 0;
if( pixel_asm.vsad != pixel_ref.vsad )
{
int res_c, res_asm;
set_func_name( "vsad" );
used_asm = 1;
res_c = call_c( pixel_c.vsad, pbuf1, 16 );
res_asm = call_a( pixel_asm.vsad, pbuf1, 16 );
if( res_c != res_asm )
for( int h = 2; h <= 32; h += 2 )
{
ok = 0;
fprintf( stderr, "vsad: %d != %d\n", res_c, res_asm );
int res_c, res_asm;
set_func_name( "vsad" );
used_asm = 1;
res_c = call_c( pixel_c.vsad, pbuf1, 16, h );
res_asm = call_a( pixel_asm.vsad, pbuf1, 16, h );
if( res_c != res_asm )
{
ok = 0;
fprintf( stderr, "vsad: height=%d, %d != %d\n", h, res_c, res_asm );
break;
}
}
}
report( "pixel vsad :" );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment