Commit 8245feb2, authored by Oskar Arvidsson, committed by Fiona Glaser

Fix possible overflow in sub4x4_dct in 10-bit builds

parent 4c9fe3fb
......@@ -77,6 +77,14 @@ cextern pd_32
SWAP %1, %2, %3
%endmacro
;-----------------------------------------------------------------------------
; DCT_UNPACK src, hi, tmp
; Sign-extends the packed signed words of %1 into packed dwords, split over
; two registers.
;   %1  in : register holding packed words (only read by the instructions)
;       out: after the SWAP, names the register with the LOW-half words of
;            the source, sign-extended to dwords
;   %2  out: the HIGH-half words of the source, sign-extended to dwords;
;            its previous contents are discarded
;   %3  scratch on entry (previous contents discarded); after the SWAP it
;            names the register that still holds the original packed words
; NOTE(review): SWAP is defined outside this view; presumably the x264asm
; register-renaming macro (no data movement) -- confirm.
;-----------------------------------------------------------------------------
%macro DCT_UNPACK 3
punpcklwd %3, %1        ; low words of %1 land in the upper 16 bits of each dword
punpckhwd %2, %1        ; same for the high words of %1
psrad %3, 16            ; arithmetic shift drops the stale lower word and sign-extends
psrad %2, 16
SWAP %1, %3             ; make %1 refer to the low-half result
%endmacro
%ifdef HIGH_BIT_DEPTH
INIT_XMM
;-----------------------------------------------------------------------------
......@@ -175,11 +183,24 @@ cglobal sub4x4_dct_mmx, 3,3
LOAD_DIFF m2, m4, none, [r1+4*FENC_STRIDE], [r2+4*FDEC_STRIDE]
DCT4_1D 0,1,2,3,4
TRANSPOSE4x4W 0,1,2,3,4
DCT4_1D 0,1,2,3,4
STORE_DIFF m0, m4, m5, [r0+ 0], [r0+ 8]
STORE_DIFF m1, m4, m5, [r0+16], [r0+24]
STORE_DIFF m2, m4, m5, [r0+32], [r0+40]
STORE_DIFF m3, m4, m5, [r0+48], [r0+56]
SUMSUB_BADC w, m3, m0, m2, m1
SUMSUB_BA w, m2, m3, m4
DCT_UNPACK m2, m4, m5
DCT_UNPACK m3, m6, m7
mova [r0+ 0], m2 ; s03 + s12
mova [r0+ 8], m4
mova [r0+32], m3 ; s03 - s12
mova [r0+40], m6
DCT_UNPACK m0, m2, m4
DCT_UNPACK m1, m3, m5
SUMSUB2_AB d, m0, m1, m4
SUMSUB2_AB d, m2, m3, m5
mova [r0+16], m0 ; d03*2 + d12
mova [r0+24], m2
mova [r0+48], m4 ; d03 - 2*d12
mova [r0+56], m5
RET
%else
......
......@@ -584,16 +584,6 @@
packuswb %2, %1
%endmacro
%ifdef HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; STORE_DIFF src, tmp_lo, tmp_hi, dst_lo, dst_hi   (HIGH_BIT_DEPTH variant)
; Sign-extends the packed signed words of %1 to dwords and stores them.
;   %1  in : register holding packed words (only read)
;   %2  scratch: receives the low-half words as dwords, then stored to %4
;   %3  scratch: receives the high-half words as dwords, then stored to %5
;   %4  destination operand for the low half
;   %5  destination operand for the high half
; NOTE(review): this listing looks like a rendered diff, and the hunk line
; counts suggest this macro is deleted by the commit -- confirm against the
; real file.
;-----------------------------------------------------------------------------
%macro STORE_DIFF 5
punpcklwd %2, %1        ; low words of %1 land in the upper 16 bits of each dword
punpckhwd %3, %1        ; same for the high words of %1
psrad %2, 16            ; arithmetic shift sign-extends each word to a dword
psrad %3, 16
mova %4, %2             ; store low half
mova %5, %3             ; store high half
%endmacro
%else
%macro STORE_DIFF 4
movh %2, %4
punpcklbw %2, %3
......@@ -602,7 +592,6 @@
packuswb %1, %1
movh %4, %1
%endmacro
%endif
%macro CLIPW 3 ;(dst, min, max)
pmaxsw %1, %2
......
......@@ -556,17 +556,54 @@ static int check_dct( int cpu_ref, int cpu_new )
x264_cqm_init( h );
x264_quant_init( h, 0, &qf );
/* Overflow test cases: fill five 16-row regions of pbuf3 (enc) and pbuf4 (dec)
 * with extreme-valued pixels.  In every row the first four pixels are written
 * as two pairs, each pair being either PIXEL_MAX or 0, and dec always receives
 * the complement (PIXEL_MAX - enc), so each enc-dec difference has the
 * largest possible magnitude. */
for( int pattern = 0; pattern < 5; pattern++ )
{
    for( int row = 0; row < 16; row++ )
    {
        pixel *enc_row = pbuf3 + (16*pattern + row)*FENC_STRIDE;
        pixel *dec_row = pbuf4 + (16*pattern + row)*FDEC_STRIDE;

        /* Patterns 0 and 1 set the left pair high on every row; patterns 2..4
         * set it high only where (row mod 4) is 0 or pattern-1. */
        int left_high;
        if( pattern < 2 )
            left_high = 1;
        else
            left_high = (row&3) == 0 || (row&3) == (pattern-1);

        /* Pattern 0 sets both pairs high; otherwise the right pair is the
         * opposite of the left pair. */
        int right_high = pattern == 0 || !left_high;

        enc_row[0] = enc_row[1] = left_high  ? PIXEL_MAX : 0;
        enc_row[2] = enc_row[3] = right_high ? PIXEL_MAX : 0;

        for( int col = 0; col < 4; col++ )
            dec_row[col] = PIXEL_MAX - enc_row[col];
    }
}
/* TEST_DCT( name, t1, t2, size )
 * Compares the C and assembly implementations of dct function `name`, but only
 * when the implementation under test differs from the reference one.
 *   name : member of dct_c / dct_ref / dct_asm to exercise
 *   t1   : output buffer for the C implementation
 *   t2   : output buffer for the assembly implementation
 *   size : number of dctcoef elements to compare
 * Each of the five iterations runs two comparisons: one on pbuf1/pbuf2 at
 * offset j*64, and one on the overflow patterns in pbuf3/pbuf4, advancing
 * 16 rows per iteration.  On the first mismatch `ok` is cleared, a message is
 * printed to stderr and the loop stops.  Stale lines that cleared `ok`
 * unconditionally and left a dangling `if` before the declarations have been
 * dropped. */
#define TEST_DCT( name, t1, t2, size ) \
    if( dct_asm.name != dct_ref.name ) \
    { \
        set_func_name( #name ); \
        used_asm = 1; \
        pixel *enc = pbuf3; \
        pixel *dec = pbuf4; \
        for( int j = 0; j < 5; j++) \
        { \
            call_c( dct_c.name, t1, &pbuf1[j*64], &pbuf2[j*64] ); \
            call_a( dct_asm.name, t2, &pbuf1[j*64], &pbuf2[j*64] ); \
            if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
            { \
                ok = 0; \
                fprintf( stderr, #name " [FAILED]\n" ); \
                break; \
            } \
            call_c( dct_c.name, t1, enc, dec ); \
            call_a( dct_asm.name, t2, enc, dec ); \
            if( memcmp( t1, t2, size*sizeof(dctcoef) ) ) \
            { \
                ok = 0; \
                fprintf( stderr, #name " [FAILED] (overflow)\n" ); \
                break; \
            } \
            enc += 16*FENC_STRIDE; \
            dec += 16*FDEC_STRIDE; \
        } \
    }
ok = 1; used_asm = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment