Commit e269ca55, authored by Ilia, committed by Fiona Glaser
Browse files

More 4:2:2 asm functions

High bit depth version of deblock_h_chroma_422.
Regular and high bit depth versions of deblock_h_chroma_422_intra.
High bit depth pixel_vsad.
SSE2 high bit depth and MMX 8-bit predict_8x8_vl.
Our first GCI patch this year!
parent 5d66c501
......@@ -511,7 +511,9 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->idct4x4dc = x264_idct4x4dc_sse2;
dctf->add8x8_idct = x264_add8x8_idct_sse2;
dctf->add16x16_idct = x264_add16x16_idct_sse2;
dctf->sub8x8_dct_dc = x264_sub8x8_dct_dc_sse2;
dctf->add8x8_idct_dc = x264_add8x8_idct_dc_sse2;
dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_sse2;
dctf->add16x16_idct_dc= x264_add16x16_idct_dc_sse2;
}
if( cpu&X264_CPU_AVX )
......@@ -522,6 +524,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf )
dctf->add8x8_idct = x264_add8x8_idct_avx;
dctf->add16x16_idct = x264_add16x16_idct_avx;
dctf->add8x8_idct_dc = x264_add8x8_idct_dc_avx;
dctf->sub8x16_dct_dc = x264_sub8x16_dct_dc_avx;
dctf->add16x16_idct_dc= x264_add16x16_idct_dc_avx;
}
#endif // HAVE_MMX
......
......@@ -658,6 +658,8 @@ void x264_deblock_v_chroma_intra_sse2( pixel *pix, int stride, int alpha, int be
void x264_deblock_v_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_sse2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_intra_avx ( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_mmx2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_h_chroma_422_intra_sse2( pixel *pix, int stride, int alpha, int beta );
void x264_deblock_strength_mmx2 ( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
int mvy_limit, int bframe );
......@@ -741,6 +743,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_mmx2;
#if !HIGH_BIT_DEPTH
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_mmx2;
pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_mmx2;
#endif
pf->deblock_luma_intra[1] = x264_deblock_v_luma_intra_mmx2;
pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_mmx2;
......@@ -752,9 +755,8 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
{
pf->deblock_strength = x264_deblock_strength_sse2;
pf->deblock_h_chroma_420 = x264_deblock_h_chroma_sse2;
#if !HIGH_BIT_DEPTH
pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_sse2;
#endif
pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_sse2;
if( !(cpu&X264_CPU_STACK_MOD4) )
{
pf->deblock_luma[1] = x264_deblock_v_luma_sse2;
......
......@@ -884,7 +884,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
INIT4( hadamard_ac, _sse2 );
}
pixf->vsad = x264_pixel_vsad_sse2;
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_sse2;
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_sse2;
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_sse2;
......@@ -911,7 +911,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
INIT4( hadamard_ac, _ssse3 );
}
pixf->vsad = x264_pixel_vsad_ssse3;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3;
......@@ -943,6 +943,10 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_avx;
pixf->ssim_end4 = x264_pixel_ssim_end4_avx;
}
if( cpu&X264_CPU_XOP )
{
pixf->vsad = x264_pixel_vsad_xop;
}
#endif // HAVE_MMX
#else // !HIGH_BIT_DEPTH
#if HAVE_MMX
......
......@@ -31,6 +31,7 @@
%include "x86util.asm"
SECTION_RODATA
pw_ppmmmmpp: dw 1,1,-1,-1,-1,-1,1,1
pb_sub4frame: db 0,1,4,8,5,2,3,6,9,12,13,10,7,11,14,15
pb_sub4field: db 0,4,1,8,12,5,9,13,2,6,10,14,3,7,11,15
pb_subacmask: dw 0,-1,-1,-1,-1,-1,-1,-1
......@@ -852,6 +853,79 @@ SUB8x16_DCT_DC
%endif ; !HIGH_BIT_DEPTH
; DCTDC_4ROW_SSE2 dst, row
;   arg 1 (dst): vector register that receives the result
;   arg 2 (row): index of the first of four consecutive rows to process
; Sums four rows of the block at r1 (row pitch FENC_STRIDEB) into dst and the
; same four rows of the block at r2 (row pitch FDEC_STRIDEB) into m0, takes the
; per-word difference (r1 rows minus r2 rows), then adds to every dword its
; shuffled partner so that word sums of neighbouring dwords are merged.
; Clobbers m0.
%macro DCTDC_4ROW_SSE2 2
; first row of each block
mova %1, [r1+FENC_STRIDEB*%2]
mova m0, [r2+FDEC_STRIDEB*%2]
%assign Y (%2+1)
; accumulate the remaining three rows (word-wise adds)
%rep 3
paddw %1, [r1+FENC_STRIDEB*Y]
paddw m0, [r2+FDEC_STRIDEB*Y]
%assign Y (Y+1)
%endrep
; column sums of the residual
psubw %1, m0
; NOTE(review): q2301 is presumably the x86inc shuffle constant that swaps the
; two dwords inside each 64-bit half -- confirm against x86inc.asm
pshufd m0, %1, q2301
paddw %1, m0
%endmacro
%ifdef HIGH_BIT_DEPTH
; SUB8x8_DCT_DC_10: emits sub8x8_dct_dc for the currently selected instruction
; set (only reached when HIGH_BIT_DEPTH is defined).
;   r0 = dct output; one full vector register is stored there
;   r1 = first pixel block  (rows addressed with FENC_STRIDEB)
;   r2 = second pixel block (rows addressed with FDEC_STRIDEB)
; Rows 0-3 and rows 4-7 of the residual are reduced separately by
; DCTDC_4ROW_SSE2, then combined with the sign pattern in pw_ppmmmmpp.
; NOTE(review): presumably yields the 2x2-transformed DC terms of the four 4x4
; sub-blocks -- confirm against the C reference implementation.
%macro SUB8x8_DCT_DC_10 0
cglobal sub8x8_dct_dc, 3,3,3
DCTDC_4ROW_SSE2 m1, 0
DCTDC_4ROW_SSE2 m2, 4
; pmaddwd against (+1,+1,-1,-1,-1,-1,+1,+1) adds each pair of adjacent words
; into a signed dword: dwords 0 and 3 keep their sign, dwords 1 and 2 are negated
mova m0, [pw_ppmmmmpp]
pmaddwd m1, m0
pmaddwd m2, m0
pshufd m0, m1, q2200 ; -1 -1 +0 +0
pshufd m1, m1, q0033 ; +0 +0 +1 +1
paddd m1, m0
pshufd m0, m2, q1023 ; -2 +2 -3 +3
paddd m1, m2
paddd m1, m0
; write the four dword results
mova [r0], m1
RET
%endmacro
; only an SSE2 build of the 8x8 variant is generated here
INIT_XMM sse2
SUB8x8_DCT_DC_10
; SUB8x16_DCT_DC_10: emits sub8x16_dct_dc for the currently selected
; instruction set (only reached when HIGH_BIT_DEPTH is defined).
;   r0 = dct output; two full vector registers are stored, at r0 and r0+16
;   r1 = first pixel block  (rows addressed with FENC_STRIDEB)
;   r2 = second pixel block (rows addressed with FDEC_STRIDEB)
; The 16 rows are reduced in four groups of four by DCTDC_4ROW_SSE2. Groups
; 0/1 and groups 2/3 are each combined exactly like the 8x8 variant, then the
; two partial results are merged with a sum/difference butterfly.
; Uses the three-operand forms "paddd m0, m1, m3" / "punpcklqdq m2, m0, m1";
; NOTE(review): these rely on x86inc emulating three-operand syntax for the
; non-AVX build -- confirm.
%macro SUB8x16_DCT_DC_10 0
cglobal sub8x16_dct_dc, 3,3,6
DCTDC_4ROW_SSE2 m1, 0
DCTDC_4ROW_SSE2 m2, 4
DCTDC_4ROW_SSE2 m3, 8
DCTDC_4ROW_SSE2 m4, 12
; m0 holds the +1/-1 word multipliers for all four pmaddwd operations below
mova m0, [pw_ppmmmmpp]
; upper half of the block: rows 0-7
pmaddwd m1, m0
pmaddwd m2, m0
pshufd m5, m1, q2200 ; -1 -1 +0 +0
pshufd m1, m1, q0033 ; +0 +0 +1 +1
paddd m1, m5
pshufd m5, m2, q1023 ; -2 +2 -3 +3
paddd m1, m2
paddd m1, m5 ; a6 a2 a4 a0
; lower half of the block: rows 8-15, same sequence as above
pmaddwd m3, m0
pmaddwd m4, m0
pshufd m5, m3, q2200
pshufd m3, m3, q0033
paddd m3, m5
pshufd m5, m4, q1023
paddd m3, m4
paddd m3, m5 ; a7 a3 a5 a1
; butterfly between the two halves: m0 = upper + lower, m1 = upper - lower
paddd m0, m1, m3
psubd m1, m3
; reorder dwords, then interleave 64-bit halves of sum and difference
pshufd m0, m0, q3120
pshufd m1, m1, q3120
punpcklqdq m2, m0, m1
punpckhqdq m1, m0
; eight dword results in total
mova [r0+ 0], m2
mova [r0+16], m1
RET
%endmacro
; SSE2 and AVX builds of the same body
INIT_XMM sse2
SUB8x16_DCT_DC_10
INIT_XMM avx
SUB8x16_DCT_DC_10
%endif
;-----------------------------------------------------------------------------
; void zigzag_scan_8x8_frame( int16_t level[64], int16_t dct[8][8] )
;-----------------------------------------------------------------------------
......
......@@ -39,8 +39,9 @@ void x264_sub16x16_dct_ssse3( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2
void x264_sub8x8_dct_avx ( int16_t dct[ 4][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub16x16_dct_avx ( int16_t dct[16][16], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct_dc_mmx2( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct_dc_sse2( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x16_dct_dc_sse2 ( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
void x264_sub8x8_dct_dc_sse2( dctcoef dct [ 4], pixel *pix1, pixel *pix2 );
void x264_sub8x16_dct_dc_sse2( dctcoef dct [ 4], pixel *pix1, pixel *pix2 );
void x264_sub8x16_dct_dc_avx ( dctcoef dct [ 4], pixel *pix1, pixel *pix2 );
void x264_sub8x16_dct_dc_ssse3( int16_t dct [ 4], uint8_t *pix1, uint8_t *pix2 );
void x264_add4x4_idct_mmx ( uint8_t *p_dst, int16_t dct [16] );
......
......@@ -1780,6 +1780,52 @@ INIT_XMM sse2
DEBLOCK_CHROMA
INIT_XMM avx
DEBLOCK_CHROMA
; DEBLOCK_H_CHROMA_422_INTRA_10: emits the high bit depth
; deblock_h_chroma_422_intra for the currently selected instruction set.
;   r0 = pix, r1 = stride, r2 = alpha, r3 = beta (argument order per the C
;   prototype "( pixel *pix, int stride, int alpha, int beta )")
; Runs the same load / filter / store sequence 64/mmsize times, stepping down
; by mmsize/4 rows per pass (4 passes of 4 rows with 16-byte registers).
; The filtering itself is done by deblock_intra_body, defined elsewhere.
%macro DEBLOCK_H_CHROMA_422_INTRA_10 0
cglobal deblock_h_chroma_422_intra, 4,6,8
; NOTE(review): doubling presumably converts a stride in pixels to a stride in
; bytes for 16-bit pixels -- confirm
add r1, r1
; r4 = remaining loop passes
mov r4, 64/mmsize
%if mmsize == 16
; r5 = 3*stride, passed to the load/store helpers
lea r5, [r1*3]
%endif
.loop:
CHROMA_H_LOAD r5
call deblock_intra_body
CHROMA_H_STORE r5
; advance to the next group of rows
lea r0, [r0+r1*(mmsize/4)]
dec r4
jg .loop
REP_RET
%endmacro
; only an SSE2 build is generated here
INIT_XMM sse2
DEBLOCK_H_CHROMA_422_INTRA_10
; DEBLOCK_H_CHROMA_422_10: emits the high bit depth deblock_h_chroma_422
; (non-intra) for the currently selected instruction set.
;   r0 = pix, r1 = stride, r2 = alpha, r3 = beta
;   r4 = pointer to per-edge byte values; NOTE(review): presumably the tc0
;        array, as in the 8-bit version -- confirm
; Runs 64/mmsize passes; each pass handles mmsize/4 rows and consumes
; mmsize/16 bytes from r4 (4 passes, 4 rows and 1 byte each, with 16-byte
; registers).
%macro DEBLOCK_H_CHROMA_422_10 0
cglobal deblock_h_chroma_422, 5,7,8
; NOTE(review): doubling presumably converts a stride in pixels to a stride in
; bytes for 16-bit pixels -- confirm
add r1, r1
; r5 = remaining loop passes, r6 = 3*stride for the load/store helpers
mov r5, 64/mmsize
lea r6, [r1*3]
.loop:
CHROMA_H_LOAD r6
; undo any register renaming left by the load helper so that m0..m7 below
; refer to the physical registers again on every pass
RESET_MM_PERMUTATION
LOAD_AB m4, m5, r2, r3
LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4
pxor m4, m4
; load starting one byte before r4 so that the byte at [r4] lands in the high
; byte of the low word; the arithmetic shift by 8 then sign-extends it to a word
movd m6, [r4-1]
psraw m6, 8
; broadcast that word to every lane and clamp negative values to zero
SPLATW m6, m6
pmaxsw m6, m4
; combine with the filter mask produced by LOAD_MASK
pand m7, m6
DEBLOCK_P0_Q0 m1, m2, m0, m3, m7, m5, m6
CHROMA_H_STORE r6
; next group of rows, next byte from r4
lea r0, [r0+r1*(mmsize/4)]
add r4, mmsize/16
dec r5
jg .loop
REP_RET
%endmacro
; only an SSE2 build is generated here
INIT_XMM sse2
DEBLOCK_H_CHROMA_422_10
%endif ; HIGH_BIT_DEPTH
%ifndef HIGH_BIT_DEPTH
......@@ -1791,7 +1837,7 @@ DEBLOCK_CHROMA
sub t5, r1
%if mmsize==8
mov dword r0m, 2
.skip_prologue:
.loop:
%endif
%endmacro
......@@ -1802,10 +1848,6 @@ DEBLOCK_CHROMA
lea t6, [r1*3]
mov t5, r0
add r0, t6
%if mmsize==8
mov dword r0m, 2
.skip_prologue:
%endif
%endmacro
%macro CHROMA_V_LOOP 1
......@@ -1816,7 +1858,7 @@ DEBLOCK_CHROMA
add r4, 2
%endif
dec dword r0m
jg .skip_prologue
jg .loop
%endif
%endmacro
......@@ -1828,7 +1870,7 @@ DEBLOCK_CHROMA
add r4, 2
%endif
dec dword r0m
jg .skip_prologue
jg .loop
%endif
%endmacro
......@@ -1865,6 +1907,10 @@ cglobal deblock_v_chroma, 5,6,8
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma, 5,7,8
CHROMA_H_START
%if mmsize==8
mov dword r0m, 2
.loop:
%endif
TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
call chroma_inter_body
TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
......@@ -1888,14 +1934,9 @@ cglobal deblock_h_chroma_422, 5,7,8
%else
%define cntr dword r0m
%endif
dec r2d
dec r3d
sub r0, 4
lea t6, [r1*3]
mov t5, r0
add r0, t6
CHROMA_H_START
mov cntr, 32/mmsize
.skip_prologue:
.loop:
TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
LOAD_MASK r2d, r3d
movd m6, [r4] ; tc0
......@@ -1913,7 +1954,7 @@ cglobal deblock_h_chroma_422, 5,7,8
lea t5, [t5+r1*(mmsize/2)]
add r4, mmsize/8
dec cntr
jg .skip_prologue
jg .loop
REP_RET
%endmacro
......@@ -1972,6 +2013,10 @@ cglobal deblock_v_chroma_intra, 4,5,8
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma_intra, 4,6,8
CHROMA_H_START
%if mmsize==8
mov dword r0m, 2
.loop:
%endif
TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
call chroma_intra_body
TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
......@@ -1987,6 +2032,27 @@ DEBLOCK_CHROMA_INTRA
INIT_MMX mmx2
DEBLOCK_CHROMA_INTRA
%endif
; DEBLOCK_H_CHROMA_422_INTRA: emits the 8-bit deblock_h_chroma_422_intra for
; the currently selected instruction set.
;   r0 = pix, r1 = stride, r2 = alpha, r3 = beta (argument order per the C
;   prototype "( pixel *pix, int stride, int alpha, int beta )")
; Runs 32/mmsize passes of mmsize/2 rows each, i.e. 16 rows in total for both
; the 16-byte (2 x 8 rows) and 8-byte (4 x 4 rows) register sizes.
; The filtering itself is done by chroma_intra_body, defined elsewhere.
%macro DEBLOCK_H_CHROMA_422_INTRA 0
cglobal deblock_h_chroma_422_intra, 4,7,8
; shared prologue that sets up t5/t6/r0 for the transposing load and store
CHROMA_H_START
; r6d = remaining loop passes
mov r6d, 32/mmsize
.loop:
TRANSPOSE4x8W_LOAD PASS8ROWS(t5, r0, r1, t6)
call chroma_intra_body
TRANSPOSE8x2W_STORE PASS8ROWS(t5, r0, r1, t6, 2)
; move both row pointers down to the next group of rows
lea r0, [r0+r1*(mmsize/2)]
lea t5, [t5+r1*(mmsize/2)]
dec r6d
jg .loop
REP_RET
%endmacro
INIT_XMM sse2
DEBLOCK_H_CHROMA_422_INTRA
; the MMX2 build is only generated when not targeting x86-64
%ifndef ARCH_X86_64
INIT_MMX mmx2
DEBLOCK_H_CHROMA_422_INTRA
%endif
%endif ; !HIGH_BIT_DEPTH
......
......@@ -153,6 +153,8 @@ int x264_pixel_var2_8x16_ssse3( uint8_t *, int, uint8_t *, int, int * );
int x264_pixel_var2_8x16_xop( uint8_t *, int, uint8_t *, int, int * );
int x264_pixel_vsad_mmx2( pixel *src, int stride, int height );
int x264_pixel_vsad_sse2( pixel *src, int stride, int height );
int x264_pixel_vsad_ssse3( pixel *src, int stride, int height );
int x264_pixel_vsad_xop( pixel *src, int stride, int height );
#define DECL_ADS( size, suffix ) \
int x264_pixel_ads##size##_##suffix( int enc_dc[size], uint16_t *sums, int delta,\
......
......@@ -1406,6 +1406,51 @@ PREDICT_8x8
%endif ; !HIGH_BIT_DEPTH
;-----------------------------------------------------------------------------
; void predict_8x8_vl( pixel *src, pixel *edge )
;-----------------------------------------------------------------------------
; PREDICT_8x8_VL_10 suffix
;   arg 1 (suffix): element-size suffix appended to pavg ("w" for the high bit
;                   depth builds, "b" for the 8-bit MMX2 build)
;   r0 = src (destination block, row pitch FDEC_STRIDEB)
;   r1 = edge; the code reads from edge[16] and edge[24] onwards
; Even output rows (0,2,4,6) are taken from the rounded averages of
; neighbouring edge pixels, shifted right by 0..3 pixels. Odd output rows
; (1,3,5,7) are taken from the PRED8x8_LOWPASS results, shifted by 1..4 pixels.
; NOTE(review): PRED8x8_LOWPASS is presumably the usual 3-tap
; (a + 2*b + c + 2) >> 2 filter -- confirm against its definition.
%macro PREDICT_8x8_VL_10 1
cglobal predict_8x8_vl, 2,2,8
; m0/m1 = two consecutive vectors of edge pixels
mova m0, [r1+16*SIZEOF_PIXEL]
mova m1, [r1+24*SIZEOF_PIXEL]
; m2 = m1:m0 shifted right by one pixel, m4 = m1 shifted right by one pixel
PALIGNR m2, m1, m0, SIZEOF_PIXEL*1, m4
PSRLPIX m4, m1, 1
; m6/m7 = average of each pixel with its right-hand neighbour
pavg%1 m6, m0, m2
pavg%1 m7, m1, m4
; bias r0 to row 4 so all eight rows are reachable as r0-4*pitch .. r0+3*pitch
add r0, FDEC_STRIDEB*4
; rows 0, 2, 4, 6: averaged pixels at increasing offsets
mova [r0-4*FDEC_STRIDEB], m6
PALIGNR m3, m7, m6, SIZEOF_PIXEL*1, m5
mova [r0-2*FDEC_STRIDEB], m3
PALIGNR m3, m7, m6, SIZEOF_PIXEL*2, m5
mova [r0+0*FDEC_STRIDEB], m3
PALIGNR m3, m7, m6, SIZEOF_PIXEL*3, m5
mova [r0+2*FDEC_STRIDEB], m3
; build the left-shifted neighbours needed by the low-pass filter
PALIGNR m3, m1, m0, SIZEOF_PIXEL*7, m6
PSLLPIX m5, m0, 1
PRED8x8_LOWPASS m0, m5, m2, m0, m7
PRED8x8_LOWPASS m1, m3, m4, m1, m7
; rows 1, 3, 5, 7: filtered pixels at increasing offsets
PALIGNR m4, m1, m0, SIZEOF_PIXEL*1, m2
mova [r0-3*FDEC_STRIDEB], m4
PALIGNR m4, m1, m0, SIZEOF_PIXEL*2, m2
mova [r0-1*FDEC_STRIDEB], m4
PALIGNR m4, m1, m0, SIZEOF_PIXEL*3, m2
mova [r0+1*FDEC_STRIDEB], m4
PALIGNR m4, m1, m0, SIZEOF_PIXEL*4, m2
mova [r0+3*FDEC_STRIDEB], m4
RET
%endmacro
; high bit depth: word averages, SSE2/SSSE3/AVX builds
; 8-bit: byte averages, MMX2 build only
%ifdef HIGH_BIT_DEPTH
INIT_XMM sse2
PREDICT_8x8_VL_10 w
INIT_XMM ssse3
PREDICT_8x8_VL_10 w
INIT_XMM avx
PREDICT_8x8_VL_10 w
%else
INIT_MMX mmx2
PREDICT_8x8_VL_10 b
%endif
;-----------------------------------------------------------------------------
; void predict_8x8_hd( pixel *src, pixel *edge )
;-----------------------------------------------------------------------------
......
......@@ -419,6 +419,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_sse2;
pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_sse2;
pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_sse2;
pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_sse2;
pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_sse2;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_sse2;
pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_sse2;
......@@ -429,6 +430,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
pf[I_PRED_8x8_DDR] = x264_predict_8x8_ddr_ssse3;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_ssse3;
pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_ssse3;
pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_ssse3;
pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_ssse3;
*predict_8x8_filter = x264_predict_8x8_filter_ssse3;
if( cpu&X264_CPU_CACHELINE_64 )
......@@ -440,6 +442,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
return;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_avx;
pf[I_PRED_8x8_HU] = x264_predict_8x8_hu_avx;
pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_avx;
pf[I_PRED_8x8_VR] = x264_predict_8x8_vr_avx;
*predict_8x8_filter = x264_predict_8x8_filter_avx;
#else
......@@ -449,6 +452,7 @@ void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12], x264_predict_
pf[I_PRED_8x8_DC_TOP] = x264_predict_8x8_dc_top_mmx2;
pf[I_PRED_8x8_DC_LEFT]= x264_predict_8x8_dc_left_mmx2;
pf[I_PRED_8x8_HD] = x264_predict_8x8_hd_mmx2;
pf[I_PRED_8x8_VL] = x264_predict_8x8_vl_mmx2;
*predict_8x8_filter = x264_predict_8x8_filter_mmx2;
#if ARCH_X86
pf[I_PRED_8x8_DDL] = x264_predict_8x8_ddl_mmx2;
......
......@@ -93,8 +93,10 @@ void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] );
void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] );
void x264_predict_8x8_ddr_ssse3_cache64( pixel *src, pixel edge[36] );
void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] );
void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] );
void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] );
void x264_predict_8x8_vl_sse2( pixel *src, pixel edge[36] );
void x264_predict_8x8_vl_ssse3( pixel *src, pixel edge[36] );
void x264_predict_8x8_vl_avx( pixel *src, pixel edge[36] );
void x264_predict_8x8_vl_mmx2( uint8_t *src, uint8_t edge[36] );
void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] );
void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] );
void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] );
......
......@@ -347,6 +347,57 @@ SAD_XMM 8, 8
%endrep
%endmacro
; PIXEL_VSAD: emits pixel_vsad for the currently selected instruction set.
;   r0 = src, r1 = stride, r2 = height (argument order per the C prototype
;   "( pixel *src, int stride, int height )")
; Accumulates the absolute word differences between every row and the row
; below it and returns the total in eax. Each row is read as two vectors
; ([r0] and [r0+16]); rows are addressed as r0 + 2*r1, so the stride is
; scaled by two bytes per element.
; NOTE(review): the row counter is decremented by 2 and tested for zero after
; the first pair, so height is presumably expected to be even and >= 2 --
; confirm against callers.
%macro PIXEL_VSAD 0
cglobal pixel_vsad, 3,3,8
; first pair of rows: m0/m1 = row 0, m2/m3 = row 1
mova m0, [r0]
mova m1, [r0+16]
mova m2, [r0+2*r1]
mova m3, [r0+2*r1+16]
lea r0, [r0+4*r1]
; m0 = running sum, seeded with |row0 - row1|
psubw m0, m2
psubw m1, m3
ABSW2 m0, m1, m0, m1, m4, m5
paddw m0, m1
sub r2d, 2
je .end
.loop:
; m2/m3 still hold the previous row; load the next two rows
mova m4, [r0]
mova m5, [r0+16]
mova m6, [r0+2*r1]
mova m7, [r0+2*r1+16]
lea r0, [r0+4*r1]
; previous row vs. first new row, first new row vs. second new row
psubw m2, m4
psubw m3, m5
psubw m4, m6
psubw m5, m7
ABSW m2, m2, m1
ABSW m3, m3, m1
ABSW m4, m4, m1
ABSW m5, m5, m1
paddw m0, m2
paddw m0, m3
paddw m0, m4
paddw m0, m5
; carry the last loaded row into the next iteration
mova m2, m6
mova m3, m7
sub r2d, 2
jg .loop
.end:
; horizontal reduction of the word accumulators; the unsigned variant is used
; when the per-lane total can exceed the signed 16-bit range
%if BIT_DEPTH == 9
HADDW m0, m1 ; max sum: 62(pixel diffs)*511(pixel_max)=31682
%else
HADDUW m0, m1 ; max sum: 62(pixel diffs)*1023(pixel_max)=63426
%endif
movd eax, m0
RET
%endmacro
; SSE2, SSSE3 and XOP builds of the same body
INIT_XMM sse2
PIXEL_VSAD
INIT_XMM ssse3
PIXEL_VSAD
INIT_XMM xop
PIXEL_VSAD
;-----------------------------------------------------------------------------
; void pixel_sad_xK_MxN( uint16_t *fenc, uint16_t *pix0, uint16_t *pix1,
; uint16_t *pix2, int i_stride, int scores[3] )
......
......@@ -426,6 +426,10 @@ static int check_pixel( int cpu_ref, int cpu_new )
}
report( "pixel hadamard_ac :" );
// maximize sum
for( int i = 0; i < 32; i++ )
for( int j = 0; j < 16; j++ )
pbuf4[16*i+j] = -((i+j)&1) & PIXEL_MAX;
ok = 1; used_asm = 0;
if( pixel_asm.vsad != pixel_ref.vsad )
{
......@@ -434,13 +438,17 @@ static int check_pixel( int cpu_ref, int cpu_new )
int res_c, res_asm;
set_func_name( "vsad" );
used_asm = 1;
res_c = call_c( pixel_c.vsad, pbuf1, 16, h );
res_asm = call_a( pixel_asm.vsad, pbuf1, 16, h );
if( res_c != res_asm )
for( int j = 0; j < 2 && ok; j++ )
{
ok = 0;
fprintf( stderr, "vsad: height=%d, %d != %d\n", h, res_c, res_asm );
break;
pixel *p = j ? pbuf4 : pbuf1;
res_c = call_c( pixel_c.vsad, p, 16, h );
res_asm = call_a( pixel_asm.vsad, p, 16, h );
if( res_c != res_asm )
{
ok = 0;
fprintf( stderr, "vsad: height=%d, %d != %d\n", h, res_c, res_asm );
break;
}
}
}
}
......@@ -721,8 +729,8 @@ static int check_dct( int cpu_ref, int cpu_new )
{
int cond_a = (i < 2) ? 1 : ((j&3) == 0 || (j&3) == (i-1));
int cond_b = (i == 0) ? 1 : !cond_a;
enc[0] = enc[1] = cond_a ? PIXEL_MAX : 0;
enc[2] = enc[3] = cond_b ? PIXEL_MAX : 0;
enc[0] = enc[1] = enc[4] = enc[5] = enc[8] = enc[9] = enc[12] = enc[13] = cond_a ? PIXEL_MAX : 0;
enc[2] = enc[3] = enc[6] = enc[7] = enc[10] = enc[11] = enc[14] = enc[15] = cond_b ? PIXEL_MAX : 0;
for( int k = 0; k < 4; k++ )
dec[k] = PIXEL_MAX - enc[k];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment