Commit 55ccc4ef authored by Fiona Glaser

intra_sad_x3_8x8 assembly

parent 104511d6
......@@ -665,6 +665,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
pixf->intra_sad_x3_16x16 = x264_intra_sad_x3_16x16_mmxext;
pixf->intra_satd_x3_8x8c = x264_intra_satd_x3_8x8c_mmxext;
pixf->intra_sad_x3_8x8c = x264_intra_sad_x3_8x8c_mmxext;
pixf->intra_sad_x3_8x8 = x264_intra_sad_x3_8x8_mmxext;
pixf->intra_satd_x3_4x4 = x264_intra_satd_x3_4x4_mmxext;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_mmxext;
}
......
......@@ -103,7 +103,9 @@ typedef struct
void (*intra_mbcmp_x3_4x4) ( uint8_t *fenc, uint8_t *fdec , int res[3] );
void (*intra_satd_x3_4x4) ( uint8_t *fenc, uint8_t *fdec , int res[3] );
void (*intra_sad_x3_4x4) ( uint8_t *fenc, uint8_t *fdec , int res[3] );
void (*intra_mbcmp_x3_8x8) ( uint8_t *fenc, uint8_t edge[33], int res[3] );
void (*intra_sa8d_x3_8x8) ( uint8_t *fenc, uint8_t edge[33], int res[3] );
void (*intra_sad_x3_8x8) ( uint8_t *fenc, uint8_t edge[33], int res[3] );
} x264_pixel_function_t;
void x264_pixel_init( int cpu, x264_pixel_function_t *pixf );
......
......@@ -92,6 +92,7 @@ void x264_intra_sad_x3_16x16_ssse3 ( uint8_t *, uint8_t *, int * );
void x264_intra_sa8d_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
void x264_intra_sa8d_x3_8x8_sse2 ( uint8_t *, uint8_t *, int * );
void x264_intra_sa8d_x3_8x8_ssse3 ( uint8_t *, uint8_t *, int * );
void x264_intra_sad_x3_8x8_mmxext ( uint8_t *, uint8_t *, int * );
void x264_intra_sa8d_x3_8x8_core_mmxext( uint8_t *, int16_t [2][8], int * );
void x264_intra_sa8d_x3_8x8_core_sse2 ( uint8_t *, int16_t [2][8], int * );
void x264_intra_sa8d_x3_8x8_core_ssse3 ( uint8_t *, int16_t [2][8], int * );
......
......@@ -28,6 +28,7 @@
; Read-only constants referenced by the routines in this file.
SECTION_RODATA
; pb_3: sixteen bytes, each equal to 3.
pb_3: times 16 db 3
; pw_8: four 16-bit words, each equal to 8. intra_sad_x3_8x8 adds it as the
; rounding term before its ">> 4" when averaging the 16 edge samples for DC.
pw_8: times 4 dw 8
; pb_shuf8x8c0 / pb_shuf8x8c1: 8-byte index patterns (0,0,0,0,2,2,2,2 and
; 4,4,4,4,6,6,6,6).
; NOTE(review): the names suggest byte-shuffle masks for the 8x8 chroma
; routines; their users are outside this view -- confirm.
pb_shuf8x8c0: db 0,0,0,0,2,2,2,2
pb_shuf8x8c1: db 4,4,4,4,6,6,6,6
; sw_64: the value 64 stored as a single 32-bit dword (dd).
sw_64: dd 64
......@@ -304,6 +305,73 @@ cglobal x264_intra_sad_x3_4x4_mmxext, 3,3
movd [r2+4], mm1 ;H prediction cost
RET
;-----------------------------------------------------------------------------
; void intra_sad_x3_8x8 ( uint8_t *fenc, uint8_t edge[33], int res[3]);
;-----------------------------------------------------------------------------
;m0 = DC
;m6 = V
;m7 = H
;m1 = DC score
;m2 = V score
;m3 = H score
;m5 = pixel row
;m4 = temp
; INTRA_SAD_HVDC_ITER row, h_shuffle
;   %1 = row index inside the 8x8 block. It scales FENC_STRIDE for the load
;        and selects "initialise" (row 0) versus "accumulate" (rows 1+).
;   %2 = pshufw immediate that picks which 16-bit lane of m7 is broadcast to
;        form this row's H predictor.
; Loads one 8-byte source row from r0 and folds its SAD against the DC (m0),
; V (m6) and H (one lane of m7) predictors into m1, m2 and m3 respectively.
; m4 and m5 are used as scratch.
; NOTE(review): SWAP is defined outside this view; presumably it is the
; project's register-renaming macro, so on row 0 the freshly computed SAD
; simply becomes the accumulator instead of being added to a register that
; does not yet hold a valid running sum -- confirm.
%macro INTRA_SAD_HVDC_ITER 2
; m5 = the 8 source pixels of row %1
movq m5, [r0+FENC_STRIDE*%1]
; --- DC: SAD of the row against m0 ---
movq m4, m5
psadbw m4, m0
%if %1
paddw m1, m4
%else
SWAP m1, m4
%endif
; --- V: SAD of the row against m6 ---
movq m4, m5
psadbw m4, m6
%if %1
paddw m2, m4
%else
SWAP m2, m4
%endif
; --- H: broadcast the lane of m7 chosen by %2 to all four words, then SAD.
; m5 itself is consumed here because the row is not needed afterwards.
pshufw m4, m7, %2
psadbw m5, m4
%if %1
paddw m3, m5
%else
SWAP m3, m5
%endif
%endmacro
; x264_intra_sad_x3_8x8_mmxext
; Computes the SAD of an 8x8 source block against three intra predictions
; (V, H, DC) built from the edge buffer, and stores the three 32-bit scores
; at r2+0 (V), r2+4 (H) and r2+8 (DC).
;   r0 = source rows, FENC_STRIDE bytes apart (fenc in the prototype comment)
;   r1 = edge buffer; bytes 7..14 feed the H predictor and the DC sum,
;        bytes 16..23 are the V predictor and the other half of the DC sum
;   r2 = output array of three dwords (res in the prototype comment)
; Register roles follow the m0..m7 map given before INTRA_SAD_HVDC_ITER.
INIT_MMX
cglobal x264_intra_sad_x3_8x8_mmxext, 3,3
; m7 = edge[7..14], m6 = edge[16..23]
movq m7, [r1+7]
pxor m0, m0
movq m6, [r1+16] ;V prediction
pxor m1, m1
; psadbw against zero yields the plain sum of the 8 bytes in each register.
psadbw m0, m7
psadbw m1, m6
; DC = (sum of edge[7..14] + sum of edge[16..23] + 8) >> 4, i.e. the rounded
; mean of the 16 samples. m1 is only a temporary here; row 0 of the macro
; replaces it rather than accumulating into it.
paddw m0, m1
paddw m0, [pw_8 GLOBAL]
psrlw m0, 4
; Duplicate the DC byte into a word, then broadcast that word to all four
; lanes so that m0 holds eight copies of DC.
punpcklbw m0, m0
pshufw m0, m0, 0x0 ;DC prediction
; High half of m7 with every byte doubled: words 0..3 = edge[11..14].
; The shuffle immediates 0xff/0xaa/0x55/0x00 broadcast word 3/2/1/0, so
; rows 0..3 use edge[14], edge[13], edge[12], edge[11] as their H predictor.
punpckhbw m7, m7
INTRA_SAD_HVDC_ITER 0, 0xff
INTRA_SAD_HVDC_ITER 1, 0xaa
INTRA_SAD_HVDC_ITER 2, 0x55
INTRA_SAD_HVDC_ITER 3, 0x00
; Reload and expand the low half instead: words 0..3 = edge[7..10], so
; rows 4..7 use edge[10], edge[9], edge[8], edge[7].
movq m7, [r1+7]
punpcklbw m7, m7
INTRA_SAD_HVDC_ITER 4, 0xff
INTRA_SAD_HVDC_ITER 5, 0xaa
INTRA_SAD_HVDC_ITER 6, 0x55
INTRA_SAD_HVDC_ITER 7, 0x00
; Store the low dword of each accumulator: V, H, DC in that order.
movd [r2+0], m2
movd [r2+4], m3
movd [r2+8], m1
RET
;-----------------------------------------------------------------------------
; void intra_sad_x3_8x8c ( uint8_t *fenc, uint8_t *fdec, int res[3] );
;-----------------------------------------------------------------------------
......
......@@ -668,7 +668,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
int i_satd_thresh = a->i_mbrd ? COST_MAX : X264_MIN( i_satd_inter, a->i_satd_i16x16 );
int i_cost = 0;
h->mb.i_cbp_luma = 0;
b_merged_satd = h->pixf.intra_sa8d_x3_8x8 && h->pixf.mbcmp[0] == h->pixf.satd[0];
b_merged_satd = h->pixf.intra_mbcmp_x3_8x8 && !h->mb.b_lossless;
// FIXME some bias like in i4x4?
if( h->sh.i_type == SLICE_TYPE_B )
......@@ -689,7 +689,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_
if( b_merged_satd && i_max == 9 )
{
int satd[9];
h->pixf.intra_sa8d_x3_8x8( p_src_by, edge, satd );
h->pixf.intra_mbcmp_x3_8x8( p_src_by, edge, satd );
satd[i_pred_mode] -= 3 * a->i_lambda;
for( i=2; i>=0; i-- )
{
......
......@@ -406,6 +406,7 @@ static int check_pixel( int cpu_ref, int cpu_new )
report( "intra satd_x3 :" );
TEST_INTRA_MBCMP( intra_sad_x3_16x16 , predict_16x16, sad [PIXEL_16x16], 0 );
TEST_INTRA_MBCMP( intra_sad_x3_8x8c , predict_8x8c , sad [PIXEL_8x8] , 0 );
TEST_INTRA_MBCMP( intra_sad_x3_8x8 , predict_8x8 , sad [PIXEL_8x8] , 1, edge );
TEST_INTRA_MBCMP( intra_sad_x3_4x4 , predict_4x4 , sad [PIXEL_4x4] , 0 );
report( "intra sad_x3 :" );
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment