Commit c83edc04 authored by Cristian Militaru, committed by Fiona Glaser

High bit depth intra_sad_x3_4x4

From Google Code-In.
parent 9c0fa2d6
......@@ -552,7 +552,6 @@ INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _c )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 )
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 )
INTRA_MBCMP( sad, 4x4, v, h, dc, , _ssse3, _c )
INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 )
INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 )
#else
......@@ -860,6 +859,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
#if ARCH_X86_64
pixf->intra_sa8d_x3_8x8 = x264_intra_sa8d_x3_8x8_sse2;
#endif
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_sse2;
pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_sse2;
pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_sse2;
pixf->ssim_end4 = x264_pixel_ssim_end4_sse2;
......@@ -908,6 +908,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
INIT4( hadamard_ac, _ssse3 );
}
pixf->vsad = x264_pixel_vsad_ssse3;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_ssse3;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_ssse3;
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_ssse3;
......@@ -931,6 +932,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
{
INIT4( hadamard_ac, _avx );
}
pixf->intra_sad_x3_4x4 = x264_intra_sad_x3_4x4_avx;
pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_avx;
pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_avx;
pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_avx;
......
......@@ -98,6 +98,9 @@ DECL_PIXELS( uint64_t, hadamard_ac, xop, ( pixel *pix, int i_stride ))
void x264_intra_satd_x3_4x4_mmx2 ( pixel *, pixel *, int * );
/* intra_sad_x3_4x4: one symbol per instruction-set suffix.
 * The sse2/ssse3/avx assembly documents the arguments as ( fenc, fdec, res[3] )
 * and writes the V, H and DC prediction costs to res[0], res[1] and res[2].
 * NOTE(review): the mmx2 variant is not visible here; presumably it follows
 * the same contract -- confirm against its implementation. */
void x264_intra_sad_x3_4x4_mmx2 ( pixel *, pixel *, int * );
void x264_intra_sad_x3_4x4_sse2 ( pixel *, pixel *, int * );
void x264_intra_sad_x3_4x4_ssse3 ( pixel *, pixel *, int * );
void x264_intra_sad_x3_4x4_avx ( pixel *, pixel *, int * );
void x264_intra_satd_x3_8x8c_mmx2 ( pixel *, pixel *, int * );
void x264_intra_satd_x3_8x8c_ssse3 ( uint8_t *, uint8_t *, int * );
void x264_intra_sad_x3_8x8c_mmx2 ( pixel *, pixel *, int * );
......
......@@ -29,6 +29,7 @@
SECTION .text
cextern pw_1
cextern pw_4
cextern pw_8
;=============================================================================
......@@ -472,7 +473,69 @@ SAD_X 4, 8, 8
SAD_X 4, 8, 4
;-----------------------------------------------------------------------------
; void intra_sad_x3_8x8( pixel *fenc, pixel edge[36], int res[3]);
; void intra_sad_x3_4x4( uint16_t *fenc, uint16_t *fdec, int res[3] );
;-----------------------------------------------------------------------------
%macro INTRA_SAD_X3_4x4 0
; Computes the SAD of a 4x4 block of 16-bit pixels against three intra
; predictions built from the block's neighbours, and stores the three costs:
;   [r2+0] = V  (vertical:   every row predicted by the row above)
;   [r2+4] = H  (horizontal: every row predicted by the pixel to its left)
;   [r2+8] = DC (every pixel predicted by the rounded mean of the 8 neighbours)
; Register roles, assuming the usual x86inc argument mapping (TODO confirm):
;   r0 = fenc (source block, rows FENC_STRIDEB bytes apart)
;   r1 = fdec (reconstructed block, rows FDEC_STRIDEB bytes apart; the row
;        above and the pixels left of each row are read as predictors)
;   r2 = res  (three 32-bit results)
; Neighbour naming used below: A B C D = row above, E F G H = left pixels of
; rows 0..3.
; cglobal 3,3,7: presumably 3 arguments, 3 GPRs, 7 vector registers (m0-m6 are
; the ones used here) -- confirm against x86inc.
cglobal intra_sad_x3_4x4, 3,3,7
; m0 (low half) = A B C D; m1/m2 (low halves) = fenc rows 0 and 2.
movq m0, [r1-1*FDEC_STRIDEB]
movq m1, [r0+0*FENC_STRIDEB]
movq m2, [r0+2*FENC_STRIDEB]
; Two shuffle+add steps leave A+B+C+D in each of the four low words of m6.
pshuflw m6, m0, q1032
paddw m6, m0
pshuflw m5, m6, q2301
paddw m6, m5
punpcklqdq m6, m6 ; A+B+C+D in all 8 words (reused later for the DC sum)
; Duplicate the top row into both halves so it lines up with two fenc rows.
punpcklqdq m0, m0
; High halves: m1 = rows 0|1, m2 = rows 2|3 of fenc.
movhps m1, [r0+1*FENC_STRIDEB]
movhps m2, [r0+3*FENC_STRIDEB]
; --- V cost: differences between the top row and each fenc row ---
psubw m3, m1, m0
psubw m0, m2
; ABSW / HADDW are macros defined elsewhere: presumably per-word absolute
; value (third operand = scratch) and horizontal word sum (second operand =
; scratch) -- confirm. m5 is free to clobber here.
ABSW m3, m3, m5
ABSW m0, m0, m5
paddw m0, m3
HADDW m0, m5
movd [r2], m0 ; V prediction cost -> res[0]
; --- Left column: fetch the word immediately left of each fdec row ---
; Even rows: 4-byte load ending at the row start puts the left pixel in word 1.
; Odd rows: 8-byte load ending at the row start puts the left pixel in word 7.
movd m3, [r1+0*FDEC_STRIDEB-4]
movhps m3, [r1+1*FDEC_STRIDEB-8]
movd m4, [r1+2*FDEC_STRIDEB-4]
movhps m4, [r1+3*FDEC_STRIDEB-8]
; Broadcast word 7 across the high half and word 1 across the low half.
pshufhw m3, m3, q3333
pshufhw m4, m4, q3333
pshuflw m3, m3, q1111 ; m3 = F F F F | E E E E (high | low)
pshuflw m4, m4, q1111 ; m4 = H H H H | G G G G (high | low)
; --- DC value: (A+B+C+D+E+F+G+H + 4) >> 3 in every word of m5 ---
paddw m5, m3, m4
; Swap the 64-bit halves so adding m0 gives E+F+G+H in every word.
pshufd m0, m5, q1032
paddw m5, m6
paddw m5, m0
; pw_4 is an external constant (presumably packed words of 4): rounding term.
paddw m5, [pw_4]
psrlw m5, 3
; --- DC and H differences (sign is irrelevant, ABSW follows) ---
; m6 = DC - rows 2|3, m5 = DC - rows 0|1.
psubw m6, m5, m2
psubw m5, m1
; m1 = rows 0|1 - E|F, m2 = rows 2|3 - G|H.
psubw m1, m3
psubw m2, m4
; m0 is dead after the DC sum, so it serves as the ABSW scratch register.
ABSW m5, m5, m0
ABSW m6, m6, m0
ABSW m1, m1, m0
ABSW m2, m2, m0
paddw m5, m6
paddw m1, m2
HADDW m5, m0
HADDW m1, m2
movd [r2+8], m5 ; DC prediction cost -> res[2]
movd [r2+4], m1 ; H prediction cost -> res[1]
RET
%endmacro
; Expand the macro once per instruction set. The macro body is the same text
; each time; INIT_XMM presumably selects the symbol suffix (_sse2/_ssse3/_avx)
; and how the x86inc helper macros and 3-operand forms are emitted -- confirm
; against x86inc.
INIT_XMM sse2
INTRA_SAD_X3_4x4
INIT_XMM ssse3
INTRA_SAD_X3_4x4
INIT_XMM avx
INTRA_SAD_X3_4x4
;-----------------------------------------------------------------------------
; void intra_sad_x3_8x8( pixel *fenc, pixel edge[36], int res[3] );
;-----------------------------------------------------------------------------
;m0 = DC
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment