Commit 12611ec9 authored by Henrik Gramner, committed by Anton Mitrofanov
Browse files

x86: AVX-512 load_deinterleave_chroma_fdec

parent d93851ec
......@@ -1259,6 +1259,29 @@ cglobal load_deinterleave_chroma_fdec, 4,4
RET
%endmacro ; LOAD_DEINTERLEAVE_CHROMA
;-----------------------------------------------------------------------------
; void load_deinterleave_chroma_fdec( uint8_t *dst, uint8_t *src,
;                                     intptr_t i_src, int height )
;
; AVX-512 variant. Every loop iteration reads 16 bytes from each of four
; consecutive source rows, byte-shuffles them with the deinterleave_shuf32a
; table, and stores the low 8 bytes of every 128-bit lane to dst.
;
; Register roles (argument order taken from the C prototype):
;   r0 = dst     advanced by 2*mmsize per iteration (i.e. per 4 source rows)
;   r1 = src     advanced by 4*i_src per iteration
;   r2 = i_src   source stride in bytes
;   r3 = height  rows remaining; rows are always handled in groups of four
;   r4 = scratch (mask constant first, then i_src*3)
;   k1 = merge mask for the masked broadcasts (low 16 bits = 0xff00, i.e.
;        dwords 8-15 = upper 256 bits of the register)
;   k2 = store mask, k1 >> 16 = 0x3333 (dwords 0-1 of each 128-bit lane)
;   m0 = pshufb control, m1/m2 = two source rows each
;
; NOTE(review): the contents of deinterleave_shuf32a are defined elsewhere;
; presumably lane 0 gathers the even bytes and lane 1 the odd bytes so the two
; interleaved chroma planes end up in separate lanes -- confirm.
; NOTE(review): vmovdqa32 is the aligned store form, so dst is assumed to be
; mmsize-aligned -- confirm against the callers.
;-----------------------------------------------------------------------------
%macro LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512 0
cglobal load_deinterleave_chroma_fdec, 4,5
; Replicate the 32-byte shuffle table into both 256-bit halves of m0.
vbroadcasti32x8 m0, [deinterleave_shuf32a]
; One constant provides both masks: bits 0-15 become k1, bits 16-31 become k2.
mov r4d, 0x3333ff00
kmovd k1, r4d
; r4 = 3*i_src, used to address the fourth row of each group.
lea r4, [r2*3]
kshiftrd k2, k1, 16
.loop:
; m1: row 0 in both 128-bit lanes of the low half, then row 1 merged into
; both lanes of the high half via k1.
vbroadcasti128 ym1, [r1]
vbroadcasti32x4 m1 {k1}, [r1+r2]
; m2: same layout for rows 2 and 3.
vbroadcasti128 ym2, [r1+r2*2]
vbroadcasti32x4 m2 {k1}, [r1+r4]
; src += 4 rows
lea r1, [r1+r2*4]
; Per-lane byte shuffle; each row is present in two lanes, so each copy can be
; shuffled with a different 16-byte control from m0.
pshufb m1, m0
pshufb m2, m0
; Masked stores write only the low 8 bytes of each 16-byte lane, i.e. bytes
; at offsets 0, 16, 32 and 48 of each mmsize-byte destination chunk; the
; remaining destination bytes are left untouched.
vmovdqa32 [r0] {k2}, m1
vmovdqa32 [r0+mmsize] {k2}, m2
; dst += 2*mmsize (the output of four source rows)
add r0, 2*mmsize
; height -= 4; keep looping while rows remain (signed compare).
sub r3d, 4
jg .loop
RET
%endmacro
%macro LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2 0
cglobal load_deinterleave_chroma_fenc, 4,5
vbroadcasti128 m0, [deinterleave_shuf]
......@@ -1510,6 +1533,7 @@ INIT_YMM avx2
LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
PLANE_DEINTERLEAVE_RGB
INIT_ZMM avx512
LOAD_DEINTERLEAVE_CHROMA_FDEC_AVX512
LOAD_DEINTERLEAVE_CHROMA_FENC_AVX2
%endif
......
......@@ -255,6 +255,8 @@ void x264_load_deinterleave_chroma_fdec_ssse3( uint8_t *dst, uint8_t *src, intpt
void x264_load_deinterleave_chroma_fdec_avx( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
#define x264_load_deinterleave_chroma_fdec_avx2 x264_template(load_deinterleave_chroma_fdec_avx2)
void x264_load_deinterleave_chroma_fdec_avx2( uint16_t *dst, uint16_t *src, intptr_t i_src, int height );
#define x264_load_deinterleave_chroma_fdec_avx512 x264_template(load_deinterleave_chroma_fdec_avx512)
void x264_load_deinterleave_chroma_fdec_avx512( uint8_t *dst, uint8_t *src, intptr_t i_src, int height );
#define x264_memcpy_aligned_sse x264_template(memcpy_aligned_sse)
void *x264_memcpy_aligned_sse ( void *dst, const void *src, size_t n );
#define x264_memcpy_aligned_avx x264_template(memcpy_aligned_avx)
......@@ -1081,6 +1083,7 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_avx512;
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_avx512;
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_avx512;
pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_avx512;
pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_avx512;
}
#endif // HIGH_BIT_DEPTH
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment