x86: AVX-512 pixel_avg_weight_w8

parent d3214e6b
......@@ -276,6 +276,38 @@ cglobal pixel_avg_weight_w16
vextracti128 [t0+t1], m0, 1
INIT_YMM avx512
;-----------------------------------------------------------------------------
; pixel_avg_weight_w8 (AVX-512, 256-bit registers)
;
; Visible body: combines four 8-byte rows from two sources and stores four
; 8-byte rows to the destination.
;   t0 / t1 : destination pointer / offset between destination rows
;   t2 / t3 : first source pointer / offset between its rows
;   t4 / t5 : second source pointer / offset between its rows
; (roles read off the addressing modes below; the argument setup itself is
;  not part of this excerpt)
;
; NOTE(review): m3 and m4 are read but never written in the lines shown, and
; no loop label, height counter or return is visible. The hunk header
; advertises more lines than appear here, so the setup / loop / epilogue
; lines are presumably elided from this excerpt -- confirm against the full
; file. m3 presumably holds the per-source byte weights and m4 the rounding
; scale for pmulhrsw -- TODO confirm.
;-----------------------------------------------------------------------------
cglobal pixel_avg_weight_w8
; Build the merge mask: kxnorb of a register with itself gives 0xFF, and
; 0xFF + 0xFF truncated to 8 bits is 0xFE. Bit 0 is clear, so the masked
; qword broadcasts below leave qword 0 of their destination untouched.
kxnorb k1, k1, k1
kaddb k1, k1, k1
; Rows 0 and 1 of both sources go into the low qword of each register.
movq xm0, [t2]
movq xm2, [t4]
movq xm1, [t2+t3]
movq xm5, [t4+t5]
; Advance both source pointers by two rows.
lea t2, [t2+t3*2]
lea t4, [t4+t5*2]
; Rows 2 and 3 are merged into qwords 1..3 (mask 0xFE); qword 0 keeps the
; row loaded by movq. Only the copy in qword 2 (low half of the upper
; 128-bit lane) is consumed by punpcklbw.
vpbroadcastq m0 {k1}, [t2]
vpbroadcastq m2 {k1}, [t4]
vpbroadcastq m1 {k1}, [t2+t3]
vpbroadcastq m5 {k1}, [t4+t5]
; Interleave first-source and second-source bytes within each 128-bit lane:
; m0 = rows 0 (low lane) and 2 (high lane), m1 = rows 1 and 3.
punpcklbw m0, m2
punpcklbw m1, m5
; Unsigned-byte x signed-byte multiply with pairwise add: each 16-bit result
; is src1_byte*m3[even] + src2_byte*m3[odd].
pmaddubsw m0, m3
pmaddubsw m1, m3
; Rounded high-half multiply by m4 scales the 16-bit sums back down.
pmulhrsw m0, m4
pmulhrsw m1, m4
; Saturate to unsigned bytes, per lane: low lane = row 0 | row 1,
; high lane = row 2 | row 3.
packuswb m0, m1
; Pull the high lane (rows 2 and 3) out so it can be stored with 64-bit moves.
vextracti128 xmm1, m0, 1
; Store rows 0 and 1, step the destination two rows, store rows 2 and 3.
movq [t0], xm0
movhps [t0+t1], xm0
lea t0, [t0+t1*2]
movq [t0], xmm1
movhps [t0+t1], xmm1
INIT_ZMM avx512
cglobal pixel_avg_weight_w16
......@@ -776,6 +808,9 @@ AVGH 16, 8
; AVX-512 instantiations of the AVGH macro (defined outside this excerpt).
; The two arguments look like block width and height: the 8, 16 / 8, 8 / 8, 4
; entries line up with the x264_pixel_avg_8x16/8x8/8x4_avx512 symbols that the
; C init code assigns -- presumably each invocation emits one such function;
; confirm against the macro definition.
INIT_XMM avx512
AVGH 16, 16
AVGH 16, 8
AVGH 8, 16
AVGH 8, 8
AVGH 8, 4
......@@ -871,6 +871,9 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
/* Point the avg[] entries of the function table at the AVX-512
 * implementations for the 16x16, 16x8, 8x16, 8x8 and 8x4 block sizes. */
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_avx512;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_avx512;
pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_avx512;
pf->avg[PIXEL_8x8] = x264_pixel_avg_8x8_avx512;
pf->avg[PIXEL_8x4] = x264_pixel_avg_8x4_avx512;
#endif // HIGH_BIT_DEPTH
