Commit 71e13008, authored by Xuefeng Jiang, committed by Henrik Gramner

Add SSSE3 implementation for dav1d_ipred_v and dav1d_ipred_dc

Cycle times:
intra_pred_dc_w4_8bpc_c: 1051.4
intra_pred_dc_w4_8bpc_ssse3: 58.8
intra_pred_dc_w8_8bpc_c: 1587.6
intra_pred_dc_w8_8bpc_ssse3: 75.3
intra_pred_dc_w16_8bpc_c: 2526.2
intra_pred_dc_w16_8bpc_ssse3: 103.5
intra_pred_dc_w32_8bpc_c: 2646.6
intra_pred_dc_w32_8bpc_ssse3: 179.5
intra_pred_dc_w64_8bpc_c: 4084.6
intra_pred_dc_w64_8bpc_ssse3: 356.1
intra_pred_v_w4_8bpc_c: 468.5
intra_pred_v_w4_8bpc_ssse3: 46.8
intra_pred_v_w8_8bpc_c: 839.1
intra_pred_v_w8_8bpc_ssse3: 56.7
intra_pred_v_w16_8bpc_c: 1750.5
intra_pred_v_w16_8bpc_ssse3: 73.0
intra_pred_v_w32_8bpc_c: 1552.5
intra_pred_v_w32_8bpc_ssse3: 135.4
intra_pred_v_w64_8bpc_c: 2463.6
intra_pred_v_w64_8bpc_ssse3: 305.6
parent 8c5d34c8
Pipeline #3779 passed with stages
in 5 minutes and 11 seconds
......@@ -51,7 +51,9 @@ decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_avx2);
decl_pal_pred_fn(dav1d_pal_pred_avx2);
decl_angular_ipred_fn(dav1d_ipred_dc_ssse3);
decl_angular_ipred_fn(dav1d_ipred_h_ssse3);
decl_angular_ipred_fn(dav1d_ipred_v_ssse3);
void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
......@@ -59,7 +61,9 @@ void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
#if BITDEPTH == 8
c->intra_pred[HOR_PRED] = dav1d_ipred_h_ssse3;
c->intra_pred[DC_PRED] = dav1d_ipred_dc_ssse3;
c->intra_pred[HOR_PRED] = dav1d_ipred_h_ssse3;
c->intra_pred[VERT_PRED] = dav1d_ipred_v_ssse3;
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment