Commit 1b30cf2a, authored by Liwei Wang, committed by Henrik Gramner

Add SSSE3 implementation for the 16x16 blocks in itx

Cycle times:
inv_txfm_add_16x16_adst_adst_0_8bpc_c: 19643.8
inv_txfm_add_16x16_adst_adst_0_8bpc_ssse3: 870.0
inv_txfm_add_16x16_adst_adst_1_8bpc_c: 19611.7
inv_txfm_add_16x16_adst_adst_1_8bpc_ssse3: 870.3
inv_txfm_add_16x16_adst_adst_2_8bpc_c: 19554.2
inv_txfm_add_16x16_adst_adst_2_8bpc_ssse3: 869.9
inv_txfm_add_16x16_adst_dct_0_8bpc_c: 19499.2
inv_txfm_add_16x16_adst_dct_0_8bpc_ssse3: 761.1
inv_txfm_add_16x16_adst_dct_1_8bpc_c: 19819.1
inv_txfm_add_16x16_adst_dct_1_8bpc_ssse3: 760.9
inv_txfm_add_16x16_adst_dct_2_8bpc_c: 19684.5
inv_txfm_add_16x16_adst_dct_2_8bpc_ssse3: 761.4
inv_txfm_add_16x16_adst_flipadst_0_8bpc_c: 19309.3
inv_txfm_add_16x16_adst_flipadst_0_8bpc_ssse3: 877.2
inv_txfm_add_16x16_adst_flipadst_1_8bpc_c: 19374.3
inv_txfm_add_16x16_adst_flipadst_1_8bpc_ssse3: 876.8
inv_txfm_add_16x16_adst_flipadst_2_8bpc_c: 19548.6
inv_txfm_add_16x16_adst_flipadst_2_8bpc_ssse3: 879.4
inv_txfm_add_16x16_dct_adst_0_8bpc_c: 19715.3
inv_txfm_add_16x16_dct_adst_0_8bpc_ssse3: 757.6
inv_txfm_add_16x16_dct_adst_1_8bpc_c: 19586.6
inv_txfm_add_16x16_dct_adst_1_8bpc_ssse3: 756.8
inv_txfm_add_16x16_dct_adst_2_8bpc_c: 19447.3
inv_txfm_add_16x16_dct_adst_2_8bpc_ssse3: 757.2
inv_txfm_add_16x16_dct_dct_0_8bpc_c: 19188.0
inv_txfm_add_16x16_dct_dct_0_8bpc_ssse3: 64.3
inv_txfm_add_16x16_dct_dct_1_8bpc_c: 19230.1
inv_txfm_add_16x16_dct_dct_1_8bpc_ssse3: 649.1
inv_txfm_add_16x16_dct_dct_2_8bpc_c: 19276.7
inv_txfm_add_16x16_dct_dct_2_8bpc_ssse3: 649.5
inv_txfm_add_16x16_dct_flipadst_0_8bpc_c: 19967.8
inv_txfm_add_16x16_dct_flipadst_0_8bpc_ssse3: 761.1
inv_txfm_add_16x16_dct_flipadst_1_8bpc_c: 19665.7
inv_txfm_add_16x16_dct_flipadst_1_8bpc_ssse3: 761.0
inv_txfm_add_16x16_dct_flipadst_2_8bpc_c: 19766.2
inv_txfm_add_16x16_dct_flipadst_2_8bpc_ssse3: 760.6
inv_txfm_add_16x16_dct_identity_0_8bpc_c: 13874.5
inv_txfm_add_16x16_dct_identity_0_8bpc_ssse3: 97.3
inv_txfm_add_16x16_dct_identity_1_8bpc_c: 13931.8
inv_txfm_add_16x16_dct_identity_1_8bpc_ssse3: 76.3
inv_txfm_add_16x16_dct_identity_2_8bpc_c: 13801.5
inv_txfm_add_16x16_dct_identity_2_8bpc_ssse3: 454.6
inv_txfm_add_16x16_flipadst_adst_0_8bpc_c: 18900.6
inv_txfm_add_16x16_flipadst_adst_0_8bpc_ssse3: 884.6
inv_txfm_add_16x16_flipadst_adst_1_8bpc_c: 19180.2
inv_txfm_add_16x16_flipadst_adst_1_8bpc_ssse3: 886.7
inv_txfm_add_16x16_flipadst_adst_2_8bpc_c: 19320.8
inv_txfm_add_16x16_flipadst_adst_2_8bpc_ssse3: 884.6
inv_txfm_add_16x16_flipadst_dct_0_8bpc_c: 19399.7
inv_txfm_add_16x16_flipadst_dct_0_8bpc_ssse3: 775.0
inv_txfm_add_16x16_flipadst_dct_1_8bpc_c: 19345.0
inv_txfm_add_16x16_flipadst_dct_1_8bpc_ssse3: 774.6
inv_txfm_add_16x16_flipadst_dct_2_8bpc_c: 19426.2
inv_txfm_add_16x16_flipadst_dct_2_8bpc_ssse3: 775.6
inv_txfm_add_16x16_flipadst_flipadst_0_8bpc_c: 19457.6
inv_txfm_add_16x16_flipadst_flipadst_0_8bpc_ssse3: 887.8
inv_txfm_add_16x16_flipadst_flipadst_1_8bpc_c: 19413.8
inv_txfm_add_16x16_flipadst_flipadst_1_8bpc_ssse3: 885.3
inv_txfm_add_16x16_flipadst_flipadst_2_8bpc_c: 19425.6
inv_txfm_add_16x16_flipadst_flipadst_2_8bpc_ssse3: 886.3
inv_txfm_add_16x16_identity_dct_0_8bpc_c: 14150.7
inv_txfm_add_16x16_identity_dct_0_8bpc_ssse3: 104.3
inv_txfm_add_16x16_identity_dct_1_8bpc_c: 14041.5
inv_txfm_add_16x16_identity_dct_1_8bpc_ssse3: 104.2
inv_txfm_add_16x16_identity_dct_2_8bpc_c: 13917.7
inv_txfm_add_16x16_identity_dct_2_8bpc_ssse3: 459.7
inv_txfm_add_16x16_identity_identity_0_8bpc_c: 8761.7
inv_txfm_add_16x16_identity_identity_0_8bpc_ssse3: 263.3
inv_txfm_add_16x16_identity_identity_1_8bpc_c: 8669.5
inv_txfm_add_16x16_identity_identity_1_8bpc_ssse3: 263.4
inv_txfm_add_16x16_identity_identity_2_8bpc_c: 8282.1
inv_txfm_add_16x16_identity_identity_2_8bpc_ssse3: 263.3
parent 255581d5
Pipeline #5174 passed with stages in 5 minutes and 54 seconds
......@@ -85,6 +85,7 @@ decl_itx16_fns( 4, 16, ssse3);
decl_itx16_fns(16, 4, ssse3);
decl_itx16_fns( 8, 16, ssse3);
decl_itx16_fns(16, 8, ssse3);
decl_itx12_fns(16, 16, ssse3);
void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
......@@ -136,6 +137,7 @@ void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
assign_itx16_fn(R, 16, 4, ssse3);
assign_itx16_fn(R, 8, 16, ssse3);
assign_itx16_fn(R, 16, 8, ssse3);
assign_itx12_fn(, 16, 16, ssse3);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment