Commit bd12b1ec authored by Liwei Wang

Add SSSE3 implementations for the 16x32, 32x16 and 32x32 blocks in itx

Cycle times:
inv_txfm_add_16x32_dct_dct_0_8bpc_c: 2464.6
inv_txfm_add_16x32_dct_dct_0_8bpc_ssse3: 121.6
inv_txfm_add_16x32_dct_dct_1_8bpc_c: 24751.6
inv_txfm_add_16x32_dct_dct_1_8bpc_ssse3: 1101.9
inv_txfm_add_16x32_dct_dct_2_8bpc_c: 24377.0
inv_txfm_add_16x32_dct_dct_2_8bpc_ssse3: 1117.2
inv_txfm_add_16x32_dct_dct_3_8bpc_c: 24155.6
inv_txfm_add_16x32_dct_dct_3_8bpc_ssse3: 2349.3
inv_txfm_add_16x32_dct_dct_4_8bpc_c: 24175.6
inv_txfm_add_16x32_dct_dct_4_8bpc_ssse3: 1642.0
inv_txfm_add_16x32_identity_identity_0_8bpc_c: 10304.7
inv_txfm_add_16x32_identity_identity_0_8bpc_ssse3: 137.7
inv_txfm_add_16x32_identity_identity_1_8bpc_c: 10341.6
inv_txfm_add_16x32_identity_identity_1_8bpc_ssse3: 137.9
inv_txfm_add_16x32_identity_identity_2_8bpc_c: 10299.9
inv_txfm_add_16x32_identity_identity_2_8bpc_ssse3: 253.9
inv_txfm_add_16x32_identity_identity_3_8bpc_c: 10331.4
inv_txfm_add_16x32_identity_identity_3_8bpc_ssse3: 369.7
inv_txfm_add_16x32_identity_identity_4_8bpc_c: 10360.4
inv_txfm_add_16x32_identity_identity_4_8bpc_ssse3: 484.0
inv_txfm_add_32x16_dct_dct_0_8bpc_c: 2288.4
inv_txfm_add_32x16_dct_dct_0_8bpc_ssse3: 142.3
inv_txfm_add_32x16_dct_dct_1_8bpc_c: 23819.9
inv_txfm_add_32x16_dct_dct_1_8bpc_ssse3: 1740.1
inv_txfm_add_32x16_dct_dct_2_8bpc_c: 23755.8
inv_txfm_add_32x16_dct_dct_2_8bpc_ssse3: 1641.4
inv_txfm_add_32x16_dct_dct_3_8bpc_c: 23839.9
inv_txfm_add_32x16_dct_dct_3_8bpc_ssse3: 1559.0
inv_txfm_add_32x16_dct_dct_4_8bpc_c: 23757.7
inv_txfm_add_32x16_dct_dct_4_8bpc_ssse3: 1579.0
inv_txfm_add_32x16_identity_identity_0_8bpc_c: 10381.7
inv_txfm_add_32x16_identity_identity_0_8bpc_ssse3: 126.3
inv_txfm_add_32x16_identity_identity_1_8bpc_c: 10402.5
inv_txfm_add_32x16_identity_identity_1_8bpc_ssse3: 126.5
inv_txfm_add_32x16_identity_identity_2_8bpc_c: 10429.2
inv_txfm_add_32x16_identity_identity_2_8bpc_ssse3: 244.9
inv_txfm_add_32x16_identity_identity_3_8bpc_c: 10382.0
inv_txfm_add_32x16_identity_identity_3_8bpc_ssse3: 491.0
inv_txfm_add_32x16_identity_identity_4_8bpc_c: 10381.0
inv_txfm_add_32x16_identity_identity_4_8bpc_ssse3: 468.0
inv_txfm_add_32x32_dct_dct_0_8bpc_c: 4168.2
inv_txfm_add_32x32_dct_dct_0_8bpc_ssse3: 204.0
inv_txfm_add_32x32_dct_dct_1_8bpc_c: 46306.2
inv_txfm_add_32x32_dct_dct_1_8bpc_ssse3: 2216.0
inv_txfm_add_32x32_dct_dct_2_8bpc_c: 46300.2
inv_txfm_add_32x32_dct_dct_2_8bpc_ssse3: 2194.2
inv_txfm_add_32x32_dct_dct_3_8bpc_c: 46350.1
inv_txfm_add_32x32_dct_dct_3_8bpc_ssse3: 3484.4
inv_txfm_add_32x32_dct_dct_4_8bpc_c: 46318.1
inv_txfm_add_32x32_dct_dct_4_8bpc_ssse3: 3440.9
inv_txfm_add_32x32_identity_identity_0_8bpc_c: 14663.1
inv_txfm_add_32x32_identity_identity_0_8bpc_ssse3: 179.0
inv_txfm_add_32x32_identity_identity_1_8bpc_c: 14737.0
inv_txfm_add_32x32_identity_identity_1_8bpc_ssse3: 179.2
inv_txfm_add_32x32_identity_identity_2_8bpc_c: 14640.4
inv_txfm_add_32x32_identity_identity_2_8bpc_ssse3: 179.1
inv_txfm_add_32x32_identity_identity_3_8bpc_c: 14638.5
inv_txfm_add_32x32_identity_identity_3_8bpc_ssse3: 663.8
inv_txfm_add_32x32_identity_identity_4_8bpc_c: 14635.6
inv_txfm_add_32x32_identity_identity_4_8bpc_ssse3: 663.9
parent 72f8cc62
Pipeline #5731 passed with stages in 6 minutes and 25 seconds
......@@ -88,6 +88,9 @@ decl_itx16_fns(16, 8, ssse3);
decl_itx12_fns(16, 16, ssse3);
decl_itx2_fns ( 8, 32, ssse3);
decl_itx2_fns (32, 8, ssse3);
decl_itx2_fns (16, 32, ssse3);
decl_itx2_fns (32, 16, ssse3);
decl_itx2_fns (32, 32, ssse3);
void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
......@@ -142,6 +145,9 @@ void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
assign_itx12_fn(, 16, 16, ssse3);
assign_itx2_fn (R, 8, 32, ssse3);
assign_itx2_fn (R, 32, 8, ssse3);
assign_itx2_fn (R, 16, 32, ssse3);
assign_itx2_fn (R, 32, 16, ssse3);
assign_itx2_fn (, 32, 32, ssse3);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment