Commit 585ac462 authored by Liwei Wang

Add SSSE3 implementation for the 8x32 and 32x8 blocks in itx

Cycle times:
inv_txfm_add_8x32_dct_dct_0_8bpc_c: 1164.7
inv_txfm_add_8x32_dct_dct_0_8bpc_ssse3: 79.5
inv_txfm_add_8x32_dct_dct_1_8bpc_c: 11291.6
inv_txfm_add_8x32_dct_dct_1_8bpc_ssse3: 508.5
inv_txfm_add_8x32_dct_dct_2_8bpc_c: 10720.4
inv_txfm_add_8x32_dct_dct_2_8bpc_ssse3: 507.9
inv_txfm_add_8x32_dct_dct_3_8bpc_c: 12351.5
inv_txfm_add_8x32_dct_dct_3_8bpc_ssse3: 687.2
inv_txfm_add_8x32_dct_dct_4_8bpc_c: 10402.3
inv_txfm_add_8x32_dct_dct_4_8bpc_ssse3: 687.9
inv_txfm_add_8x32_identity_identity_0_8bpc_c: 3485.0
inv_txfm_add_8x32_identity_identity_0_8bpc_ssse3: 97.7
inv_txfm_add_8x32_identity_identity_1_8bpc_c: 3495.7
inv_txfm_add_8x32_identity_identity_1_8bpc_ssse3: 97.7
inv_txfm_add_8x32_identity_identity_2_8bpc_c: 3503.7
inv_txfm_add_8x32_identity_identity_2_8bpc_ssse3: 97.8
inv_txfm_add_8x32_identity_identity_3_8bpc_c: 3489.5
inv_txfm_add_8x32_identity_identity_3_8bpc_ssse3: 184.4
inv_txfm_add_8x32_identity_identity_4_8bpc_c: 3498.1
inv_txfm_add_8x32_identity_identity_4_8bpc_ssse3: 182.8
inv_txfm_add_32x8_dct_dct_0_8bpc_c: 1220.4
inv_txfm_add_32x8_dct_dct_0_8bpc_ssse3: 65.6
inv_txfm_add_32x8_dct_dct_1_8bpc_c: 11120.7
inv_txfm_add_32x8_dct_dct_1_8bpc_ssse3: 623.8
inv_txfm_add_32x8_dct_dct_2_8bpc_c: 12236.3
inv_txfm_add_32x8_dct_dct_2_8bpc_ssse3: 624.7
inv_txfm_add_32x8_dct_dct_3_8bpc_c: 10866.3
inv_txfm_add_32x8_dct_dct_3_8bpc_ssse3: 694.1
inv_txfm_add_32x8_dct_dct_4_8bpc_c: 10322.8
inv_txfm_add_32x8_dct_dct_4_8bpc_ssse3: 692.5
inv_txfm_add_32x8_identity_identity_0_8bpc_c: 3368.1
inv_txfm_add_32x8_identity_identity_0_8bpc_ssse3: 98.6
inv_txfm_add_32x8_identity_identity_1_8bpc_c: 3381.1
inv_txfm_add_32x8_identity_identity_1_8bpc_ssse3: 98.3
inv_txfm_add_32x8_identity_identity_2_8bpc_c: 3376.6
inv_txfm_add_32x8_identity_identity_2_8bpc_ssse3: 98.3
inv_txfm_add_32x8_identity_identity_3_8bpc_c: 3364.3
inv_txfm_add_32x8_identity_identity_3_8bpc_ssse3: 182.2
inv_txfm_add_32x8_identity_identity_4_8bpc_c: 3390.0
inv_txfm_add_32x8_identity_identity_4_8bpc_ssse3: 182.2
parent 5d944dc6
Pipeline #5595 passed with stages in 5 minutes and 58 seconds
@@ -86,6 +86,8 @@ decl_itx16_fns(16, 4, ssse3);
decl_itx16_fns( 8, 16, ssse3);
decl_itx16_fns(16, 8, ssse3);
decl_itx12_fns(16, 16, ssse3);
decl_itx2_fns ( 8, 32, ssse3);
decl_itx2_fns (32, 8, ssse3);
void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
@@ -138,6 +140,8 @@ void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
assign_itx16_fn(R, 8, 16, ssse3);
assign_itx16_fn(R, 16, 8, ssse3);
assign_itx12_fn(, 16, 16, ssse3);
assign_itx2_fn (R, 8, 32, ssse3);
assign_itx2_fn (R, 32, 8, ssse3);
#endif
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment