• Liwei Wang's avatar
    Add SSSE3 implementation for the 4x4 blocks in itx · 87a377e9
    Liwei Wang authored
    Cycle times (C vs. SSSE3 implementation; lower is better):
    inv_txfm_add_4x4_adst_adst_0_8bpc_c: 445.9
    inv_txfm_add_4x4_adst_adst_0_8bpc_ssse3: 23.7
    inv_txfm_add_4x4_adst_adst_1_8bpc_c: 443.7
    inv_txfm_add_4x4_adst_adst_1_8bpc_ssse3: 52.6
    inv_txfm_add_4x4_adst_dct_0_8bpc_c: 474.5
    inv_txfm_add_4x4_adst_dct_0_8bpc_ssse3: 23.9
    inv_txfm_add_4x4_adst_dct_1_8bpc_c: 482.0
    inv_txfm_add_4x4_adst_dct_1_8bpc_ssse3: 51.1
    inv_txfm_add_4x4_adst_flipadst_0_8bpc_c: 587.2
    inv_txfm_add_4x4_adst_flipadst_0_8bpc_ssse3: 24.0
    inv_txfm_add_4x4_adst_flipadst_1_8bpc_c: 457.2
    inv_txfm_add_4x4_adst_flipadst_1_8bpc_ssse3: 52.8
    inv_txfm_add_4x4_adst_identity_0_8bpc_c: 412.4
    inv_txfm_add_4x4_adst_identity_0_8bpc_ssse3: 43.3
    inv_txfm_add_4x4_adst_identity_1_8bpc_c: 412.0
    inv_txfm_add_4x4_adst_identity_1_8bpc_ssse3: 43.3
    inv_txfm_add_4x4_dct_adst_0_8bpc_c: 467.4
    inv_txfm_add_4x4_dct_adst_0_8bpc_ssse3: 23.2
    inv_txfm_add_4x4_dct_adst_1_8bpc_c: 588.3
    inv_txfm_add_4x4_dct_adst_1_8bpc_ssse3: 48.6
    inv_txfm_add_4x4_dct_dct_0_8bpc_c: 611.5
    inv_txfm_add_4x4_dct_dct_0_8bpc_ssse3: 23.1
    inv_txfm_add_4x4_dct_dct_1_8bpc_c: 576.2
    inv_txfm_add_4x4_dct_dct_1_8bpc_ssse3: 47.6
    inv_txfm_add_4x4_dct_flipadst_0_8bpc_c: 479.5
    inv_txfm_add_4x4_dct_flipadst_0_8bpc_ssse3: 23.4
    inv_txfm_add_4x4_dct_flipadst_1_8bpc_c: 549.3
    inv_txfm_add_4x4_dct_flipadst_1_8bpc_ssse3: 48.3
    inv_txfm_add_4x4_dct_identity_0_8bpc_c: 576.9
    inv_txfm_add_4x4_dct_identity_0_8bpc_ssse3: 25.4
    inv_txfm_add_4x4_dct_identity_1_8bpc_c: 610.7
    inv_txfm_add_4x4_dct_identity_1_8bpc_ssse3: 25.1
    inv_txfm_add_4x4_flipadst_adst_0_8bpc_c: 532.8
    inv_txfm_add_4x4_flipadst_adst_0_8bpc_ssse3: 23.8
    inv_txfm_add_4x4_flipadst_adst_1_8bpc_c: 666.7
    inv_txfm_add_4x4_flipadst_adst_1_8bpc_ssse3: 61.0
    inv_txfm_add_4x4_flipadst_dct_0_8bpc_c: 539.6
    inv_txfm_add_4x4_flipadst_dct_0_8bpc_ssse3: 23.8
    inv_txfm_add_4x4_flipadst_dct_1_8bpc_c: 484.6
    inv_txfm_add_4x4_flipadst_dct_1_8bpc_ssse3: 51.1
    inv_txfm_add_4x4_flipadst_flipadst_0_8bpc_c: 503.1
    inv_txfm_add_4x4_flipadst_flipadst_0_8bpc_ssse3: 23.9
    inv_txfm_add_4x4_flipadst_flipadst_1_8bpc_c: 463.0
    inv_txfm_add_4x4_flipadst_flipadst_1_8bpc_ssse3: 54.0
    inv_txfm_add_4x4_flipadst_identity_0_8bpc_c: 719.9
    inv_txfm_add_4x4_flipadst_identity_0_8bpc_ssse3: 43.0
    inv_txfm_add_4x4_flipadst_identity_1_8bpc_c: 456.8
    inv_txfm_add_4x4_flipadst_identity_1_8bpc_ssse3: 44.1
    inv_txfm_add_4x4_identity_adst_0_8bpc_c: 422.8
    inv_txfm_add_4x4_identity_adst_0_8bpc_ssse3: 42.4
    inv_txfm_add_4x4_identity_adst_1_8bpc_c: 417.1
    inv_txfm_add_4x4_identity_adst_1_8bpc_ssse3: 42.3
    inv_txfm_add_4x4_identity_dct_0_8bpc_c: 435.4
    inv_txfm_add_4x4_identity_dct_0_8bpc_ssse3: 25.7
    inv_txfm_add_4x4_identity_dct_1_8bpc_c: 434.1
    inv_txfm_add_4x4_identity_dct_1_8bpc_ssse3: 25.3
    inv_txfm_add_4x4_identity_flipadst_0_8bpc_c: 528.1
    inv_txfm_add_4x4_identity_flipadst_0_8bpc_ssse3: 40.9
    inv_txfm_add_4x4_identity_flipadst_1_8bpc_c: 720.0
    inv_txfm_add_4x4_identity_flipadst_1_8bpc_ssse3: 41.8
    inv_txfm_add_4x4_identity_identity_0_8bpc_c: 383.2
    inv_txfm_add_4x4_identity_identity_0_8bpc_ssse3: 28.3
    inv_txfm_add_4x4_identity_identity_1_8bpc_c: 378.9
    inv_txfm_add_4x4_identity_identity_1_8bpc_ssse3: 28.2
    inv_txfm_add_4x4_wht_wht_0_8bpc_c: 271.5
    inv_txfm_add_4x4_wht_wht_0_8bpc_ssse3: 34.0
    inv_txfm_add_4x4_wht_wht_1_8bpc_c: 266.0
    inv_txfm_add_4x4_wht_wht_1_8bpc_ssse3: 33.9
    87a377e9
itx_ssse3.asm 13.1 KB