1. 25 Jan, 2019 2 commits
  2. 24 Jan, 2019 12 commits
    • Martin Storsjö's avatar
      arm: mc: Implement 8tap and bilin functions · 191f79d5
      Martin Storsjö authored
      Relative speedups measured with checkasm:
                                       Cortex A7     A8     A9    A53   Snapdragon 835
      mc_8tap_regular_w2_0_8bpc_neon:       9.63   4.05   3.82   5.41   5.68
      mc_8tap_regular_w2_h_8bpc_neon:       3.30   5.44   3.38   3.88   5.12
      mc_8tap_regular_w2_hv_8bpc_neon:      3.86   6.21   4.39   5.18   6.10
      mc_8tap_regular_w2_v_8bpc_neon:       4.69   5.43   3.56   7.27   4.86
      mc_8tap_regular_w4_0_8bpc_neon:       9.13   4.05   5.24   5.37   6.60
      mc_8tap_regular_w4_h_8bpc_neon:       4.38   7.11   4.61   6.59   7.15
      mc_8tap_regular_w4_hv_8bpc_neon:      5.11   9.77   7.37   9.21  10.29
      mc_8tap_regular_w4_v_8bpc_neon:       6.24   7.88   4.96  11.16   7.89
      mc_8tap_regular_w8_0_8bpc_neon:       9.12   4.20   5.59   5.59   9.25
      mc_8tap_regular_w8_h_8bpc_neon:       5.91   8.42   4.84   8.46   7.08
      mc_8tap_regular_w8_hv_8bpc_neon:      5.46   8.35   6.52   7.19   8.33
      mc_8tap_regular_w8_v_8bpc_neon:       7.53   8.96   6.28  16.08  10.66
      mc_8tap_regular_w16_0_8bpc_neon:      9.77   5.46   4.06   7.02   7.38
      mc_8tap_regular_w16_h_8bpc_neon:      6.33   8.87   5.03  10.30   4.29
      mc_8tap_regular_w16_hv_8bpc_neon:     5.00   7.84   6.15   6.83   7.44
      mc_8tap_regular_w16_v_8bpc_neon:      7.74   8.81   6.23  19.24  11.16
      mc_8tap_regular_w32_0_8bpc_neon:      6.11   4.63   2.44   5.92   4.70
      mc_8tap_regular_w32_h_8bpc_neon:      6.60   9.02   5.20  11.08   3.50
      mc_8tap_regular_w32_hv_8bpc_neon:     4.85   7.64   6.09   6.68   6.92
      mc_8tap_regular_w32_v_8bpc_neon:      7.61   8.36   6.13  19.94  11.17
      mc_8tap_regular_w64_0_8bpc_neon:      4.61   3.81   1.60   3.50   2.73
      mc_8tap_regular_w64_h_8bpc_neon:      6.72   9.07   5.21  11.41   3.10
      mc_8tap_regular_w64_hv_8bpc_neon:     4.67   7.43   5.92   6.43   6.59
      mc_8tap_regular_w64_v_8bpc_neon:      7.64   8.28   6.07  20.48  11.41
      mc_8tap_regular_w128_0_8bpc_neon:     2.41   3.13   1.11   2.31   1.73
      mc_8tap_regular_w128_h_8bpc_neon:     6.68   9.03   5.09  11.41   2.90
      mc_8tap_regular_w128_hv_8bpc_neon:    4.50   7.39   5.70   6.26   6.47
      mc_8tap_regular_w128_v_8bpc_neon:     7.21   8.23   5.88  19.82  11.42
      mc_bilinear_w2_0_8bpc_neon:           9.23   4.03   3.74   5.33   6.49
      mc_bilinear_w2_h_8bpc_neon:           2.07   3.52   2.71   2.35   3.40
      mc_bilinear_w2_hv_8bpc_neon:          2.60   5.24   2.73   2.74   3.89
      mc_bilinear_w2_v_8bpc_neon:           2.57   4.39   3.14   3.04   4.05
      mc_bilinear_w4_0_8bpc_neon:           8.74   4.03   5.38   5.28   6.53
      mc_bilinear_w4_h_8bpc_neon:           3.41   6.22   4.28   3.86   7.56
      mc_bilinear_w4_hv_8bpc_neon:          4.38   7.45   4.61   5.26   7.95
      mc_bilinear_w4_v_8bpc_neon:           3.65   6.57   4.51   4.45   7.62
      mc_bilinear_w8_0_8bpc_neon:           8.74   4.50   5.71   5.46   9.39
      mc_bilinear_w8_h_8bpc_neon:           6.14  10.71   6.78   6.88  14.10
      mc_bilinear_w8_hv_8bpc_neon:          7.11  12.80   8.24  11.08   7.83
      mc_bilinear_w8_v_8bpc_neon:           7.24  11.69   7.57   8.04  15.46
      mc_bilinear_w16_0_8bpc_neon:         10.01   5.47   4.07   6.97   7.64
      mc_bilinear_w16_h_8bpc_neon:          8.36  17.00   8.34  11.61   7.64
      mc_bilinear_w16_hv_8bpc_neon:         7.67  13.54   8.53  13.32   8.05
      mc_bilinear_w16_v_8bpc_neon:         10.19  22.56  10.52  15.39  10.62
      mc_bilinear_w32_0_8bpc_neon:          6.22   4.73   2.43   5.89   4.90
      mc_bilinear_w32_h_8bpc_neon:          9.47  18.96   9.34  13.10   7.24
      mc_bilinear_w32_hv_8bpc_neon:         7.95  13.15   9.49  13.78   8.71
      mc_bilinear_w32_v_8bpc_neon:         11.10  23.53  11.34  16.74   8.78
      mc_bilinear_w64_0_8bpc_neon:          4.58   3.82   1.59   3.46   2.71
      mc_bilinear_w64_h_8bpc_neon:         10.07  19.77   9.60  13.99   6.88
      mc_bilinear_w64_hv_8bpc_neon:         8.08  12.95   9.39  13.84   8.90
      mc_bilinear_w64_v_8bpc_neon:         11.49  23.85  11.12  17.13   7.90
      mc_bilinear_w128_0_8bpc_neon:         2.37   3.24   1.15   2.28   1.73
      mc_bilinear_w128_h_8bpc_neon:         9.94  18.84   8.66  13.91   6.74
      mc_bilinear_w128_hv_8bpc_neon:        7.26  12.82   8.97  12.43   8.88
      mc_bilinear_w128_v_8bpc_neon:         9.89  23.88   8.93  14.73   7.33
      mct_8tap_regular_w4_0_8bpc_neon:      2.82   4.46   2.72   3.50   5.41
      mct_8tap_regular_w4_h_8bpc_neon:      4.16   6.88   4.64   6.51   6.60
      mct_8tap_regular_w4_hv_8bpc_neon:     5.22   9.87   7.81   9.39  10.11
      mct_8tap_regular_w4_v_8bpc_neon:      5.81   7.72   4.80  10.16   6.85
      mct_8tap_regular_w8_0_8bpc_neon:      4.48   6.30   3.01   5.82   5.04
      mct_8tap_regular_w8_h_8bpc_neon:      5.59   8.04   4.18   8.68   8.30
      mct_8tap_regular_w8_hv_8bpc_neon:     5.34   8.32   6.42   7.04   7.99
      mct_8tap_regular_w8_v_8bpc_neon:      7.32   8.71   5.75  17.07   9.73
      mct_8tap_regular_w16_0_8bpc_neon:     5.05   9.60   3.64  10.06   4.29
      mct_8tap_regular_w16_h_8bpc_neon:     5.53   8.20   4.54   9.98   7.33
      mct_8tap_regular_w16_hv_8bpc_neon:    4.90   7.87   6.07   6.67   7.03
      mct_8tap_regular_w16_v_8bpc_neon:     7.39   8.55   5.72  19.64   9.98
      mct_8tap_regular_w32_0_8bpc_neon:     5.28   8.16   4.07  11.03   2.38
      mct_8tap_regular_w32_h_8bpc_neon:     5.97   8.31   4.67  10.63   6.72
      mct_8tap_regular_w32_hv_8bpc_neon:    4.73   7.65   5.98   6.51   6.31
      mct_8tap_regular_w32_v_8bpc_neon:     7.33   8.18   5.72  20.50  10.03
      mct_8tap_regular_w64_0_8bpc_neon:     5.11   9.19   4.01  10.61   1.92
      mct_8tap_regular_w64_h_8bpc_neon:     6.05   8.33   4.53  10.84   6.38
      mct_8tap_regular_w64_hv_8bpc_neon:    4.61   7.54   5.69   6.35   6.11
      mct_8tap_regular_w64_v_8bpc_neon:     7.27   8.06   5.39  20.41  10.15
      mct_8tap_regular_w128_0_8bpc_neon:    4.29   8.21   4.28   9.55   1.32
      mct_8tap_regular_w128_h_8bpc_neon:    6.01   8.26   4.43  10.78   6.20
      mct_8tap_regular_w128_hv_8bpc_neon:   4.49   7.49   5.46   6.11   5.96
      mct_8tap_regular_w128_v_8bpc_neon:    6.90   8.00   5.19  18.47  10.13
      mct_bilinear_w4_0_8bpc_neon:          2.70   4.53   2.67   3.32   5.11
      mct_bilinear_w4_h_8bpc_neon:          3.02   5.06   3.13   3.28   5.38
      mct_bilinear_w4_hv_8bpc_neon:         4.14   7.04   4.75   4.99   6.30
      mct_bilinear_w4_v_8bpc_neon:          3.17   5.30   3.66   3.87   5.01
      mct_bilinear_w8_0_8bpc_neon:          4.41   6.46   2.99   5.74   5.98
      mct_bilinear_w8_h_8bpc_neon:          5.36   8.27   3.62   6.39   9.06
      mct_bilinear_w8_hv_8bpc_neon:         6.65  11.82   6.79  11.47   7.07
      mct_bilinear_w8_v_8bpc_neon:          6.26   9.62   4.05   7.75  16.81
      mct_bilinear_w16_0_8bpc_neon:         4.86   9.85   3.61  10.03   4.19
      mct_bilinear_w16_h_8bpc_neon:         5.26  12.91   4.76   9.56   9.68
      mct_bilinear_w16_hv_8bpc_neon:        6.96  12.58   7.05  13.48   7.35
      mct_bilinear_w16_v_8bpc_neon:         6.46  17.94   5.72  13.70  19.20
      mct_bilinear_w32_0_8bpc_neon:         5.31   8.10   4.06  10.88   2.77
      mct_bilinear_w32_h_8bpc_neon:         6.91  14.28   5.33  11.24  10.33
      mct_bilinear_w32_hv_8bpc_neon:        7.13  12.21   7.57  13.91   7.19
      mct_bilinear_w32_v_8bpc_neon:         8.06  18.48   5.88  14.74  15.47
      mct_bilinear_w64_0_8bpc_neon:         5.08   7.29   3.83  10.44   1.71
      mct_bilinear_w64_h_8bpc_neon:         7.24  14.59   5.40  11.70  11.03
      mct_bilinear_w64_hv_8bpc_neon:        7.24  11.98   7.59  13.72   7.30
      mct_bilinear_w64_v_8bpc_neon:         8.20  18.24   5.69  14.57  15.04
      mct_bilinear_w128_0_8bpc_neon:        4.35   8.23   4.17   9.71   1.11
      mct_bilinear_w128_h_8bpc_neon:        7.02  13.80   5.63  11.11  11.26
      mct_bilinear_w128_hv_8bpc_neon:       6.31  11.89   6.75  12.12   7.24
      mct_bilinear_w128_v_8bpc_neon:        6.95  18.26   5.84  11.31  14.78
      191f79d5
    • Martin Storsjö's avatar
      588cbf94
    • Ronald S. Bultje's avatar
      9824c5d9
    • Martin Storsjö's avatar
      CI: Add CI jobs for armv7-w64-mingw32 and aarch64-w64-mingw32 · 9a550985
      Martin Storsjö authored
      Keep artifacts from the aarch64 build job. There's less point in
      keeping artifacts from the armv7 build job, as all modern Arm-based
      Windows desktop setups are arm64 (even though they can run these armv7
      binaries as well).
      9a550985
    • Martin Storsjö's avatar
      arm64: mc: Optimize mc_8tap_regular_w4_hv_8bpc for A53 · e80955cc
      Martin Storsjö authored
      Before:                       Cortex A53   Snapdragon 835
      mc_8tap_regular_w4_hv_8bpc_neon:   543.6   359.1
      After:
      mc_8tap_regular_w4_hv_8bpc_neon:   466.7   355.5
      
      The same kind of change doesn't seem to give any benefits on the
      8-pixel-wide hv filtering though, potentially related to the fact that
      it uses not only smull/smlal but also smull2/smlal2.
      e80955cc
    • Martin Storsjö's avatar
      arm64: mc: Simplify the 8tap_2w_hv code slightly · 72af9329
      Martin Storsjö authored
      Before:                       Cortex A53   Snapdragon 835
      mc_8tap_regular_w2_hv_8bpc_neon:   415.0   286.9
      After:
      mc_8tap_regular_w2_hv_8bpc_neon:   399.1   269.9
      72af9329
    • Martin Storsjö's avatar
      arm64: mc: Optimize the mul_mla_8_* macros for Cortex A53 · fc5a3728
      Martin Storsjö authored
      Before:                      Cortex A53   Snapdragon 835
      mc_8tap_regular_w2_v_8bpc_neon:   155.1   131.8
      mc_8tap_regular_w4_v_8bpc_neon:   199.6   148.1
      mc_8tap_regular_w8_v_8bpc_neon:   286.2   225.5
      After:
      mc_8tap_regular_w2_v_8bpc_neon:   134.1   129.5
      mc_8tap_regular_w4_v_8bpc_neon:   157.6   146.5
      mc_8tap_regular_w8_v_8bpc_neon:   208.0   225.0
      fc5a3728
    • Martin Storsjö's avatar
      arm64: mc: Improve a comment · 1407506a
      Martin Storsjö authored
      1407506a
    • Martin Storsjö's avatar
      b7a5d2ff
    • Martin Storsjö's avatar
      arm64: mc: Use ubfx instead of ubfm, for consistency with arm · 2c1eba5e
      Martin Storsjö authored
      On arm, there's no ubfm instruction, only ubfx.
      2c1eba5e
    • Martin Storsjö's avatar
      arm: Create proper .rdata sections for COFF · 1ef86e1b
      Martin Storsjö authored
      On COFF, the default read-only data section is `.rdata`, not `.rodata`.
      1ef86e1b
    • James Almer's avatar
      337d8f94
  3. 21 Jan, 2019 1 commit
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 4x16 and 16x4 blocks in itx · bf659082
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_4x16_adst_adst_0_8bpc_c: 2203.6
      inv_txfm_add_4x16_adst_adst_0_8bpc_ssse3: 198.7
      inv_txfm_add_4x16_adst_adst_1_8bpc_c: 2235.1
      inv_txfm_add_4x16_adst_adst_1_8bpc_ssse3: 199.7
      inv_txfm_add_4x16_adst_adst_2_8bpc_c: 2199.1
      inv_txfm_add_4x16_adst_adst_2_8bpc_ssse3: 199.9
      inv_txfm_add_4x16_adst_dct_0_8bpc_c: 2272.4
      inv_txfm_add_4x16_adst_dct_0_8bpc_ssse3: 50.0
      inv_txfm_add_4x16_adst_dct_1_8bpc_c: 2281.6
      inv_txfm_add_4x16_adst_dct_1_8bpc_ssse3: 163.7
      inv_txfm_add_4x16_adst_dct_2_8bpc_c: 2262.5
      inv_txfm_add_4x16_adst_dct_2_8bpc_ssse3: 164.7
      inv_txfm_add_4x16_adst_flipadst_0_8bpc_c: 2456.5
      inv_txfm_add_4x16_adst_flipadst_0_8bpc_ssse3: 204.3
      inv_txfm_add_4x16_adst_flipadst_1_8bpc_c: 2349.1
      inv_txfm_add_4x16_adst_flipadst_1_8bpc_ssse3: 198.5
      inv_txfm_add_4x16_adst_flipadst_2_8bpc_c: 2241.5
      inv_txfm_add_4x16_adst_flipadst_2_8bpc_ssse3: 198.7
      inv_txfm_add_4x16_adst_identity_0_8bpc_c: 1574.7
      inv_txfm_add_4x16_adst_identity_0_8bpc_ssse3: 117.0
      inv_txfm_add_4x16_adst_identity_1_8bpc_c: 1576.3
      inv_txfm_add_4x16_adst_identity_1_8bpc_ssse3: 116.6
      inv_txfm_add_4x16_adst_identity_2_8bpc_c: 1572.9
      inv_txfm_add_4x16_adst_identity_2_8bpc_ssse3: 116.7
      inv_txfm_add_4x16_dct_adst_0_8bpc_c: 2162.8
      inv_txfm_add_4x16_dct_adst_0_8bpc_ssse3: 187.6
      inv_txfm_add_4x16_dct_adst_1_8bpc_c: 2180.4
      inv_txfm_add_4x16_dct_adst_1_8bpc_ssse3: 185.6
      inv_txfm_add_4x16_dct_adst_2_8bpc_c: 2165.1
      inv_txfm_add_4x16_dct_adst_2_8bpc_ssse3: 184.9
      inv_txfm_add_4x16_dct_dct_0_8bpc_c: 2233.7
      inv_txfm_add_4x16_dct_dct_0_8bpc_ssse3: 49.5
      inv_txfm_add_4x16_dct_dct_1_8bpc_c: 2770.4
      inv_txfm_add_4x16_dct_dct_1_8bpc_ssse3: 148.4
      inv_txfm_add_4x16_dct_dct_2_8bpc_c: 2288.7
      inv_txfm_add_4x16_dct_dct_2_8bpc_ssse3: 149.0
      inv_txfm_add_4x16_dct_flipadst_0_8bpc_c: 2242.0
      inv_txfm_add_4x16_dct_flipadst_0_8bpc_ssse3: 185.8
      inv_txfm_add_4x16_dct_flipadst_1_8bpc_c: 2249.6
      inv_txfm_add_4x16_dct_flipadst_1_8bpc_ssse3: 188.4
      inv_txfm_add_4x16_dct_flipadst_2_8bpc_c: 2237.3
      inv_txfm_add_4x16_dct_flipadst_2_8bpc_ssse3: 185.1
      inv_txfm_add_4x16_dct_identity_0_8bpc_c: 1532.3
      inv_txfm_add_4x16_dct_identity_0_8bpc_ssse3: 63.7
      inv_txfm_add_4x16_dct_identity_1_8bpc_c: 1534.5
      inv_txfm_add_4x16_dct_identity_1_8bpc_ssse3: 63.6
      inv_txfm_add_4x16_dct_identity_2_8bpc_c: 1548.1
      inv_txfm_add_4x16_dct_identity_2_8bpc_ssse3: 101.6
      inv_txfm_add_4x16_flipadst_adst_0_8bpc_c: 2205.2
      inv_txfm_add_4x16_flipadst_adst_0_8bpc_ssse3: 201.6
      inv_txfm_add_4x16_flipadst_adst_1_8bpc_c: 2222.0
      inv_txfm_add_4x16_flipadst_adst_1_8bpc_ssse3: 202.6
      inv_txfm_add_4x16_flipadst_adst_2_8bpc_c: 2205.2
      inv_txfm_add_4x16_flipadst_adst_2_8bpc_ssse3: 205.7
      inv_txfm_add_4x16_flipadst_dct_0_8bpc_c: 2294.9
      inv_txfm_add_4x16_flipadst_dct_0_8bpc_ssse3: 50.0
      inv_txfm_add_4x16_flipadst_dct_1_8bpc_c: 2304.2
      inv_txfm_add_4x16_flipadst_dct_1_8bpc_ssse3: 164.5
      inv_txfm_add_4x16_flipadst_dct_2_8bpc_c: 2292.7
      inv_txfm_add_4x16_flipadst_dct_2_8bpc_ssse3: 164.5
      inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_c: 2281.3
      inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_ssse3: 202.9
      inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_c: 2258.7
      inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_ssse3: 202.4
      inv_txfm_add_4x16_flipadst_flipadst_2_8bpc_c: 2261.0
      inv_txfm_add_4x16_flipadst_flipadst_2_8bpc_ssse3: 201.3
      inv_txfm_add_4x16_flipadst_identity_0_8bpc_c: 1580.5
      inv_txfm_add_4x16_flipadst_identity_0_8bpc_ssse3: 116.1
      inv_txfm_add_4x16_flipadst_identity_1_8bpc_c: 1578.7
      inv_txfm_add_4x16_flipadst_identity_1_8bpc_ssse3: 116.7
      inv_txfm_add_4x16_flipadst_identity_2_8bpc_c: 1590.8
      inv_txfm_add_4x16_flipadst_identity_2_8bpc_ssse3: 117.4
      inv_txfm_add_4x16_identity_adst_0_8bpc_c: 1949.0
      inv_txfm_add_4x16_identity_adst_0_8bpc_ssse3: 170.9
      inv_txfm_add_4x16_identity_adst_1_8bpc_c: 1947.4
      inv_txfm_add_4x16_identity_adst_1_8bpc_ssse3: 171.0
      inv_txfm_add_4x16_identity_adst_2_8bpc_c: 1948.7
      inv_txfm_add_4x16_identity_adst_2_8bpc_ssse3: 170.3
      inv_txfm_add_4x16_identity_dct_0_8bpc_c: 2022.3
      inv_txfm_add_4x16_identity_dct_0_8bpc_ssse3: 59.2
      inv_txfm_add_4x16_identity_dct_1_8bpc_c: 2020.8
      inv_txfm_add_4x16_identity_dct_1_8bpc_ssse3: 133.7
      inv_txfm_add_4x16_identity_dct_2_8bpc_c: 2020.2
      inv_txfm_add_4x16_identity_dct_2_8bpc_ssse3: 133.2
      inv_txfm_add_4x16_identity_flipadst_0_8bpc_c: 2024.7
      inv_txfm_add_4x16_identity_flipadst_0_8bpc_ssse3: 170.3
      inv_txfm_add_4x16_identity_flipadst_1_8bpc_c: 2021.8
      inv_txfm_add_4x16_identity_flipadst_1_8bpc_ssse3: 170.0
      inv_txfm_add_4x16_identity_flipadst_2_8bpc_c: 2022.5
      inv_txfm_add_4x16_identity_flipadst_2_8bpc_ssse3: 169.9
      inv_txfm_add_4x16_identity_identity_0_8bpc_c: 1328.4
      inv_txfm_add_4x16_identity_identity_0_8bpc_ssse3: 87.7
      inv_txfm_add_4x16_identity_identity_1_8bpc_c: 1330.9
      inv_txfm_add_4x16_identity_identity_1_8bpc_ssse3: 87.7
      inv_txfm_add_4x16_identity_identity_2_8bpc_c: 1327.3
      inv_txfm_add_4x16_identity_identity_2_8bpc_ssse3: 87.6
      inv_txfm_add_16x4_adst_adst_0_8bpc_c: 2166.3
      inv_txfm_add_16x4_adst_adst_0_8bpc_ssse3: 186.3
      inv_txfm_add_16x4_adst_adst_1_8bpc_c: 2166.9
      inv_txfm_add_16x4_adst_adst_1_8bpc_ssse3: 184.9
      inv_txfm_add_16x4_adst_adst_2_8bpc_c: 2167.2
      inv_txfm_add_16x4_adst_adst_2_8bpc_ssse3: 185.2
      inv_txfm_add_16x4_adst_dct_0_8bpc_c: 2123.2
      inv_txfm_add_16x4_adst_dct_0_8bpc_ssse3: 172.1
      inv_txfm_add_16x4_adst_dct_1_8bpc_c: 2124.2
      inv_txfm_add_16x4_adst_dct_1_8bpc_ssse3: 171.2
      inv_txfm_add_16x4_adst_dct_2_8bpc_c: 2122.8
      inv_txfm_add_16x4_adst_dct_2_8bpc_ssse3: 171.8
      inv_txfm_add_16x4_adst_flipadst_0_8bpc_c: 2213.3
      inv_txfm_add_16x4_adst_flipadst_0_8bpc_ssse3: 189.6
      inv_txfm_add_16x4_adst_flipadst_1_8bpc_c: 2227.7
      inv_txfm_add_16x4_adst_flipadst_1_8bpc_ssse3: 188.4
      inv_txfm_add_16x4_adst_flipadst_2_8bpc_c: 2228.5
      inv_txfm_add_16x4_adst_flipadst_2_8bpc_ssse3: 188.4
      inv_txfm_add_16x4_adst_identity_0_8bpc_c: 1906.7
      inv_txfm_add_16x4_adst_identity_0_8bpc_ssse3: 154.3
      inv_txfm_add_16x4_adst_identity_1_8bpc_c: 1905.2
      inv_txfm_add_16x4_adst_identity_1_8bpc_ssse3: 155.6
      inv_txfm_add_16x4_adst_identity_2_8bpc_c: 1905.6
      inv_txfm_add_16x4_adst_identity_2_8bpc_ssse3: 156.3
      inv_txfm_add_16x4_dct_adst_0_8bpc_c: 2209.8
      inv_txfm_add_16x4_dct_adst_0_8bpc_ssse3: 37.4
      inv_txfm_add_16x4_dct_adst_1_8bpc_c: 2209.8
      inv_txfm_add_16x4_dct_adst_1_8bpc_ssse3: 157.9
      inv_txfm_add_16x4_dct_adst_2_8bpc_c: 2221.1
      inv_txfm_add_16x4_dct_adst_2_8bpc_ssse3: 158.5
      inv_txfm_add_16x4_dct_dct_0_8bpc_c: 2177.5
      inv_txfm_add_16x4_dct_dct_0_8bpc_ssse3: 29.6
      inv_txfm_add_16x4_dct_dct_1_8bpc_c: 2179.3
      inv_txfm_add_16x4_dct_dct_1_8bpc_ssse3: 144.9
      inv_txfm_add_16x4_dct_dct_2_8bpc_c: 2177.8
      inv_txfm_add_16x4_dct_dct_2_8bpc_ssse3: 143.7
      inv_txfm_add_16x4_dct_flipadst_0_8bpc_c: 2293.6
      inv_txfm_add_16x4_dct_flipadst_0_8bpc_ssse3: 38.3
      inv_txfm_add_16x4_dct_flipadst_1_8bpc_c: 2293.2
      inv_txfm_add_16x4_dct_flipadst_1_8bpc_ssse3: 163.9
      inv_txfm_add_16x4_dct_flipadst_2_8bpc_c: 2301.3
      inv_txfm_add_16x4_dct_flipadst_2_8bpc_ssse3: 163.7
      inv_txfm_add_16x4_dct_identity_0_8bpc_c: 1977.7
      inv_txfm_add_16x4_dct_identity_0_8bpc_ssse3: 39.9
      inv_txfm_add_16x4_dct_identity_1_8bpc_c: 1978.7
      inv_txfm_add_16x4_dct_identity_1_8bpc_ssse3: 126.8
      inv_txfm_add_16x4_dct_identity_2_8bpc_c: 1979.5
      inv_txfm_add_16x4_dct_identity_2_8bpc_ssse3: 128.1
      inv_txfm_add_16x4_flipadst_adst_0_8bpc_c: 2175.6
      inv_txfm_add_16x4_flipadst_adst_0_8bpc_ssse3: 185.1
      inv_txfm_add_16x4_flipadst_adst_1_8bpc_c: 2175.7
      inv_txfm_add_16x4_flipadst_adst_1_8bpc_ssse3: 185.7
      inv_txfm_add_16x4_flipadst_adst_2_8bpc_c: 2173.1
      inv_txfm_add_16x4_flipadst_adst_2_8bpc_ssse3: 185.0
      inv_txfm_add_16x4_flipadst_dct_0_8bpc_c: 2140.5
      inv_txfm_add_16x4_flipadst_dct_0_8bpc_ssse3: 172.0
      inv_txfm_add_16x4_flipadst_dct_1_8bpc_c: 2147.5
      inv_txfm_add_16x4_flipadst_dct_1_8bpc_ssse3: 171.9
      inv_txfm_add_16x4_flipadst_dct_2_8bpc_c: 2148.5
      inv_txfm_add_16x4_flipadst_dct_2_8bpc_ssse3: 172.0
      inv_txfm_add_16x4_flipadst_flipadst_0_8bpc_c: 2240.6
      inv_txfm_add_16x4_flipadst_flipadst_0_8bpc_ssse3: 191.3
      inv_txfm_add_16x4_flipadst_flipadst_1_8bpc_c: 2243.5
      inv_txfm_add_16x4_flipadst_flipadst_1_8bpc_ssse3: 193.2
      inv_txfm_add_16x4_flipadst_flipadst_2_8bpc_c: 2242.9
      inv_txfm_add_16x4_flipadst_flipadst_2_8bpc_ssse3: 192.0
      inv_txfm_add_16x4_flipadst_identity_0_8bpc_c: 1919.2
      inv_txfm_add_16x4_flipadst_identity_0_8bpc_ssse3: 155.1
      inv_txfm_add_16x4_flipadst_identity_1_8bpc_c: 1925.2
      inv_txfm_add_16x4_flipadst_identity_1_8bpc_ssse3: 155.2
      inv_txfm_add_16x4_flipadst_identity_2_8bpc_c: 2084.8
      inv_txfm_add_16x4_flipadst_identity_2_8bpc_ssse3: 155.0
      inv_txfm_add_16x4_identity_adst_0_8bpc_c: 1498.5
      inv_txfm_add_16x4_identity_adst_0_8bpc_ssse3: 107.6
      inv_txfm_add_16x4_identity_adst_1_8bpc_c: 1499.5
      inv_txfm_add_16x4_identity_adst_1_8bpc_ssse3: 107.0
      inv_txfm_add_16x4_identity_adst_2_8bpc_c: 1498.9
      inv_txfm_add_16x4_identity_adst_2_8bpc_ssse3: 107.9
      inv_txfm_add_16x4_identity_dct_0_8bpc_c: 1471.9
      inv_txfm_add_16x4_identity_dct_0_8bpc_ssse3: 45.4
      inv_txfm_add_16x4_identity_dct_1_8bpc_c: 1476.4
      inv_txfm_add_16x4_identity_dct_1_8bpc_ssse3: 45.5
      inv_txfm_add_16x4_identity_dct_2_8bpc_c: 1459.8
      inv_txfm_add_16x4_identity_dct_2_8bpc_ssse3: 92.3
      inv_txfm_add_16x4_identity_flipadst_0_8bpc_c: 1548.7
      inv_txfm_add_16x4_identity_flipadst_0_8bpc_ssse3: 112.1
      inv_txfm_add_16x4_identity_flipadst_1_8bpc_c: 1548.2
      inv_txfm_add_16x4_identity_flipadst_1_8bpc_ssse3: 111.7
      inv_txfm_add_16x4_identity_flipadst_2_8bpc_c: 1547.2
      inv_txfm_add_16x4_identity_flipadst_2_8bpc_ssse3: 114.1
      inv_txfm_add_16x4_identity_identity_0_8bpc_c: 1271.5
      inv_txfm_add_16x4_identity_identity_0_8bpc_ssse3: 74.5
      inv_txfm_add_16x4_identity_identity_1_8bpc_c: 1266.8
      inv_txfm_add_16x4_identity_identity_1_8bpc_ssse3: 74.5
      inv_txfm_add_16x4_identity_identity_2_8bpc_c: 1268.0
      inv_txfm_add_16x4_identity_identity_2_8bpc_ssse3: 74.6
      bf659082
  4. 19 Jan, 2019 1 commit
  5. 15 Jan, 2019 2 commits
  6. 14 Jan, 2019 1 commit
  7. 13 Jan, 2019 3 commits
  8. 12 Jan, 2019 2 commits
  9. 11 Jan, 2019 2 commits
  10. 10 Jan, 2019 1 commit
  11. 07 Jan, 2019 3 commits
  12. 05 Jan, 2019 1 commit
  13. 28 Dec, 2018 1 commit
    • Xuefeng Jiang's avatar
      Add SSSE3 implementations for dav1d_ipred_top, dav1d_ipred_left and dav1d_ipred_128 · 9ea56386
      Xuefeng Jiang authored
      Cycle times:
      intra_pred_dc_128_w4_8bpc_c: 905.2
      intra_pred_dc_128_w4_8bpc_ssse3: 61.6
      intra_pred_dc_128_w8_8bpc_c: 1393.1
      intra_pred_dc_128_w8_8bpc_ssse3: 82.3
      intra_pred_dc_128_w16_8bpc_c: 2227.4
      intra_pred_dc_128_w16_8bpc_ssse3: 119.6
      intra_pred_dc_128_w32_8bpc_c: 2696.0
      intra_pred_dc_128_w32_8bpc_ssse3: 195.5
      intra_pred_dc_128_w64_8bpc_c: 4298.6
      intra_pred_dc_128_w64_8bpc_ssse3: 465.1
      intra_pred_dc_left_w4_8bpc_c: 974.2
      intra_pred_dc_left_w4_8bpc_ssse3: 80.2
      intra_pred_dc_left_w8_8bpc_c: 1478.4
      intra_pred_dc_left_w8_8bpc_ssse3: 103.7
      intra_pred_dc_left_w16_8bpc_c: 2313.0
      intra_pred_dc_left_w16_8bpc_ssse3: 159.1
      intra_pred_dc_left_w32_8bpc_c: 2835.1
      intra_pred_dc_left_w32_8bpc_ssse3: 305.3
      intra_pred_dc_left_w64_8bpc_c: 4462.2
      intra_pred_dc_left_w64_8bpc_ssse3: 525.5
      intra_pred_dc_top_w4_8bpc_c: 949.5
      intra_pred_dc_top_w4_8bpc_ssse3: 95.5
      intra_pred_dc_top_w8_8bpc_c: 1462.2
      intra_pred_dc_top_w8_8bpc_ssse3: 103.1
      intra_pred_dc_top_w16_8bpc_c: 2312.5
      intra_pred_dc_top_w16_8bpc_ssse3: 146.4
      intra_pred_dc_top_w32_8bpc_c: 2895.9
      intra_pred_dc_top_w32_8bpc_ssse3: 250.4
      intra_pred_dc_top_w64_8bpc_c: 4617.9
      intra_pred_dc_top_w64_8bpc_ssse3: 493.3
      9ea56386
  14. 27 Dec, 2018 1 commit
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 8x8 blocks in itx · 5fa6c44a
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_8x8_adst_adst_0_8bpc_c: 2165.6
      inv_txfm_add_8x8_adst_adst_0_8bpc_ssse3: 194.5
      inv_txfm_add_8x8_adst_adst_1_8bpc_c: 2158.3
      inv_txfm_add_8x8_adst_adst_1_8bpc_ssse3: 194.7
      inv_txfm_add_8x8_adst_dct_0_8bpc_c: 2241.0
      inv_txfm_add_8x8_adst_dct_0_8bpc_ssse3: 165.1
      inv_txfm_add_8x8_adst_dct_1_8bpc_c: 2242.6
      inv_txfm_add_8x8_adst_dct_1_8bpc_ssse3: 164.2
      inv_txfm_add_8x8_adst_flipadst_0_8bpc_c: 2178.2
      inv_txfm_add_8x8_adst_flipadst_0_8bpc_ssse3: 194.4
      inv_txfm_add_8x8_adst_flipadst_1_8bpc_c: 2183.0
      inv_txfm_add_8x8_adst_flipadst_1_8bpc_ssse3: 194.2
      inv_txfm_add_8x8_adst_identity_0_8bpc_c: 1592.1
      inv_txfm_add_8x8_adst_identity_0_8bpc_ssse3: 125.2
      inv_txfm_add_8x8_adst_identity_1_8bpc_c: 1597.7
      inv_txfm_add_8x8_adst_identity_1_8bpc_ssse3: 126.3
      inv_txfm_add_8x8_dct_adst_0_8bpc_c: 2214.1
      inv_txfm_add_8x8_dct_adst_0_8bpc_ssse3: 162.0
      inv_txfm_add_8x8_dct_adst_1_8bpc_c: 2221.5
      inv_txfm_add_8x8_dct_adst_1_8bpc_ssse3: 161.9
      inv_txfm_add_8x8_dct_dct_0_8bpc_c: 2247.8
      inv_txfm_add_8x8_dct_dct_0_8bpc_ssse3: 34.0
      inv_txfm_add_8x8_dct_dct_1_8bpc_c: 2243.1
      inv_txfm_add_8x8_dct_dct_1_8bpc_ssse3: 133.7
      inv_txfm_add_8x8_dct_flipadst_0_8bpc_c: 2255.1
      inv_txfm_add_8x8_dct_flipadst_0_8bpc_ssse3: 161.2
      inv_txfm_add_8x8_dct_flipadst_1_8bpc_c: 2244.6
      inv_txfm_add_8x8_dct_flipadst_1_8bpc_ssse3: 161.8
      inv_txfm_add_8x8_dct_identity_0_8bpc_c: 1632.3
      inv_txfm_add_8x8_dct_identity_0_8bpc_ssse3: 41.3
      inv_txfm_add_8x8_dct_identity_1_8bpc_c: 1629.6
      inv_txfm_add_8x8_dct_identity_1_8bpc_ssse3: 97.7
      inv_txfm_add_8x8_flipadst_adst_0_8bpc_c: 2185.6
      inv_txfm_add_8x8_flipadst_adst_0_8bpc_ssse3: 191.0
      inv_txfm_add_8x8_flipadst_adst_1_8bpc_c: 2165.7
      inv_txfm_add_8x8_flipadst_adst_1_8bpc_ssse3: 191.6
      inv_txfm_add_8x8_flipadst_dct_0_8bpc_c: 2246.4
      inv_txfm_add_8x8_flipadst_dct_0_8bpc_ssse3: 162.8
      inv_txfm_add_8x8_flipadst_dct_1_8bpc_c: 2252.1
      inv_txfm_add_8x8_flipadst_dct_1_8bpc_ssse3: 163.9
      inv_txfm_add_8x8_flipadst_flipadst_0_8bpc_c: 2180.9
      inv_txfm_add_8x8_flipadst_flipadst_0_8bpc_ssse3: 196.3
      inv_txfm_add_8x8_flipadst_flipadst_1_8bpc_c: 2192.2
      inv_txfm_add_8x8_flipadst_flipadst_1_8bpc_ssse3: 194.5
      inv_txfm_add_8x8_flipadst_identity_0_8bpc_c: 1600.9
      inv_txfm_add_8x8_flipadst_identity_0_8bpc_ssse3: 126.6
      inv_txfm_add_8x8_flipadst_identity_1_8bpc_c: 1600.5
      inv_txfm_add_8x8_flipadst_identity_1_8bpc_ssse3: 126.4
      inv_txfm_add_8x8_identity_adst_0_8bpc_c: 1558.0
      inv_txfm_add_8x8_identity_adst_0_8bpc_ssse3: 120.7
      inv_txfm_add_8x8_identity_adst_1_8bpc_c: 1556.7
      inv_txfm_add_8x8_identity_adst_1_8bpc_ssse3: 121.0
      inv_txfm_add_8x8_identity_dct_0_8bpc_c: 1600.8
      inv_txfm_add_8x8_identity_dct_0_8bpc_ssse3: 37.9
      inv_txfm_add_8x8_identity_dct_1_8bpc_c: 1599.5
      inv_txfm_add_8x8_identity_dct_1_8bpc_ssse3: 90.3
      inv_txfm_add_8x8_identity_flipadst_0_8bpc_c: 1584.9
      inv_txfm_add_8x8_identity_flipadst_0_8bpc_ssse3: 120.2
      inv_txfm_add_8x8_identity_flipadst_1_8bpc_c: 1584.3
      inv_txfm_add_8x8_identity_flipadst_1_8bpc_ssse3: 120.5
      inv_txfm_add_8x8_identity_identity_0_8bpc_c: 975.9
      inv_txfm_add_8x8_identity_identity_0_8bpc_ssse3: 54.7
      inv_txfm_add_8x8_identity_identity_1_8bpc_c: 975.7
      inv_txfm_add_8x8_identity_identity_1_8bpc_ssse3: 54.7
      5fa6c44a
  15. 26 Dec, 2018 1 commit
    • Xuefeng Jiang's avatar
      Add SSSE3 implementation for dav1d_ipred_v and dav1d_ipred_dc · 71e13008
      Xuefeng Jiang authored
      Cycle times:
      intra_pred_dc_w4_8bpc_c: 1051.4
      intra_pred_dc_w4_8bpc_ssse3: 58.8
      intra_pred_dc_w8_8bpc_c: 1587.6
      intra_pred_dc_w8_8bpc_ssse3: 75.3
      intra_pred_dc_w16_8bpc_c: 2526.2
      intra_pred_dc_w16_8bpc_ssse3: 103.5
      intra_pred_dc_w32_8bpc_c: 2646.6
      intra_pred_dc_w32_8bpc_ssse3: 179.5
      intra_pred_dc_w64_8bpc_c: 4084.6
      intra_pred_dc_w64_8bpc_ssse3: 356.1
      intra_pred_v_w4_8bpc_c: 468.5
      intra_pred_v_w4_8bpc_ssse3: 46.8
      intra_pred_v_w8_8bpc_c: 839.1
      intra_pred_v_w8_8bpc_ssse3: 56.7
      intra_pred_v_w16_8bpc_c: 1750.5
      intra_pred_v_w16_8bpc_ssse3: 73.0
      intra_pred_v_w32_8bpc_c: 1552.5
      intra_pred_v_w32_8bpc_ssse3: 135.4
      intra_pred_v_w64_8bpc_c: 2463.6
      intra_pred_v_w64_8bpc_ssse3: 305.6
      71e13008
  16. 22 Dec, 2018 1 commit
  17. 21 Dec, 2018 2 commits
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 4x8 and 8x4 blocks in itx · 1703f21f
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_4x8_adst_adst_0_8bpc_c: 1167.6
      inv_txfm_add_4x8_adst_adst_0_8bpc_ssse3: 114.6
      inv_txfm_add_4x8_adst_adst_1_8bpc_c: 1167.2
      inv_txfm_add_4x8_adst_adst_1_8bpc_ssse3: 114.1
      inv_txfm_add_4x8_adst_dct_0_8bpc_c: 1174.7
      inv_txfm_add_4x8_adst_dct_0_8bpc_ssse3: 34.8
      inv_txfm_add_4x8_adst_dct_1_8bpc_c: 1158.0
      inv_txfm_add_4x8_adst_dct_1_8bpc_ssse3: 101.0
      inv_txfm_add_4x8_adst_flipadst_0_8bpc_c: 1150.9
      inv_txfm_add_4x8_adst_flipadst_0_8bpc_ssse3: 115.8
      inv_txfm_add_4x8_adst_flipadst_1_8bpc_c: 1157.6
      inv_txfm_add_4x8_adst_flipadst_1_8bpc_ssse3: 115.8
      inv_txfm_add_4x8_adst_identity_0_8bpc_c: 848.4
      inv_txfm_add_4x8_adst_identity_0_8bpc_ssse3: 59.1
      inv_txfm_add_4x8_adst_identity_1_8bpc_c: 850.1
      inv_txfm_add_4x8_adst_identity_1_8bpc_ssse3: 59.1
      inv_txfm_add_4x8_dct_adst_0_8bpc_c: 1205.6
      inv_txfm_add_4x8_dct_adst_0_8bpc_ssse3: 107.0
      inv_txfm_add_4x8_dct_adst_1_8bpc_c: 1183.7
      inv_txfm_add_4x8_dct_adst_1_8bpc_ssse3: 107.0
      inv_txfm_add_4x8_dct_dct_0_8bpc_c: 1227.0
      inv_txfm_add_4x8_dct_dct_0_8bpc_ssse3: 34.6
      inv_txfm_add_4x8_dct_dct_1_8bpc_c: 1229.7
      inv_txfm_add_4x8_dct_dct_1_8bpc_ssse3: 96.1
      inv_txfm_add_4x8_dct_flipadst_0_8bpc_c: 1188.2
      inv_txfm_add_4x8_dct_flipadst_0_8bpc_ssse3: 109.3
      inv_txfm_add_4x8_dct_flipadst_1_8bpc_c: 1192.7
      inv_txfm_add_4x8_dct_flipadst_1_8bpc_ssse3: 109.9
      inv_txfm_add_4x8_dct_identity_0_8bpc_c: 878.4
      inv_txfm_add_4x8_dct_identity_0_8bpc_ssse3: 31.9
      inv_txfm_add_4x8_dct_identity_1_8bpc_c: 879.0
      inv_txfm_add_4x8_dct_identity_1_8bpc_ssse3: 54.8
      inv_txfm_add_4x8_flipadst_adst_0_8bpc_c: 1181.8
      inv_txfm_add_4x8_flipadst_adst_0_8bpc_ssse3: 114.7
      inv_txfm_add_4x8_flipadst_adst_1_8bpc_c: 1203.0
      inv_txfm_add_4x8_flipadst_adst_1_8bpc_ssse3: 114.5
      inv_txfm_add_4x8_flipadst_dct_0_8bpc_c: 1203.6
      inv_txfm_add_4x8_flipadst_dct_0_8bpc_ssse3: 34.1
      inv_txfm_add_4x8_flipadst_dct_1_8bpc_c: 1204.4
      inv_txfm_add_4x8_flipadst_dct_1_8bpc_ssse3: 100.2
      inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_c: 1180.6
      inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_ssse3: 117.1
      inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_c: 1178.7
      inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_ssse3: 116.8
      inv_txfm_add_4x8_flipadst_identity_0_8bpc_c: 871.3
      inv_txfm_add_4x8_flipadst_identity_0_8bpc_ssse3: 69.0
      inv_txfm_add_4x8_flipadst_identity_1_8bpc_c: 872.3
      inv_txfm_add_4x8_flipadst_identity_1_8bpc_ssse3: 70.0
      inv_txfm_add_4x8_identity_adst_0_8bpc_c: 1125.2
      inv_txfm_add_4x8_identity_adst_0_8bpc_ssse3: 98.7
      inv_txfm_add_4x8_identity_adst_1_8bpc_c: 1092.6
      inv_txfm_add_4x8_identity_adst_1_8bpc_ssse3: 99.6
      inv_txfm_add_4x8_identity_dct_0_8bpc_c: 1139.4
      inv_txfm_add_4x8_identity_dct_0_8bpc_ssse3: 38.8
      inv_txfm_add_4x8_identity_dct_1_8bpc_c: 1111.0
      inv_txfm_add_4x8_identity_dct_1_8bpc_ssse3: 84.1
      inv_txfm_add_4x8_identity_flipadst_0_8bpc_c: 1112.4
      inv_txfm_add_4x8_identity_flipadst_0_8bpc_ssse3: 100.7
      inv_txfm_add_4x8_identity_flipadst_1_8bpc_c: 1098.7
      inv_txfm_add_4x8_identity_flipadst_1_8bpc_ssse3: 100.8
      inv_txfm_add_4x8_identity_identity_0_8bpc_c: 791.6
      inv_txfm_add_4x8_identity_identity_0_8bpc_ssse3: 43.9
      inv_txfm_add_4x8_identity_identity_1_8bpc_c: 797.0
      inv_txfm_add_4x8_identity_identity_1_8bpc_ssse3: 43.8
      inv_txfm_add_8x4_adst_adst_0_8bpc_c: 1102.8
      inv_txfm_add_8x4_adst_adst_0_8bpc_ssse3: 108.7
      inv_txfm_add_8x4_adst_adst_1_8bpc_c: 1101.8
      inv_txfm_add_8x4_adst_adst_1_8bpc_ssse3: 108.9
      inv_txfm_add_8x4_adst_dct_0_8bpc_c: 1146.9
      inv_txfm_add_8x4_adst_dct_0_8bpc_ssse3: 98.7
      inv_txfm_add_8x4_adst_dct_1_8bpc_c: 1157.9
      inv_txfm_add_8x4_adst_dct_1_8bpc_ssse3: 98.9
      inv_txfm_add_8x4_adst_flipadst_0_8bpc_c: 1144.6
      inv_txfm_add_8x4_adst_flipadst_0_8bpc_ssse3: 111.4
      inv_txfm_add_8x4_adst_flipadst_1_8bpc_c: 1128.2
      inv_txfm_add_8x4_adst_flipadst_1_8bpc_ssse3: 112.4
      inv_txfm_add_8x4_adst_identity_0_8bpc_c: 1051.1
      inv_txfm_add_8x4_adst_identity_0_8bpc_ssse3: 87.1
      inv_txfm_add_8x4_adst_identity_1_8bpc_c: 1059.2
      inv_txfm_add_8x4_adst_identity_1_8bpc_ssse3: 87.7
      inv_txfm_add_8x4_dct_adst_0_8bpc_c: 1130.2
      inv_txfm_add_8x4_dct_adst_0_8bpc_ssse3: 29.0
      inv_txfm_add_8x4_dct_adst_1_8bpc_c: 1130.1
      inv_txfm_add_8x4_dct_adst_1_8bpc_ssse3: 89.2
      inv_txfm_add_8x4_dct_dct_0_8bpc_c: 1186.0
      inv_txfm_add_8x4_dct_dct_0_8bpc_ssse3: 26.3
      inv_txfm_add_8x4_dct_dct_1_8bpc_c: 1172.2
      inv_txfm_add_8x4_dct_dct_1_8bpc_ssse3: 78.8
      inv_txfm_add_8x4_dct_flipadst_0_8bpc_c: 1154.7
      inv_txfm_add_8x4_dct_flipadst_0_8bpc_ssse3: 29.1
      inv_txfm_add_8x4_dct_flipadst_1_8bpc_c: 1150.2
      inv_txfm_add_8x4_dct_flipadst_1_8bpc_ssse3: 92.2
      inv_txfm_add_8x4_dct_identity_0_8bpc_c: 1078.7
      inv_txfm_add_8x4_dct_identity_0_8bpc_ssse3: 29.2
      inv_txfm_add_8x4_dct_identity_1_8bpc_c: 1090.1
      inv_txfm_add_8x4_dct_identity_1_8bpc_ssse3: 72.2
      inv_txfm_add_8x4_flipadst_adst_0_8bpc_c: 1111.6
      inv_txfm_add_8x4_flipadst_adst_0_8bpc_ssse3: 108.6
      inv_txfm_add_8x4_flipadst_adst_1_8bpc_c: 1112.1
      inv_txfm_add_8x4_flipadst_adst_1_8bpc_ssse3: 107.6
      inv_txfm_add_8x4_flipadst_dct_0_8bpc_c: 1163.0
      inv_txfm_add_8x4_flipadst_dct_0_8bpc_ssse3: 98.3
      inv_txfm_add_8x4_flipadst_dct_1_8bpc_c: 1160.0
      inv_txfm_add_8x4_flipadst_dct_1_8bpc_ssse3: 99.6
      inv_txfm_add_8x4_flipadst_flipadst_0_8bpc_c: 1137.9
      inv_txfm_add_8x4_flipadst_flipadst_0_8bpc_ssse3: 112.0
      inv_txfm_add_8x4_flipadst_flipadst_1_8bpc_c: 1140.0
      inv_txfm_add_8x4_flipadst_flipadst_1_8bpc_ssse3: 112.0
      inv_txfm_add_8x4_flipadst_identity_0_8bpc_c: 1057.2
      inv_txfm_add_8x4_flipadst_identity_0_8bpc_ssse3: 88.1
      inv_txfm_add_8x4_flipadst_identity_1_8bpc_c: 1058.3
      inv_txfm_add_8x4_flipadst_identity_1_8bpc_ssse3: 87.1
      inv_txfm_add_8x4_identity_adst_0_8bpc_c: 794.0
      inv_txfm_add_8x4_identity_adst_0_8bpc_ssse3: 60.6
      inv_txfm_add_8x4_identity_adst_1_8bpc_c: 793.4
      inv_txfm_add_8x4_identity_adst_1_8bpc_ssse3: 60.6
      inv_txfm_add_8x4_identity_dct_0_8bpc_c: 838.4
      inv_txfm_add_8x4_identity_dct_0_8bpc_ssse3: 27.4
      inv_txfm_add_8x4_identity_dct_1_8bpc_c: 838.5
      inv_txfm_add_8x4_identity_dct_1_8bpc_ssse3: 52.0
      inv_txfm_add_8x4_identity_flipadst_0_8bpc_c: 825.3
      inv_txfm_add_8x4_identity_flipadst_0_8bpc_ssse3: 66.7
      inv_txfm_add_8x4_identity_flipadst_1_8bpc_c: 831.7
      inv_txfm_add_8x4_identity_flipadst_1_8bpc_ssse3: 66.7
      inv_txfm_add_8x4_identity_identity_0_8bpc_c: 768.6
      inv_txfm_add_8x4_identity_identity_0_8bpc_ssse3: 40.0
      inv_txfm_add_8x4_identity_identity_1_8bpc_c: 743.3
      inv_txfm_add_8x4_identity_identity_1_8bpc_ssse3: 39.9
      1703f21f
    • Ronald S. Bultje's avatar
  18. 20 Dec, 2018 3 commits