1. 24 Jan, 2019 10 commits
  2. 21 Jan, 2019 1 commit
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 4x16 and 16x4 blocks in itx · bf659082
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_4x16_adst_adst_0_8bpc_c: 2203.6
      inv_txfm_add_4x16_adst_adst_0_8bpc_ssse3: 198.7
      inv_txfm_add_4x16_adst_adst_1_8bpc_c: 2235.1
      inv_txfm_add_4x16_adst_adst_1_8bpc_ssse3: 199.7
      inv_txfm_add_4x16_adst_adst_2_8bpc_c: 2199.1
      inv_txfm_add_4x16_adst_adst_2_8bpc_ssse3: 199.9
      inv_txfm_add_4x16_adst_dct_0_8bpc_c: 2272.4
      inv_txfm_add_4x16_adst_dct_0_8bpc_ssse3: 50.0
      inv_txfm_add_4x16_adst_dct_1_8bpc_c: 2281.6
      inv_txfm_add_4x16_adst_dct_1_8bpc_ssse3: 163.7
      inv_txfm_add_4x16_adst_dct_2_8bpc_c: 2262.5
      inv_txfm_add_4x16_adst_dct_2_8bpc_ssse3: 164.7
      inv_txfm_add_4x16_adst_flipadst_0_8bpc_c: 2456.5
      inv_txfm_add_4x16_adst_flipadst_0_8bpc_ssse3: 204.3
      inv_txfm_add_4x16_adst_flipadst_1_8bpc_c: 2349.1
      inv_txfm_add_4x16_adst_flipadst_1_8bpc_ssse3: 198.5
      inv_txfm_add_4x16_adst_flipadst_2_8bpc_c: 2241.5
      inv_txfm_add_4x16_adst_flipadst_2_8bpc_ssse3: 198.7
      inv_txfm_add_4x16_adst_identity_0_8bpc_c: 1574.7
      inv_txfm_add_4x16_adst_identity_0_8bpc_ssse3: 117.0
      inv_txfm_add_4x16_adst_identity_1_8bpc_c: 1576.3
      inv_txfm_add_4x16_adst_identity_1_8bpc_ssse3: 116.6
      inv_txfm_add_4x16_adst_identity_2_8bpc_c: 1572.9
      inv_txfm_add_4x16_adst_identity_2_8bpc_ssse3: 116.7
      inv_txfm_add_4x16_dct_adst_0_8bpc_c: 2162.8
      inv_txfm_add_4x16_dct_adst_0_8bpc_ssse3: 187.6
      inv_txfm_add_4x16_dct_adst_1_8bpc_c: 2180.4
      inv_txfm_add_4x16_dct_adst_1_8bpc_ssse3: 185.6
      inv_txfm_add_4x16_dct_adst_2_8bpc_c: 2165.1
      inv_txfm_add_4x16_dct_adst_2_8bpc_ssse3: 184.9
      inv_txfm_add_4x16_dct_dct_0_8bpc_c: 2233.7
      inv_txfm_add_4x16_dct_dct_0_8bpc_ssse3: 49.5
      inv_txfm_add_4x16_dct_dct_1_8bpc_c: 2770.4
      inv_txfm_add_4x16_dct_dct_1_8bpc_ssse3: 148.4
      inv_txfm_add_4x16_dct_dct_2_8bpc_c: 2288.7
      inv_txfm_add_4x16_dct_dct_2_8bpc_ssse3: 149.0
      inv_txfm_add_4x16_dct_flipadst_0_8bpc_c: 2242.0
      inv_txfm_add_4x16_dct_flipadst_0_8bpc_ssse3: 185.8
      inv_txfm_add_4x16_dct_flipadst_1_8bpc_c: 2249.6
      inv_txfm_add_4x16_dct_flipadst_1_8bpc_ssse3: 188.4
      inv_txfm_add_4x16_dct_flipadst_2_8bpc_c: 2237.3
      inv_txfm_add_4x16_dct_flipadst_2_8bpc_ssse3: 185.1
      inv_txfm_add_4x16_dct_identity_0_8bpc_c: 1532.3
      inv_txfm_add_4x16_dct_identity_0_8bpc_ssse3: 63.7
      inv_txfm_add_4x16_dct_identity_1_8bpc_c: 1534.5
      inv_txfm_add_4x16_dct_identity_1_8bpc_ssse3: 63.6
      inv_txfm_add_4x16_dct_identity_2_8bpc_c: 1548.1
      inv_txfm_add_4x16_dct_identity_2_8bpc_ssse3: 101.6
      inv_txfm_add_4x16_flipadst_adst_0_8bpc_c: 2205.2
      inv_txfm_add_4x16_flipadst_adst_0_8bpc_ssse3: 201.6
      inv_txfm_add_4x16_flipadst_adst_1_8bpc_c: 2222.0
      inv_txfm_add_4x16_flipadst_adst_1_8bpc_ssse3: 202.6
      inv_txfm_add_4x16_flipadst_adst_2_8bpc_c: 2205.2
      inv_txfm_add_4x16_flipadst_adst_2_8bpc_ssse3: 205.7
      inv_txfm_add_4x16_flipadst_dct_0_8bpc_c: 2294.9
      inv_txfm_add_4x16_flipadst_dct_0_8bpc_ssse3: 50.0
      inv_txfm_add_4x16_flipadst_dct_1_8bpc_c: 2304.2
      inv_txfm_add_4x16_flipadst_dct_1_8bpc_ssse3: 164.5
      inv_txfm_add_4x16_flipadst_dct_2_8bpc_c: 2292.7
      inv_txfm_add_4x16_flipadst_dct_2_8bpc_ssse3: 164.5
      inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_c: 2281.3
      inv_txfm_add_4x16_flipadst_flipadst_0_8bpc_ssse3: 202.9
      inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_c: 2258.7
      inv_txfm_add_4x16_flipadst_flipadst_1_8bpc_ssse3: 202.4
      inv_txfm_add_4x16_flipadst_flipadst_2_8bpc_c: 2261.0
      inv_txfm_add_4x16_flipadst_flipadst_2_8bpc_ssse3: 201.3
      inv_txfm_add_4x16_flipadst_identity_0_8bpc_c: 1580.5
      inv_txfm_add_4x16_flipadst_identity_0_8bpc_ssse3: 116.1
      inv_txfm_add_4x16_flipadst_identity_1_8bpc_c: 1578.7
      inv_txfm_add_4x16_flipadst_identity_1_8bpc_ssse3: 116.7
      inv_txfm_add_4x16_flipadst_identity_2_8bpc_c: 1590.8
      inv_txfm_add_4x16_flipadst_identity_2_8bpc_ssse3: 117.4
      inv_txfm_add_4x16_identity_adst_0_8bpc_c: 1949.0
      inv_txfm_add_4x16_identity_adst_0_8bpc_ssse3: 170.9
      inv_txfm_add_4x16_identity_adst_1_8bpc_c: 1947.4
      inv_txfm_add_4x16_identity_adst_1_8bpc_ssse3: 171.0
      inv_txfm_add_4x16_identity_adst_2_8bpc_c: 1948.7
      inv_txfm_add_4x16_identity_adst_2_8bpc_ssse3: 170.3
      inv_txfm_add_4x16_identity_dct_0_8bpc_c: 2022.3
      inv_txfm_add_4x16_identity_dct_0_8bpc_ssse3: 59.2
      inv_txfm_add_4x16_identity_dct_1_8bpc_c: 2020.8
      inv_txfm_add_4x16_identity_dct_1_8bpc_ssse3: 133.7
      inv_txfm_add_4x16_identity_dct_2_8bpc_c: 2020.2
      inv_txfm_add_4x16_identity_dct_2_8bpc_ssse3: 133.2
      inv_txfm_add_4x16_identity_flipadst_0_8bpc_c: 2024.7
      inv_txfm_add_4x16_identity_flipadst_0_8bpc_ssse3: 170.3
      inv_txfm_add_4x16_identity_flipadst_1_8bpc_c: 2021.8
      inv_txfm_add_4x16_identity_flipadst_1_8bpc_ssse3: 170.0
      inv_txfm_add_4x16_identity_flipadst_2_8bpc_c: 2022.5
      inv_txfm_add_4x16_identity_flipadst_2_8bpc_ssse3: 169.9
      inv_txfm_add_4x16_identity_identity_0_8bpc_c: 1328.4
      inv_txfm_add_4x16_identity_identity_0_8bpc_ssse3: 87.7
      inv_txfm_add_4x16_identity_identity_1_8bpc_c: 1330.9
      inv_txfm_add_4x16_identity_identity_1_8bpc_ssse3: 87.7
      inv_txfm_add_4x16_identity_identity_2_8bpc_c: 1327.3
      inv_txfm_add_4x16_identity_identity_2_8bpc_ssse3: 87.6
      inv_txfm_add_16x4_adst_adst_0_8bpc_c: 2166.3
      inv_txfm_add_16x4_adst_adst_0_8bpc_ssse3: 186.3
      inv_txfm_add_16x4_adst_adst_1_8bpc_c: 2166.9
      inv_txfm_add_16x4_adst_adst_1_8bpc_ssse3: 184.9
      inv_txfm_add_16x4_adst_adst_2_8bpc_c: 2167.2
      inv_txfm_add_16x4_adst_adst_2_8bpc_ssse3: 185.2
      inv_txfm_add_16x4_adst_dct_0_8bpc_c: 2123.2
      inv_txfm_add_16x4_adst_dct_0_8bpc_ssse3: 172.1
      inv_txfm_add_16x4_adst_dct_1_8bpc_c: 2124.2
      inv_txfm_add_16x4_adst_dct_1_8bpc_ssse3: 171.2
      inv_txfm_add_16x4_adst_dct_2_8bpc_c: 2122.8
      inv_txfm_add_16x4_adst_dct_2_8bpc_ssse3: 171.8
      inv_txfm_add_16x4_adst_flipadst_0_8bpc_c: 2213.3
      inv_txfm_add_16x4_adst_flipadst_0_8bpc_ssse3: 189.6
      inv_txfm_add_16x4_adst_flipadst_1_8bpc_c: 2227.7
      inv_txfm_add_16x4_adst_flipadst_1_8bpc_ssse3: 188.4
      inv_txfm_add_16x4_adst_flipadst_2_8bpc_c: 2228.5
      inv_txfm_add_16x4_adst_flipadst_2_8bpc_ssse3: 188.4
      inv_txfm_add_16x4_adst_identity_0_8bpc_c: 1906.7
      inv_txfm_add_16x4_adst_identity_0_8bpc_ssse3: 154.3
      inv_txfm_add_16x4_adst_identity_1_8bpc_c: 1905.2
      inv_txfm_add_16x4_adst_identity_1_8bpc_ssse3: 155.6
      inv_txfm_add_16x4_adst_identity_2_8bpc_c: 1905.6
      inv_txfm_add_16x4_adst_identity_2_8bpc_ssse3: 156.3
      inv_txfm_add_16x4_dct_adst_0_8bpc_c: 2209.8
      inv_txfm_add_16x4_dct_adst_0_8bpc_ssse3: 37.4
      inv_txfm_add_16x4_dct_adst_1_8bpc_c: 2209.8
      inv_txfm_add_16x4_dct_adst_1_8bpc_ssse3: 157.9
      inv_txfm_add_16x4_dct_adst_2_8bpc_c: 2221.1
      inv_txfm_add_16x4_dct_adst_2_8bpc_ssse3: 158.5
      inv_txfm_add_16x4_dct_dct_0_8bpc_c: 2177.5
      inv_txfm_add_16x4_dct_dct_0_8bpc_ssse3: 29.6
      inv_txfm_add_16x4_dct_dct_1_8bpc_c: 2179.3
      inv_txfm_add_16x4_dct_dct_1_8bpc_ssse3: 144.9
      inv_txfm_add_16x4_dct_dct_2_8bpc_c: 2177.8
      inv_txfm_add_16x4_dct_dct_2_8bpc_ssse3: 143.7
      inv_txfm_add_16x4_dct_flipadst_0_8bpc_c: 2293.6
      inv_txfm_add_16x4_dct_flipadst_0_8bpc_ssse3: 38.3
      inv_txfm_add_16x4_dct_flipadst_1_8bpc_c: 2293.2
      inv_txfm_add_16x4_dct_flipadst_1_8bpc_ssse3: 163.9
      inv_txfm_add_16x4_dct_flipadst_2_8bpc_c: 2301.3
      inv_txfm_add_16x4_dct_flipadst_2_8bpc_ssse3: 163.7
      inv_txfm_add_16x4_dct_identity_0_8bpc_c: 1977.7
      inv_txfm_add_16x4_dct_identity_0_8bpc_ssse3: 39.9
      inv_txfm_add_16x4_dct_identity_1_8bpc_c: 1978.7
      inv_txfm_add_16x4_dct_identity_1_8bpc_ssse3: 126.8
      inv_txfm_add_16x4_dct_identity_2_8bpc_c: 1979.5
      inv_txfm_add_16x4_dct_identity_2_8bpc_ssse3: 128.1
      inv_txfm_add_16x4_flipadst_adst_0_8bpc_c: 2175.6
      inv_txfm_add_16x4_flipadst_adst_0_8bpc_ssse3: 185.1
      inv_txfm_add_16x4_flipadst_adst_1_8bpc_c: 2175.7
      inv_txfm_add_16x4_flipadst_adst_1_8bpc_ssse3: 185.7
      inv_txfm_add_16x4_flipadst_adst_2_8bpc_c: 2173.1
      inv_txfm_add_16x4_flipadst_adst_2_8bpc_ssse3: 185.0
      inv_txfm_add_16x4_flipadst_dct_0_8bpc_c: 2140.5
      inv_txfm_add_16x4_flipadst_dct_0_8bpc_ssse3: 172.0
      inv_txfm_add_16x4_flipadst_dct_1_8bpc_c: 2147.5
      inv_txfm_add_16x4_flipadst_dct_1_8bpc_ssse3: 171.9
      inv_txfm_add_16x4_flipadst_dct_2_8bpc_c: 2148.5
      inv_txfm_add_16x4_flipadst_dct_2_8bpc_ssse3: 172.0
      inv_txfm_add_16x4_flipadst_flipadst_0_8bpc_c: 2240.6
      inv_txfm_add_16x4_flipadst_flipadst_0_8bpc_ssse3: 191.3
      inv_txfm_add_16x4_flipadst_flipadst_1_8bpc_c: 2243.5
      inv_txfm_add_16x4_flipadst_flipadst_1_8bpc_ssse3: 193.2
      inv_txfm_add_16x4_flipadst_flipadst_2_8bpc_c: 2242.9
      inv_txfm_add_16x4_flipadst_flipadst_2_8bpc_ssse3: 192.0
      inv_txfm_add_16x4_flipadst_identity_0_8bpc_c: 1919.2
      inv_txfm_add_16x4_flipadst_identity_0_8bpc_ssse3: 155.1
      inv_txfm_add_16x4_flipadst_identity_1_8bpc_c: 1925.2
      inv_txfm_add_16x4_flipadst_identity_1_8bpc_ssse3: 155.2
      inv_txfm_add_16x4_flipadst_identity_2_8bpc_c: 2084.8
      inv_txfm_add_16x4_flipadst_identity_2_8bpc_ssse3: 155.0
      inv_txfm_add_16x4_identity_adst_0_8bpc_c: 1498.5
      inv_txfm_add_16x4_identity_adst_0_8bpc_ssse3: 107.6
      inv_txfm_add_16x4_identity_adst_1_8bpc_c: 1499.5
      inv_txfm_add_16x4_identity_adst_1_8bpc_ssse3: 107.0
      inv_txfm_add_16x4_identity_adst_2_8bpc_c: 1498.9
      inv_txfm_add_16x4_identity_adst_2_8bpc_ssse3: 107.9
      inv_txfm_add_16x4_identity_dct_0_8bpc_c: 1471.9
      inv_txfm_add_16x4_identity_dct_0_8bpc_ssse3: 45.4
      inv_txfm_add_16x4_identity_dct_1_8bpc_c: 1476.4
      inv_txfm_add_16x4_identity_dct_1_8bpc_ssse3: 45.5
      inv_txfm_add_16x4_identity_dct_2_8bpc_c: 1459.8
      inv_txfm_add_16x4_identity_dct_2_8bpc_ssse3: 92.3
      inv_txfm_add_16x4_identity_flipadst_0_8bpc_c: 1548.7
      inv_txfm_add_16x4_identity_flipadst_0_8bpc_ssse3: 112.1
      inv_txfm_add_16x4_identity_flipadst_1_8bpc_c: 1548.2
      inv_txfm_add_16x4_identity_flipadst_1_8bpc_ssse3: 111.7
      inv_txfm_add_16x4_identity_flipadst_2_8bpc_c: 1547.2
      inv_txfm_add_16x4_identity_flipadst_2_8bpc_ssse3: 114.1
      inv_txfm_add_16x4_identity_identity_0_8bpc_c: 1271.5
      inv_txfm_add_16x4_identity_identity_0_8bpc_ssse3: 74.5
      inv_txfm_add_16x4_identity_identity_1_8bpc_c: 1266.8
      inv_txfm_add_16x4_identity_identity_1_8bpc_ssse3: 74.5
      inv_txfm_add_16x4_identity_identity_2_8bpc_c: 1268.0
      inv_txfm_add_16x4_identity_identity_2_8bpc_ssse3: 74.6
      bf659082
  3. 19 Jan, 2019 1 commit
  4. 15 Jan, 2019 2 commits
  5. 14 Jan, 2019 1 commit
  6. 13 Jan, 2019 3 commits
  7. 12 Jan, 2019 2 commits
  8. 11 Jan, 2019 2 commits
  9. 10 Jan, 2019 1 commit
  10. 07 Jan, 2019 3 commits
  11. 05 Jan, 2019 1 commit
  12. 28 Dec, 2018 1 commit
    • Xuefeng Jiang's avatar
      Add SSSE3 implementations for dav1d_ipred_top, dav1d_ipred_left and dav1d_ipred_128 · 9ea56386
      Xuefeng Jiang authored
      Cycle times:
      intra_pred_dc_128_w4_8bpc_c: 905.2
      intra_pred_dc_128_w4_8bpc_ssse3: 61.6
      intra_pred_dc_128_w8_8bpc_c: 1393.1
      intra_pred_dc_128_w8_8bpc_ssse3: 82.3
      intra_pred_dc_128_w16_8bpc_c: 2227.4
      intra_pred_dc_128_w16_8bpc_ssse3: 119.6
      intra_pred_dc_128_w32_8bpc_c: 2696.0
      intra_pred_dc_128_w32_8bpc_ssse3: 195.5
      intra_pred_dc_128_w64_8bpc_c: 4298.6
      intra_pred_dc_128_w64_8bpc_ssse3: 465.1
      intra_pred_dc_left_w4_8bpc_c: 974.2
      intra_pred_dc_left_w4_8bpc_ssse3: 80.2
      intra_pred_dc_left_w8_8bpc_c: 1478.4
      intra_pred_dc_left_w8_8bpc_ssse3: 103.7
      intra_pred_dc_left_w16_8bpc_c: 2313.0
      intra_pred_dc_left_w16_8bpc_ssse3: 159.1
      intra_pred_dc_left_w32_8bpc_c: 2835.1
      intra_pred_dc_left_w32_8bpc_ssse3: 305.3
      intra_pred_dc_left_w64_8bpc_c: 4462.2
      intra_pred_dc_left_w64_8bpc_ssse3: 525.5
      intra_pred_dc_top_w4_8bpc_c: 949.5
      intra_pred_dc_top_w4_8bpc_ssse3: 95.5
      intra_pred_dc_top_w8_8bpc_c: 1462.2
      intra_pred_dc_top_w8_8bpc_ssse3: 103.1
      intra_pred_dc_top_w16_8bpc_c: 2312.5
      intra_pred_dc_top_w16_8bpc_ssse3: 146.4
      intra_pred_dc_top_w32_8bpc_c: 2895.9
      intra_pred_dc_top_w32_8bpc_ssse3: 250.4
      intra_pred_dc_top_w64_8bpc_c: 4617.9
      intra_pred_dc_top_w64_8bpc_ssse3: 493.3
      9ea56386
  13. 27 Dec, 2018 1 commit
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 8x8 blocks in itx · 5fa6c44a
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_8x8_adst_adst_0_8bpc_c: 2165.6
      inv_txfm_add_8x8_adst_adst_0_8bpc_ssse3: 194.5
      inv_txfm_add_8x8_adst_adst_1_8bpc_c: 2158.3
      inv_txfm_add_8x8_adst_adst_1_8bpc_ssse3: 194.7
      inv_txfm_add_8x8_adst_dct_0_8bpc_c: 2241.0
      inv_txfm_add_8x8_adst_dct_0_8bpc_ssse3: 165.1
      inv_txfm_add_8x8_adst_dct_1_8bpc_c: 2242.6
      inv_txfm_add_8x8_adst_dct_1_8bpc_ssse3: 164.2
      inv_txfm_add_8x8_adst_flipadst_0_8bpc_c: 2178.2
      inv_txfm_add_8x8_adst_flipadst_0_8bpc_ssse3: 194.4
      inv_txfm_add_8x8_adst_flipadst_1_8bpc_c: 2183.0
      inv_txfm_add_8x8_adst_flipadst_1_8bpc_ssse3: 194.2
      inv_txfm_add_8x8_adst_identity_0_8bpc_c: 1592.1
      inv_txfm_add_8x8_adst_identity_0_8bpc_ssse3: 125.2
      inv_txfm_add_8x8_adst_identity_1_8bpc_c: 1597.7
      inv_txfm_add_8x8_adst_identity_1_8bpc_ssse3: 126.3
      inv_txfm_add_8x8_dct_adst_0_8bpc_c: 2214.1
      inv_txfm_add_8x8_dct_adst_0_8bpc_ssse3: 162.0
      inv_txfm_add_8x8_dct_adst_1_8bpc_c: 2221.5
      inv_txfm_add_8x8_dct_adst_1_8bpc_ssse3: 161.9
      inv_txfm_add_8x8_dct_dct_0_8bpc_c: 2247.8
      inv_txfm_add_8x8_dct_dct_0_8bpc_ssse3: 34.0
      inv_txfm_add_8x8_dct_dct_1_8bpc_c: 2243.1
      inv_txfm_add_8x8_dct_dct_1_8bpc_ssse3: 133.7
      inv_txfm_add_8x8_dct_flipadst_0_8bpc_c: 2255.1
      inv_txfm_add_8x8_dct_flipadst_0_8bpc_ssse3: 161.2
      inv_txfm_add_8x8_dct_flipadst_1_8bpc_c: 2244.6
      inv_txfm_add_8x8_dct_flipadst_1_8bpc_ssse3: 161.8
      inv_txfm_add_8x8_dct_identity_0_8bpc_c: 1632.3
      inv_txfm_add_8x8_dct_identity_0_8bpc_ssse3: 41.3
      inv_txfm_add_8x8_dct_identity_1_8bpc_c: 1629.6
      inv_txfm_add_8x8_dct_identity_1_8bpc_ssse3: 97.7
      inv_txfm_add_8x8_flipadst_adst_0_8bpc_c: 2185.6
      inv_txfm_add_8x8_flipadst_adst_0_8bpc_ssse3: 191.0
      inv_txfm_add_8x8_flipadst_adst_1_8bpc_c: 2165.7
      inv_txfm_add_8x8_flipadst_adst_1_8bpc_ssse3: 191.6
      inv_txfm_add_8x8_flipadst_dct_0_8bpc_c: 2246.4
      inv_txfm_add_8x8_flipadst_dct_0_8bpc_ssse3: 162.8
      inv_txfm_add_8x8_flipadst_dct_1_8bpc_c: 2252.1
      inv_txfm_add_8x8_flipadst_dct_1_8bpc_ssse3: 163.9
      inv_txfm_add_8x8_flipadst_flipadst_0_8bpc_c: 2180.9
      inv_txfm_add_8x8_flipadst_flipadst_0_8bpc_ssse3: 196.3
      inv_txfm_add_8x8_flipadst_flipadst_1_8bpc_c: 2192.2
      inv_txfm_add_8x8_flipadst_flipadst_1_8bpc_ssse3: 194.5
      inv_txfm_add_8x8_flipadst_identity_0_8bpc_c: 1600.9
      inv_txfm_add_8x8_flipadst_identity_0_8bpc_ssse3: 126.6
      inv_txfm_add_8x8_flipadst_identity_1_8bpc_c: 1600.5
      inv_txfm_add_8x8_flipadst_identity_1_8bpc_ssse3: 126.4
      inv_txfm_add_8x8_identity_adst_0_8bpc_c: 1558.0
      inv_txfm_add_8x8_identity_adst_0_8bpc_ssse3: 120.7
      inv_txfm_add_8x8_identity_adst_1_8bpc_c: 1556.7
      inv_txfm_add_8x8_identity_adst_1_8bpc_ssse3: 121.0
      inv_txfm_add_8x8_identity_dct_0_8bpc_c: 1600.8
      inv_txfm_add_8x8_identity_dct_0_8bpc_ssse3: 37.9
      inv_txfm_add_8x8_identity_dct_1_8bpc_c: 1599.5
      inv_txfm_add_8x8_identity_dct_1_8bpc_ssse3: 90.3
      inv_txfm_add_8x8_identity_flipadst_0_8bpc_c: 1584.9
      inv_txfm_add_8x8_identity_flipadst_0_8bpc_ssse3: 120.2
      inv_txfm_add_8x8_identity_flipadst_1_8bpc_c: 1584.3
      inv_txfm_add_8x8_identity_flipadst_1_8bpc_ssse3: 120.5
      inv_txfm_add_8x8_identity_identity_0_8bpc_c: 975.9
      inv_txfm_add_8x8_identity_identity_0_8bpc_ssse3: 54.7
      inv_txfm_add_8x8_identity_identity_1_8bpc_c: 975.7
      inv_txfm_add_8x8_identity_identity_1_8bpc_ssse3: 54.7
      5fa6c44a
  14. 26 Dec, 2018 1 commit
    • Xuefeng Jiang's avatar
      Add SSSE3 implementation for dav1d_ipred_v and dav1d_ipred_dc · 71e13008
      Xuefeng Jiang authored
      Cycle times:
      intra_pred_dc_w4_8bpc_c: 1051.4
      intra_pred_dc_w4_8bpc_ssse3: 58.8
      intra_pred_dc_w8_8bpc_c: 1587.6
      intra_pred_dc_w8_8bpc_ssse3: 75.3
      intra_pred_dc_w16_8bpc_c: 2526.2
      intra_pred_dc_w16_8bpc_ssse3: 103.5
      intra_pred_dc_w32_8bpc_c: 2646.6
      intra_pred_dc_w32_8bpc_ssse3: 179.5
      intra_pred_dc_w64_8bpc_c: 4084.6
      intra_pred_dc_w64_8bpc_ssse3: 356.1
      intra_pred_v_w4_8bpc_c: 468.5
      intra_pred_v_w4_8bpc_ssse3: 46.8
      intra_pred_v_w8_8bpc_c: 839.1
      intra_pred_v_w8_8bpc_ssse3: 56.7
      intra_pred_v_w16_8bpc_c: 1750.5
      intra_pred_v_w16_8bpc_ssse3: 73.0
      intra_pred_v_w32_8bpc_c: 1552.5
      intra_pred_v_w32_8bpc_ssse3: 135.4
      intra_pred_v_w64_8bpc_c: 2463.6
      intra_pred_v_w64_8bpc_ssse3: 305.6
      71e13008
  15. 22 Dec, 2018 1 commit
  16. 21 Dec, 2018 2 commits
    • Liwei Wang's avatar
      Add SSSE3 implementation for the 4x8 and 8x4 blocks in itx · 1703f21f
      Liwei Wang authored
      Cycle times:
      inv_txfm_add_4x8_adst_adst_0_8bpc_c: 1167.6
      inv_txfm_add_4x8_adst_adst_0_8bpc_ssse3: 114.6
      inv_txfm_add_4x8_adst_adst_1_8bpc_c: 1167.2
      inv_txfm_add_4x8_adst_adst_1_8bpc_ssse3: 114.1
      inv_txfm_add_4x8_adst_dct_0_8bpc_c: 1174.7
      inv_txfm_add_4x8_adst_dct_0_8bpc_ssse3: 34.8
      inv_txfm_add_4x8_adst_dct_1_8bpc_c: 1158.0
      inv_txfm_add_4x8_adst_dct_1_8bpc_ssse3: 101.0
      inv_txfm_add_4x8_adst_flipadst_0_8bpc_c: 1150.9
      inv_txfm_add_4x8_adst_flipadst_0_8bpc_ssse3: 115.8
      inv_txfm_add_4x8_adst_flipadst_1_8bpc_c: 1157.6
      inv_txfm_add_4x8_adst_flipadst_1_8bpc_ssse3: 115.8
      inv_txfm_add_4x8_adst_identity_0_8bpc_c: 848.4
      inv_txfm_add_4x8_adst_identity_0_8bpc_ssse3: 59.1
      inv_txfm_add_4x8_adst_identity_1_8bpc_c: 850.1
      inv_txfm_add_4x8_adst_identity_1_8bpc_ssse3: 59.1
      inv_txfm_add_4x8_dct_adst_0_8bpc_c: 1205.6
      inv_txfm_add_4x8_dct_adst_0_8bpc_ssse3: 107.0
      inv_txfm_add_4x8_dct_adst_1_8bpc_c: 1183.7
      inv_txfm_add_4x8_dct_adst_1_8bpc_ssse3: 107.0
      inv_txfm_add_4x8_dct_dct_0_8bpc_c: 1227.0
      inv_txfm_add_4x8_dct_dct_0_8bpc_ssse3: 34.6
      inv_txfm_add_4x8_dct_dct_1_8bpc_c: 1229.7
      inv_txfm_add_4x8_dct_dct_1_8bpc_ssse3: 96.1
      inv_txfm_add_4x8_dct_flipadst_0_8bpc_c: 1188.2
      inv_txfm_add_4x8_dct_flipadst_0_8bpc_ssse3: 109.3
      inv_txfm_add_4x8_dct_flipadst_1_8bpc_c: 1192.7
      inv_txfm_add_4x8_dct_flipadst_1_8bpc_ssse3: 109.9
      inv_txfm_add_4x8_dct_identity_0_8bpc_c: 878.4
      inv_txfm_add_4x8_dct_identity_0_8bpc_ssse3: 31.9
      inv_txfm_add_4x8_dct_identity_1_8bpc_c: 879.0
      inv_txfm_add_4x8_dct_identity_1_8bpc_ssse3: 54.8
      inv_txfm_add_4x8_flipadst_adst_0_8bpc_c: 1181.8
      inv_txfm_add_4x8_flipadst_adst_0_8bpc_ssse3: 114.7
      inv_txfm_add_4x8_flipadst_adst_1_8bpc_c: 1203.0
      inv_txfm_add_4x8_flipadst_adst_1_8bpc_ssse3: 114.5
      inv_txfm_add_4x8_flipadst_dct_0_8bpc_c: 1203.6
      inv_txfm_add_4x8_flipadst_dct_0_8bpc_ssse3: 34.1
      inv_txfm_add_4x8_flipadst_dct_1_8bpc_c: 1204.4
      inv_txfm_add_4x8_flipadst_dct_1_8bpc_ssse3: 100.2
      inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_c: 1180.6
      inv_txfm_add_4x8_flipadst_flipadst_0_8bpc_ssse3: 117.1
      inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_c: 1178.7
      inv_txfm_add_4x8_flipadst_flipadst_1_8bpc_ssse3: 116.8
      inv_txfm_add_4x8_flipadst_identity_0_8bpc_c: 871.3
      inv_txfm_add_4x8_flipadst_identity_0_8bpc_ssse3: 69.0
      inv_txfm_add_4x8_flipadst_identity_1_8bpc_c: 872.3
      inv_txfm_add_4x8_flipadst_identity_1_8bpc_ssse3: 70.0
      inv_txfm_add_4x8_identity_adst_0_8bpc_c: 1125.2
      inv_txfm_add_4x8_identity_adst_0_8bpc_ssse3: 98.7
      inv_txfm_add_4x8_identity_adst_1_8bpc_c: 1092.6
      inv_txfm_add_4x8_identity_adst_1_8bpc_ssse3: 99.6
      inv_txfm_add_4x8_identity_dct_0_8bpc_c: 1139.4
      inv_txfm_add_4x8_identity_dct_0_8bpc_ssse3: 38.8
      inv_txfm_add_4x8_identity_dct_1_8bpc_c: 1111.0
      inv_txfm_add_4x8_identity_dct_1_8bpc_ssse3: 84.1
      inv_txfm_add_4x8_identity_flipadst_0_8bpc_c: 1112.4
      inv_txfm_add_4x8_identity_flipadst_0_8bpc_ssse3: 100.7
      inv_txfm_add_4x8_identity_flipadst_1_8bpc_c: 1098.7
      inv_txfm_add_4x8_identity_flipadst_1_8bpc_ssse3: 100.8
      inv_txfm_add_4x8_identity_identity_0_8bpc_c: 791.6
      inv_txfm_add_4x8_identity_identity_0_8bpc_ssse3: 43.9
      inv_txfm_add_4x8_identity_identity_1_8bpc_c: 797.0
      inv_txfm_add_4x8_identity_identity_1_8bpc_ssse3: 43.8
      inv_txfm_add_8x4_adst_adst_0_8bpc_c: 1102.8
      inv_txfm_add_8x4_adst_adst_0_8bpc_ssse3: 108.7
      inv_txfm_add_8x4_adst_adst_1_8bpc_c: 1101.8
      inv_txfm_add_8x4_adst_adst_1_8bpc_ssse3: 108.9
      inv_txfm_add_8x4_adst_dct_0_8bpc_c: 1146.9
      inv_txfm_add_8x4_adst_dct_0_8bpc_ssse3: 98.7
      inv_txfm_add_8x4_adst_dct_1_8bpc_c: 1157.9
      inv_txfm_add_8x4_adst_dct_1_8bpc_ssse3: 98.9
      inv_txfm_add_8x4_adst_flipadst_0_8bpc_c: 1144.6
      inv_txfm_add_8x4_adst_flipadst_0_8bpc_ssse3: 111.4
      inv_txfm_add_8x4_adst_flipadst_1_8bpc_c: 1128.2
      inv_txfm_add_8x4_adst_flipadst_1_8bpc_ssse3: 112.4
      inv_txfm_add_8x4_adst_identity_0_8bpc_c: 1051.1
      inv_txfm_add_8x4_adst_identity_0_8bpc_ssse3: 87.1
      inv_txfm_add_8x4_adst_identity_1_8bpc_c: 1059.2
      inv_txfm_add_8x4_adst_identity_1_8bpc_ssse3: 87.7
      inv_txfm_add_8x4_dct_adst_0_8bpc_c: 1130.2
      inv_txfm_add_8x4_dct_adst_0_8bpc_ssse3: 29.0
      inv_txfm_add_8x4_dct_adst_1_8bpc_c: 1130.1
      inv_txfm_add_8x4_dct_adst_1_8bpc_ssse3: 89.2
      inv_txfm_add_8x4_dct_dct_0_8bpc_c: 1186.0
      inv_txfm_add_8x4_dct_dct_0_8bpc_ssse3: 26.3
      inv_txfm_add_8x4_dct_dct_1_8bpc_c: 1172.2
      inv_txfm_add_8x4_dct_dct_1_8bpc_ssse3: 78.8
      inv_txfm_add_8x4_dct_flipadst_0_8bpc_c: 1154.7
      inv_txfm_add_8x4_dct_flipadst_0_8bpc_ssse3: 29.1
      inv_txfm_add_8x4_dct_flipadst_1_8bpc_c: 1150.2
      inv_txfm_add_8x4_dct_flipadst_1_8bpc_ssse3: 92.2
      inv_txfm_add_8x4_dct_identity_0_8bpc_c: 1078.7
      inv_txfm_add_8x4_dct_identity_0_8bpc_ssse3: 29.2
      inv_txfm_add_8x4_dct_identity_1_8bpc_c: 1090.1
      inv_txfm_add_8x4_dct_identity_1_8bpc_ssse3: 72.2
      inv_txfm_add_8x4_flipadst_adst_0_8bpc_c: 1111.6
      inv_txfm_add_8x4_flipadst_adst_0_8bpc_ssse3: 108.6
      inv_txfm_add_8x4_flipadst_adst_1_8bpc_c: 1112.1
      inv_txfm_add_8x4_flipadst_adst_1_8bpc_ssse3: 107.6
      inv_txfm_add_8x4_flipadst_dct_0_8bpc_c: 1163.0
      inv_txfm_add_8x4_flipadst_dct_0_8bpc_ssse3: 98.3
      inv_txfm_add_8x4_flipadst_dct_1_8bpc_c: 1160.0
      inv_txfm_add_8x4_flipadst_dct_1_8bpc_ssse3: 99.6
      inv_txfm_add_8x4_flipadst_flipadst_0_8bpc_c: 1137.9
      inv_txfm_add_8x4_flipadst_flipadst_0_8bpc_ssse3: 112.0
      inv_txfm_add_8x4_flipadst_flipadst_1_8bpc_c: 1140.0
      inv_txfm_add_8x4_flipadst_flipadst_1_8bpc_ssse3: 112.0
      inv_txfm_add_8x4_flipadst_identity_0_8bpc_c: 1057.2
      inv_txfm_add_8x4_flipadst_identity_0_8bpc_ssse3: 88.1
      inv_txfm_add_8x4_flipadst_identity_1_8bpc_c: 1058.3
      inv_txfm_add_8x4_flipadst_identity_1_8bpc_ssse3: 87.1
      inv_txfm_add_8x4_identity_adst_0_8bpc_c: 794.0
      inv_txfm_add_8x4_identity_adst_0_8bpc_ssse3: 60.6
      inv_txfm_add_8x4_identity_adst_1_8bpc_c: 793.4
      inv_txfm_add_8x4_identity_adst_1_8bpc_ssse3: 60.6
      inv_txfm_add_8x4_identity_dct_0_8bpc_c: 838.4
      inv_txfm_add_8x4_identity_dct_0_8bpc_ssse3: 27.4
      inv_txfm_add_8x4_identity_dct_1_8bpc_c: 838.5
      inv_txfm_add_8x4_identity_dct_1_8bpc_ssse3: 52.0
      inv_txfm_add_8x4_identity_flipadst_0_8bpc_c: 825.3
      inv_txfm_add_8x4_identity_flipadst_0_8bpc_ssse3: 66.7
      inv_txfm_add_8x4_identity_flipadst_1_8bpc_c: 831.7
      inv_txfm_add_8x4_identity_flipadst_1_8bpc_ssse3: 66.7
      inv_txfm_add_8x4_identity_identity_0_8bpc_c: 768.6
      inv_txfm_add_8x4_identity_identity_0_8bpc_ssse3: 40.0
      inv_txfm_add_8x4_identity_identity_1_8bpc_c: 743.3
      inv_txfm_add_8x4_identity_identity_1_8bpc_ssse3: 39.9
      1703f21f
    • Ronald S. Bultje's avatar
      bd8ce19e
  17. 20 Dec, 2018 7 commits