Skip to content

x86: Add high bitdepth mc(t)_scaled SSSE3 asm

mc_scaled_8tap_regular_w2_16bpc_c: 737.7
mc_scaled_8tap_regular_w2_16bpc_ssse3: 151.7
mc_scaled_8tap_regular_w2_16bpc_avx2: 141.2
mc_scaled_8tap_regular_w2_dy1_16bpc_c: 660.3
mc_scaled_8tap_regular_w2_dy1_16bpc_ssse3: 80.8
mc_scaled_8tap_regular_w2_dy1_16bpc_avx2: 73.2
mc_scaled_8tap_regular_w2_dy2_16bpc_c: 884.9
mc_scaled_8tap_regular_w2_dy2_16bpc_ssse3: 101.6
mc_scaled_8tap_regular_w2_dy2_16bpc_avx2: 87.2
mc_scaled_8tap_regular_w4_16bpc_c: 1356.3
mc_scaled_8tap_regular_w4_16bpc_ssse3: 172.3
mc_scaled_8tap_regular_w4_16bpc_avx2: 172.5
mc_scaled_8tap_regular_w4_dy1_16bpc_c: 1244.9
mc_scaled_8tap_regular_w4_dy1_16bpc_ssse3: 125.7
mc_scaled_8tap_regular_w4_dy1_16bpc_avx2: 96.1
mc_scaled_8tap_regular_w4_dy2_16bpc_c: 1665.6
mc_scaled_8tap_regular_w4_dy2_16bpc_ssse3: 150.2
mc_scaled_8tap_regular_w4_dy2_16bpc_avx2: 112.8
mc_scaled_8tap_regular_w8_16bpc_c: 2536.5
mc_scaled_8tap_regular_w8_16bpc_ssse3: 383.4
mc_scaled_8tap_regular_w8_16bpc_avx2: 256.2
mc_scaled_8tap_regular_w8_dy1_16bpc_c: 2331.8
mc_scaled_8tap_regular_w8_dy1_16bpc_ssse3: 350.0
mc_scaled_8tap_regular_w8_dy1_16bpc_avx2: 214.0
mc_scaled_8tap_regular_w8_dy2_16bpc_c: 3169.6
mc_scaled_8tap_regular_w8_dy2_16bpc_ssse3: 395.7
mc_scaled_8tap_regular_w8_dy2_16bpc_avx2: 265.7
mc_scaled_8tap_regular_w16_16bpc_c: 6384.6
mc_scaled_8tap_regular_w16_16bpc_ssse3: 1004.4
mc_scaled_8tap_regular_w16_16bpc_avx2: 665.0
mc_scaled_8tap_regular_w16_dy1_16bpc_c: 6103.4
mc_scaled_8tap_regular_w16_dy1_16bpc_ssse3: 896.3
mc_scaled_8tap_regular_w16_dy1_16bpc_avx2: 544.2
mc_scaled_8tap_regular_w16_dy2_16bpc_c: 8584.5
mc_scaled_8tap_regular_w16_dy2_16bpc_ssse3: 1049.0
mc_scaled_8tap_regular_w16_dy2_16bpc_avx2: 695.1
mc_scaled_8tap_regular_w32_16bpc_c: 19672.8
mc_scaled_8tap_regular_w32_16bpc_ssse3: 3204.3
mc_scaled_8tap_regular_w32_16bpc_avx2: 2109.6
mc_scaled_8tap_regular_w32_dy1_16bpc_c: 15964.6
mc_scaled_8tap_regular_w32_dy1_16bpc_ssse3: 2634.5
mc_scaled_8tap_regular_w32_dy1_16bpc_avx2: 1555.8
mc_scaled_8tap_regular_w32_dy2_16bpc_c: 24156.9
mc_scaled_8tap_regular_w32_dy2_16bpc_ssse3: 3217.3
mc_scaled_8tap_regular_w32_dy2_16bpc_avx2: 2088.8
mc_scaled_8tap_regular_w64_16bpc_c: 74356.3
mc_scaled_8tap_regular_w64_16bpc_ssse3: 11225.9
mc_scaled_8tap_regular_w64_16bpc_avx2: 7434.7
mc_scaled_8tap_regular_w64_dy1_16bpc_c: 60080.9
mc_scaled_8tap_regular_w64_dy1_16bpc_ssse3: 8912.8
mc_scaled_8tap_regular_w64_dy1_16bpc_avx2: 5222.2
mc_scaled_8tap_regular_w64_dy2_16bpc_c: 88891.4
mc_scaled_8tap_regular_w64_dy2_16bpc_ssse3: 10824.8
mc_scaled_8tap_regular_w64_dy2_16bpc_avx2: 7086.3
mc_scaled_8tap_regular_w128_16bpc_c: 171633.3
mc_scaled_8tap_regular_w128_16bpc_ssse3: 27089.3
mc_scaled_8tap_regular_w128_16bpc_avx2: 17998.2
mc_scaled_8tap_regular_w128_dy1_16bpc_c: 164399.9
mc_scaled_8tap_regular_w128_dy1_16bpc_ssse3: 24694.1
mc_scaled_8tap_regular_w128_dy1_16bpc_avx2: 14711.2
mc_scaled_8tap_regular_w128_dy2_16bpc_c: 244865.3
mc_scaled_8tap_regular_w128_dy2_16bpc_ssse3: 30599.1
mc_scaled_8tap_regular_w128_dy2_16bpc_avx2: 20341.1
mct_scaled_8tap_regular_w4_16bpc_c: 946.2
mct_scaled_8tap_regular_w4_16bpc_ssse3: 117.5
mct_scaled_8tap_regular_w4_16bpc_avx2: 112.5
mct_scaled_8tap_regular_w4_dy1_16bpc_c: 886.1
mct_scaled_8tap_regular_w4_dy1_16bpc_ssse3: 100.5
mct_scaled_8tap_regular_w4_dy1_16bpc_avx2: 76.8
mct_scaled_8tap_regular_w4_dy2_16bpc_c: 1170.1
mct_scaled_8tap_regular_w4_dy2_16bpc_ssse3: 117.6
mct_scaled_8tap_regular_w4_dy2_16bpc_avx2: 87.9
mct_scaled_8tap_regular_w8_16bpc_c: 2784.2
mct_scaled_8tap_regular_w8_16bpc_ssse3: 408.5
mct_scaled_8tap_regular_w8_16bpc_avx2: 280.3
mct_scaled_8tap_regular_w8_dy1_16bpc_c: 2530.5
mct_scaled_8tap_regular_w8_dy1_16bpc_ssse3: 358.2
mct_scaled_8tap_regular_w8_dy1_16bpc_avx2: 227.1
mct_scaled_8tap_regular_w8_dy2_16bpc_c: 3525.0
mct_scaled_8tap_regular_w8_dy2_16bpc_ssse3: 425.6
mct_scaled_8tap_regular_w8_dy2_16bpc_avx2: 283.6
mct_scaled_8tap_regular_w16_16bpc_c: 6773.8
mct_scaled_8tap_regular_w16_16bpc_ssse3: 1054.6
mct_scaled_8tap_regular_w16_16bpc_avx2: 696.4
mct_scaled_8tap_regular_w16_dy1_16bpc_c: 6418.0
mct_scaled_8tap_regular_w16_dy1_16bpc_ssse3: 938.7
mct_scaled_8tap_regular_w16_dy1_16bpc_avx2: 584.5
mct_scaled_8tap_regular_w16_dy2_16bpc_c: 9432.4
mct_scaled_8tap_regular_w16_dy2_16bpc_ssse3: 1125.3
mct_scaled_8tap_regular_w16_dy2_16bpc_avx2: 753.1
mct_scaled_8tap_regular_w32_16bpc_c: 26028.8
mct_scaled_8tap_regular_w32_16bpc_ssse3: 4128.4
mct_scaled_8tap_regular_w32_16bpc_avx2: 2748.4
mct_scaled_8tap_regular_w32_dy1_16bpc_c: 21604.3
mct_scaled_8tap_regular_w32_dy1_16bpc_ssse3: 3312.4
mct_scaled_8tap_regular_w32_dy1_16bpc_avx2: 2051.1
mct_scaled_8tap_regular_w32_dy2_16bpc_c: 32844.3
mct_scaled_8tap_regular_w32_dy2_16bpc_ssse3: 4102.9
mct_scaled_8tap_regular_w32_dy2_16bpc_avx2: 2741.6
mct_scaled_8tap_regular_w64_16bpc_c: 49101.8
mct_scaled_8tap_regular_w64_16bpc_ssse3: 8758.9
mct_scaled_8tap_regular_w64_16bpc_avx2: 5822.2
mct_scaled_8tap_regular_w64_dy1_16bpc_c: 53557.7
mct_scaled_8tap_regular_w64_dy1_16bpc_ssse3: 8469.7
mct_scaled_8tap_regular_w64_dy1_16bpc_avx2: 5264.3
mct_scaled_8tap_regular_w64_dy2_16bpc_c: 83379.7
mct_scaled_8tap_regular_w64_dy2_16bpc_ssse3: 10623.7
mct_scaled_8tap_regular_w64_dy2_16bpc_avx2: 7164.0
mct_scaled_8tap_regular_w128_16bpc_c: 163182.2
mct_scaled_8tap_regular_w128_16bpc_ssse3: 26452.9
mct_scaled_8tap_regular_w128_16bpc_avx2: 18402.2
mct_scaled_8tap_regular_w128_dy1_16bpc_c: 148199.8
mct_scaled_8tap_regular_w128_dy1_16bpc_ssse3: 23584.9
mct_scaled_8tap_regular_w128_dy1_16bpc_avx2: 14808.1
mct_scaled_8tap_regular_w128_dy2_16bpc_c: 234702.2
mct_scaled_8tap_regular_w128_dy2_16bpc_ssse3: 29653.8
mct_scaled_8tap_regular_w128_dy2_16bpc_avx2: 20042.4
Edited by Victorien Le Couviour--Tuffet

Merge request reports