• Janne Grunau's avatar
    arm64/mc: add 8-bit neon asm for avg, w_avg and mask · 80e47425
    Janne Grunau authored
    checkasm --bench on a Qualcomm Kryo (Snapdragon 820):
    nop: 33.0
    avg_w4_8bpc_c: 450.5
    avg_w4_8bpc_neon: 20.1
    avg_w8_8bpc_c: 438.6
    avg_w8_8bpc_neon: 45.2
    avg_w16_8bpc_c: 1003.7
    avg_w16_8bpc_neon: 112.8
    avg_w32_8bpc_c: 3249.6
    avg_w32_8bpc_neon: 429.9
    avg_w64_8bpc_c: 7213.3
    avg_w64_8bpc_neon: 1299.4
    avg_w128_8bpc_c: 16791.3
    avg_w128_8bpc_neon: 2978.4
    w_avg_w4_8bpc_c: 605.7
    w_avg_w4_8bpc_neon: 30.9
    w_avg_w8_8bpc_c: 545.8
    w_avg_w8_8bpc_neon: 72.9
    w_avg_w16_8bpc_c: 1430.1
    w_avg_w16_8bpc_neon: 193.5
    w_avg_w32_8bpc_c: 4876.3
    w_avg_w32_8bpc_neon: 715.3
    w_avg_w64_8bpc_c: 11338.0
    w_avg_w64_8bpc_neon: 2147.0
    w_avg_w128_8bpc_c: 26822.0
    w_avg_w128_8bpc_neon: 4596.3
    mask_w4_8bpc_c: 604.6
    mask_w4_8bpc_neon: 37.2
    mask_w8_8bpc_c: 654.8
    mask_w8_8bpc_neon: 96.0
    mask_w16_8bpc_c: 1663.0
    mask_w16_8bpc_neon: 272.4
    mask_w32_8bpc_c: 5707.6
    mask_w32_8bpc_neon: 1028.9
    mask_w64_8bpc_c: 12735.3
    mask_w64_8bpc_neon: 2533.2
    mask_w128_8bpc_c: 31027.6
    mask_w128_8bpc_neon: 6247.2
    80e47425
meson.build 5.85 KB