Skip to content
  • Martin Storsjö's avatar
    arm: mc: Implement 8tap and bilin functions · 191f79d5
    Martin Storsjö authored and Jean-Baptiste Kempf committed
    Relative speedups measured with checkasm:
                                     Cortex A7     A8     A9    A53   Snapdragon 835
    mc_8tap_regular_w2_0_8bpc_neon:       9.63   4.05   3.82   5.41   5.68
    mc_8tap_regular_w2_h_8bpc_neon:       3.30   5.44   3.38   3.88   5.12
    mc_8tap_regular_w2_hv_8bpc_neon:      3.86   6.21   4.39   5.18   6.10
    mc_8tap_regular_w2_v_8bpc_neon:       4.69   5.43   3.56   7.27   4.86
    mc_8tap_regular_w4_0_8bpc_neon:       9.13   4.05   5.24   5.37   6.60
    mc_8tap_regular_w4_h_8bpc_neon:       4.38   7.11   4.61   6.59   7.15
    mc_8tap_regular_w4_hv_8bpc_neon:      5.11   9.77   7.37   9.21  10.29
    mc_8tap_regular_w4_v_8bpc_neon:       6.24   7.88   4.96  11.16   7.89
    mc_8tap_regular_w8_0_8bpc_neon:       9.12   4.20   5.59   5.59   9.25
    mc_8tap_regular_w8_h_8bpc_neon:       5.91   8.42   4.84   8.46   7.08
    mc_8tap_regular_w8_hv_8bpc_neon:      5.46   8.35   6.52   7.19   8.33
    mc_8tap_regular_w8_v_8bpc_neon:       7.53   8.96   6.28  16.08  10.66
    mc_8tap_regular_w16_0_8bpc_neon:      9.77   5.46   4.06   7.02   7.38
    mc_8tap_regular_w16_h_8bpc_neon:      6.33   8.87   5.03  10.30   4.29
    mc_8tap_regular_w16_hv_8bpc_neon:     5.00   7.84   6.15   6.83   7.44
    mc_8tap_regular_w16_v_8bpc_neon:      7.74   8.81   6.23  19.24  11.16
    mc_8tap_regular_w32_0_8bpc_neon:      6.11   4.63   2.44   5.92   4.70
    mc_8tap_regular_w32_h_8bpc_neon:      6.60   9.02   5.20  11.08   3.50
    mc_8tap_regular_w32_hv_8bpc_neon:     4.85   7.64   6.09   6.68   6.92
    mc_8tap_regular_w32_v_8bpc_neon:      7.61   8.36   6.13  19.94  11.17
    mc_8tap_regular_w64_0_8bpc_neon:      4.61   3.81   1.60   3.50   2.73
    mc_8tap_regular_w64_h_8bpc_neon:      6.72   9.07   5.21  11.41   3.10
    mc_8tap_regular_w64_hv_8bpc_neon:     4.67   7.43   5.92   6.43   6.59
    mc_8tap_regular_w64_v_8bpc_neon:      7.64   8.28   6.07  20.48  11.41
    mc_8tap_regular_w128_0_8bpc_neon:     2.41   3.13   1.11   2.31   1.73
    mc_8tap_regular_w128_h_8bpc_neon:     6.68   9.03   5.09  11.41   2.90
    mc_8tap_regular_w128_hv_8bpc_neon:    4.50   7.39   5.70   6.26   6.47
    mc_8tap_regular_w128_v_8bpc_neon:     7.21   8.23   5.88  19.82  11.42
    mc_bilinear_w2_0_8bpc_neon:           9.23   4.03   3.74   5.33   6.49
    mc_bilinear_w2_h_8bpc_neon:           2.07   3.52   2.71   2.35   3.40
    mc_bilinear_w2_hv_8bpc_neon:          2.60   5.24   2.73   2.74   3.89
    mc_bilinear_w2_v_8bpc_neon:           2.57   4.39   3.14   3.04   4.05
    mc_bilinear_w4_0_8bpc_neon:           8.74   4.03   5.38   5.28   6.53
    mc_bilinear_w4_h_8bpc_neon:           3.41   6.22   4.28   3.86   7.56
    mc_bilinear_w4_hv_8bpc_neon:          4.38   7.45   4.61   5.26   7.95
    mc_bilinear_w4_v_8bpc_neon:           3.65   6.57   4.51   4.45   7.62
    mc_bilinear_w8_0_8bpc_neon:           8.74   4.50   5.71   5.46   9.39
    mc_bilinear_w8_h_8bpc_neon:           6.14  10.71   6.78   6.88  14.10
    mc_bilinear_w8_hv_8bpc_neon:          7.11  12.80   8.24  11.08   7.83
    mc_bilinear_w8_v_8bpc_neon:           7.24  11.69   7.57   8.04  15.46
    mc_bilinear_w16_0_8bpc_neon:         10.01   5.47   4.07   6.97   7.64
    mc_bilinear_w16_h_8bpc_neon:          8.36  17.00   8.34  11.61   7.64
    mc_bilinear_w16_hv_8bpc_neon:         7.67  13.54   8.53  13.32   8.05
    mc_bilinear_w16_v_8bpc_neon:         10.19  22.56  10.52  15.39  10.62
    mc_bilinear_w32_0_8bpc_neon:          6.22   4.73   2.43   5.89   4.90
    mc_bilinear_w32_h_8bpc_neon:          9.47  18.96   9.34  13.10   7.24
    mc_bilinear_w32_hv_8bpc_neon:         7.95  13.15   9.49  13.78   8.71
    mc_bilinear_w32_v_8bpc_neon:         11.10  23.53  11.34  16.74   8.78
    mc_bilinear_w64_0_8bpc_neon:          4.58   3.82   1.59   3.46   2.71
    mc_bilinear_w64_h_8bpc_neon:         10.07  19.77   9.60  13.99   6.88
    mc_bilinear_w64_hv_8bpc_neon:         8.08  12.95   9.39  13.84   8.90
    mc_bilinear_w64_v_8bpc_neon:         11.49  23.85  11.12  17.13   7.90
    mc_bilinear_w128_0_8bpc_neon:         2.37   3.24   1.15   2.28   1.73
    mc_bilinear_w128_h_8bpc_neon:         9.94  18.84   8.66  13.91   6.74
    mc_bilinear_w128_hv_8bpc_neon:        7.26  12.82   8.97  12.43   8.88
    mc_bilinear_w128_v_8bpc_neon:         9.89  23.88   8.93  14.73   7.33
    mct_8tap_regular_w4_0_8bpc_neon:      2.82   4.46   2.72   3.50   5.41
    mct_8tap_regular_w4_h_8bpc_neon:      4.16   6.88   4.64   6.51   6.60
    mct_8tap_regular_w4_hv_8bpc_neon:     5.22   9.87   7.81   9.39  10.11
    mct_8tap_regular_w4_v_8bpc_neon:      5.81   7.72   4.80  10.16   6.85
    mct_8tap_regular_w8_0_8bpc_neon:      4.48   6.30   3.01   5.82   5.04
    mct_8tap_regular_w8_h_8bpc_neon:      5.59   8.04   4.18   8.68   8.30
    mct_8tap_regular_w8_hv_8bpc_neon:     5.34   8.32   6.42   7.04   7.99
    mct_8tap_regular_w8_v_8bpc_neon:      7.32   8.71   5.75  17.07   9.73
    mct_8tap_regular_w16_0_8bpc_neon:     5.05   9.60   3.64  10.06   4.29
    mct_8tap_regular_w16_h_8bpc_neon:     5.53   8.20   4.54   9.98   7.33
    mct_8tap_regular_w16_hv_8bpc_neon:    4.90   7.87   6.07   6.67   7.03
    mct_8tap_regular_w16_v_8bpc_neon:     7.39   8.55   5.72  19.64   9.98
    mct_8tap_regular_w32_0_8bpc_neon:     5.28   8.16   4.07  11.03   2.38
    mct_8tap_regular_w32_h_8bpc_neon:     5.97   8.31   4.67  10.63   6.72
    mct_8tap_regular_w32_hv_8bpc_neon:    4.73   7.65   5.98   6.51   6.31
    mct_8tap_regular_w32_v_8bpc_neon:     7.33   8.18   5.72  20.50  10.03
    mct_8tap_regular_w64_0_8bpc_neon:     5.11   9.19   4.01  10.61   1.92
    mct_8tap_regular_w64_h_8bpc_neon:     6.05   8.33   4.53  10.84   6.38
    mct_8tap_regular_w64_hv_8bpc_neon:    4.61   7.54   5.69   6.35   6.11
    mct_8tap_regular_w64_v_8bpc_neon:     7.27   8.06   5.39  20.41  10.15
    mct_8tap_regular_w128_0_8bpc_neon:    4.29   8.21   4.28   9.55   1.32
    mct_8tap_regular_w128_h_8bpc_neon:    6.01   8.26   4.43  10.78   6.20
    mct_8tap_regular_w128_hv_8bpc_neon:   4.49   7.49   5.46   6.11   5.96
    mct_8tap_regular_w128_v_8bpc_neon:    6.90   8.00   5.19  18.47  10.13
    mct_bilinear_w4_0_8bpc_neon:          2.70   4.53   2.67   3.32   5.11
    mct_bilinear_w4_h_8bpc_neon:          3.02   5.06   3.13   3.28   5.38
    mct_bilinear_w4_hv_8bpc_neon:         4.14   7.04   4.75   4.99   6.30
    mct_bilinear_w4_v_8bpc_neon:          3.17   5.30   3.66   3.87   5.01
    mct_bilinear_w8_0_8bpc_neon:          4.41   6.46   2.99   5.74   5.98
    mct_bilinear_w8_h_8bpc_neon:          5.36   8.27   3.62   6.39   9.06
    mct_bilinear_w8_hv_8bpc_neon:         6.65  11.82   6.79  11.47   7.07
    mct_bilinear_w8_v_8bpc_neon:          6.26   9.62   4.05   7.75  16.81
    mct_bilinear_w16_0_8bpc_neon:         4.86   9.85   3.61  10.03   4.19
    mct_bilinear_w16_h_8bpc_neon:         5.26  12.91   4.76   9.56   9.68
    mct_bilinear_w16_hv_8bpc_neon:        6.96  12.58   7.05  13.48   7.35
    mct_bilinear_w16_v_8bpc_neon:         6.46  17.94   5.72  13.70  19.20
    mct_bilinear_w32_0_8bpc_neon:         5.31   8.10   4.06  10.88   2.77
    mct_bilinear_w32_h_8bpc_neon:         6.91  14.28   5.33  11.24  10.33
    mct_bilinear_w32_hv_8bpc_neon:        7.13  12.21   7.57  13.91   7.19
    mct_bilinear_w32_v_8bpc_neon:         8.06  18.48   5.88  14.74  15.47
    mct_bilinear_w64_0_8bpc_neon:         5.08   7.29   3.83  10.44   1.71
    mct_bilinear_w64_h_8bpc_neon:         7.24  14.59   5.40  11.70  11.03
    mct_bilinear_w64_hv_8bpc_neon:        7.24  11.98   7.59  13.72   7.30
    mct_bilinear_w64_v_8bpc_neon:         8.20  18.24   5.69  14.57  15.04
    mct_bilinear_w128_0_8bpc_neon:        4.35   8.23   4.17   9.71   1.11
    mct_bilinear_w128_h_8bpc_neon:        7.02  13.80   5.63  11.11  11.26
    mct_bilinear_w128_hv_8bpc_neon:       6.31  11.89   6.75  12.12   7.24
    mct_bilinear_w128_v_8bpc_neon:        6.95  18.26   5.84  11.31  14.78
    191f79d5