Commit 8bbcd3f7 authored by Martin Storsjö

arm: Add a _neon suffix to all internal functions

This eases disambiguating these functions when looking at perf
profiles.
parent 556780b7
Pipeline #5997 passed with stages
in 7 minutes and 20 seconds
......@@ -218,7 +218,7 @@ bidir_fn mask
// This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r5,
// and assumes that r8 is set to (clz(w)-24).
function put
function put_neon
adr r9, L(put_tbl)
ldr r8, [r9, r8, lsl #2]
add r9, r9, r8
......@@ -309,7 +309,7 @@ endfunc
// This has got the same signature as the prep_8tap functions,
// assumes that the caller has loaded the h argument into r4,
// and assumes that r8 is set to (clz(w)-24), and r7 to w*2.
function prep
function prep_neon
adr r9, L(prep_tbl)
ldr r8, [r9, r8, lsl #2]
add r9, r9, r8
......@@ -660,7 +660,7 @@ function \op\()_8tap_\type\()_8bpc_neon, export=1
push {r4-r11,lr}
movw r8, \type_h
movw r9, \type_v
b \op\()_8tap
b \op\()_8tap_neon
endfunc
.endm
......@@ -680,7 +680,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP
make_8tap_fn \type, sharp_regular, SHARP, REGULAR
make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH
function \type\()_8tap
function \type\()_8tap_neon
ldrd r4, r5, [sp, #36]
ldrd r6, r7, [sp, #44]
movw r10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0)
......@@ -699,7 +699,7 @@ function \type\()_8tap
bne L(\type\()_8tap_h)
tst \my, #(0x7f << 14)
bne L(\type\()_8tap_v)
b \type
b \type\()_neon
L(\type\()_8tap_h):
cmp \w, #4
......@@ -1831,7 +1831,7 @@ function \type\()_bilin_8bpc_neon, export=1
bne L(\type\()_bilin_h)
cmp \my, #0
bne L(\type\()_bilin_v)
b \type
b \type\()_neon
L(\type\()_bilin_h):
cmp \my, #0
......
......@@ -236,7 +236,7 @@ bidir_fn mask
// This has got the same signature as the put_8tap functions,
// and assumes that x8 is set to (clz(w)-24).
function put
function put_neon
adr x9, L(put_tbl)
ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw
......@@ -331,7 +331,7 @@ endfunc
// This has got the same signature as the prep_8tap functions,
// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
function prep
function prep_neon
adr x9, L(prep_tbl)
ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw
......@@ -703,7 +703,7 @@ endfunc
function \op\()_8tap_\type\()_8bpc_neon, export=1
mov x8, \type_h
mov x9, \type_v
b \op\()_8tap
b \op\()_8tap\()_neon
endfunc
.endm
......@@ -723,7 +723,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP
make_8tap_fn \type, sharp_regular, SHARP, REGULAR
make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH
function \type\()_8tap
function \type\()_8tap_neon
mov w10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0)
mul \mx, \mx, w10
mul \my, \my, w10
......@@ -741,7 +741,7 @@ function \type\()_8tap
b.ne L(\type\()_8tap_h)
tst \my, #(0x7f << 14)
b.ne L(\type\()_8tap_v)
b \type
b \type\()_neon
L(\type\()_8tap_h):
cmp \w, #4
......@@ -1826,7 +1826,7 @@ function \type\()_bilin_8bpc_neon, export=1
sub w8, w8, #24
cbnz \mx, L(\type\()_bilin_h)
cbnz \my, L(\type\()_bilin_v)
b \type
b \type\()_neon
L(\type\()_bilin_h):
cbnz \my, L(\type\()_bilin_hv)
......@@ -2335,7 +2335,7 @@ filter_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6
add \src, \src, \inc
.endm
function warp_filter_horz
function warp_filter_horz_neon
add w12, w5, #512
ld1 {v16.8b, v17.8b}, [x2], x3
......@@ -2431,24 +2431,24 @@ function warp_affine_8x8\t\()_8bpc_neon, export=1
lsl x1, x1, #1
.endif
bl warp_filter_horz
bl warp_filter_horz_neon
mov v24.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v25.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v26.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v27.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v28.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v29.16b, v16.16b
bl warp_filter_horz
bl warp_filter_horz_neon
mov v30.16b, v16.16b
1:
add w14, w6, #512
bl warp_filter_horz
bl warp_filter_horz_neon
mov v31.16b, v16.16b
load_filter_row d0, w14, w9
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment