Commit 8bbcd3f7 authored by Martin Storsjö

arm: Add a _neon suffix to all internal functions

This eases disambiguating these functions when looking at perf
profiles.
parent 556780b7
Pipeline #5997 passed with stages in 7 minutes and 20 seconds
...@@ -218,7 +218,7 @@ bidir_fn mask ...@@ -218,7 +218,7 @@ bidir_fn mask
// This has got the same signature as the put_8tap functions, // This has got the same signature as the put_8tap functions,
// assumes that the caller has loaded the h argument into r5, // assumes that the caller has loaded the h argument into r5,
// and assumes that r8 is set to (clz(w)-24). // and assumes that r8 is set to (clz(w)-24).
function put function put_neon
adr r9, L(put_tbl) adr r9, L(put_tbl)
ldr r8, [r9, r8, lsl #2] ldr r8, [r9, r8, lsl #2]
add r9, r9, r8 add r9, r9, r8
...@@ -309,7 +309,7 @@ endfunc ...@@ -309,7 +309,7 @@ endfunc
// This has got the same signature as the prep_8tap functions, // This has got the same signature as the prep_8tap functions,
// assumes that the caller has loaded the h argument into r4, // assumes that the caller has loaded the h argument into r4,
// and assumes that r8 is set to (clz(w)-24), and r7 to w*2. // and assumes that r8 is set to (clz(w)-24), and r7 to w*2.
function prep function prep_neon
adr r9, L(prep_tbl) adr r9, L(prep_tbl)
ldr r8, [r9, r8, lsl #2] ldr r8, [r9, r8, lsl #2]
add r9, r9, r8 add r9, r9, r8
...@@ -660,7 +660,7 @@ function \op\()_8tap_\type\()_8bpc_neon, export=1 ...@@ -660,7 +660,7 @@ function \op\()_8tap_\type\()_8bpc_neon, export=1
push {r4-r11,lr} push {r4-r11,lr}
movw r8, \type_h movw r8, \type_h
movw r9, \type_v movw r9, \type_v
b \op\()_8tap b \op\()_8tap_neon
endfunc endfunc
.endm .endm
...@@ -680,7 +680,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP ...@@ -680,7 +680,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP
make_8tap_fn \type, sharp_regular, SHARP, REGULAR make_8tap_fn \type, sharp_regular, SHARP, REGULAR
make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH
function \type\()_8tap function \type\()_8tap_neon
ldrd r4, r5, [sp, #36] ldrd r4, r5, [sp, #36]
ldrd r6, r7, [sp, #44] ldrd r6, r7, [sp, #44]
movw r10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0) movw r10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0)
...@@ -699,7 +699,7 @@ function \type\()_8tap ...@@ -699,7 +699,7 @@ function \type\()_8tap
bne L(\type\()_8tap_h) bne L(\type\()_8tap_h)
tst \my, #(0x7f << 14) tst \my, #(0x7f << 14)
bne L(\type\()_8tap_v) bne L(\type\()_8tap_v)
b \type b \type\()_neon
L(\type\()_8tap_h): L(\type\()_8tap_h):
cmp \w, #4 cmp \w, #4
...@@ -1831,7 +1831,7 @@ function \type\()_bilin_8bpc_neon, export=1 ...@@ -1831,7 +1831,7 @@ function \type\()_bilin_8bpc_neon, export=1
bne L(\type\()_bilin_h) bne L(\type\()_bilin_h)
cmp \my, #0 cmp \my, #0
bne L(\type\()_bilin_v) bne L(\type\()_bilin_v)
b \type b \type\()_neon
L(\type\()_bilin_h): L(\type\()_bilin_h):
cmp \my, #0 cmp \my, #0
......
...@@ -236,7 +236,7 @@ bidir_fn mask ...@@ -236,7 +236,7 @@ bidir_fn mask
// This has got the same signature as the put_8tap functions, // This has got the same signature as the put_8tap functions,
// and assumes that x8 is set to (clz(w)-24). // and assumes that x8 is set to (clz(w)-24).
function put function put_neon
adr x9, L(put_tbl) adr x9, L(put_tbl)
ldrh w8, [x9, x8, lsl #1] ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw sub x9, x9, w8, uxtw
...@@ -331,7 +331,7 @@ endfunc ...@@ -331,7 +331,7 @@ endfunc
// This has got the same signature as the prep_8tap functions, // This has got the same signature as the prep_8tap functions,
// and assumes that x8 is set to (clz(w)-24), and x7 to w*2. // and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
function prep function prep_neon
adr x9, L(prep_tbl) adr x9, L(prep_tbl)
ldrh w8, [x9, x8, lsl #1] ldrh w8, [x9, x8, lsl #1]
sub x9, x9, w8, uxtw sub x9, x9, w8, uxtw
...@@ -703,7 +703,7 @@ endfunc ...@@ -703,7 +703,7 @@ endfunc
function \op\()_8tap_\type\()_8bpc_neon, export=1 function \op\()_8tap_\type\()_8bpc_neon, export=1
mov x8, \type_h mov x8, \type_h
mov x9, \type_v mov x9, \type_v
b \op\()_8tap b \op\()_8tap\()_neon
endfunc endfunc
.endm .endm
...@@ -723,7 +723,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP ...@@ -723,7 +723,7 @@ make_8tap_fn \type, sharp, SHARP, SHARP
make_8tap_fn \type, sharp_regular, SHARP, REGULAR make_8tap_fn \type, sharp_regular, SHARP, REGULAR
make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH make_8tap_fn \type, sharp_smooth, SHARP, SMOOTH
function \type\()_8tap function \type\()_8tap_neon
mov w10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0) mov w10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0)
mul \mx, \mx, w10 mul \mx, \mx, w10
mul \my, \my, w10 mul \my, \my, w10
...@@ -741,7 +741,7 @@ function \type\()_8tap ...@@ -741,7 +741,7 @@ function \type\()_8tap
b.ne L(\type\()_8tap_h) b.ne L(\type\()_8tap_h)
tst \my, #(0x7f << 14) tst \my, #(0x7f << 14)
b.ne L(\type\()_8tap_v) b.ne L(\type\()_8tap_v)
b \type b \type\()_neon
L(\type\()_8tap_h): L(\type\()_8tap_h):
cmp \w, #4 cmp \w, #4
...@@ -1826,7 +1826,7 @@ function \type\()_bilin_8bpc_neon, export=1 ...@@ -1826,7 +1826,7 @@ function \type\()_bilin_8bpc_neon, export=1
sub w8, w8, #24 sub w8, w8, #24
cbnz \mx, L(\type\()_bilin_h) cbnz \mx, L(\type\()_bilin_h)
cbnz \my, L(\type\()_bilin_v) cbnz \my, L(\type\()_bilin_v)
b \type b \type\()_neon
L(\type\()_bilin_h): L(\type\()_bilin_h):
cbnz \my, L(\type\()_bilin_hv) cbnz \my, L(\type\()_bilin_hv)
...@@ -2335,7 +2335,7 @@ filter_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6 ...@@ -2335,7 +2335,7 @@ filter_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6
add \src, \src, \inc add \src, \src, \inc
.endm .endm
function warp_filter_horz function warp_filter_horz_neon
add w12, w5, #512 add w12, w5, #512
ld1 {v16.8b, v17.8b}, [x2], x3 ld1 {v16.8b, v17.8b}, [x2], x3
...@@ -2431,24 +2431,24 @@ function warp_affine_8x8\t\()_8bpc_neon, export=1 ...@@ -2431,24 +2431,24 @@ function warp_affine_8x8\t\()_8bpc_neon, export=1
lsl x1, x1, #1 lsl x1, x1, #1
.endif .endif
bl warp_filter_horz bl warp_filter_horz_neon
mov v24.16b, v16.16b mov v24.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v25.16b, v16.16b mov v25.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v26.16b, v16.16b mov v26.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v27.16b, v16.16b mov v27.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v28.16b, v16.16b mov v28.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v29.16b, v16.16b mov v29.16b, v16.16b
bl warp_filter_horz bl warp_filter_horz_neon
mov v30.16b, v16.16b mov v30.16b, v16.16b
1: 1:
add w14, w6, #512 add w14, w6, #512
bl warp_filter_horz bl warp_filter_horz_neon
mov v31.16b, v16.16b mov v31.16b, v16.16b
load_filter_row d0, w14, w9 load_filter_row d0, w14, w9
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment