Commit ad4d1c43, authored by Martin Storsjö and committed by Janne Grunau

arm64: looprestoration: Optimize loop termination checks in copy_narrow_neon

parent 8a52189f
Pipeline #4298 passed in 6 minutes and 41 seconds
...@@ -498,9 +498,8 @@ function copy_narrow_neon, export=1 ...@@ -498,9 +498,8 @@ function copy_narrow_neon, export=1
add x7, x0, x1 add x7, x0, x1
lsl x1, x1, #1 lsl x1, x1, #1
18: 18:
cmp w4, #8
b.lt 110f
subs w4, w4, #8 subs w4, w4, #8
b.lt 110f
ld1 {v0.8b}, [x2], #8 ld1 {v0.8b}, [x2], #8
st1 {v0.b}[0], [x0], x1 st1 {v0.b}[0], [x0], x1
st1 {v0.b}[1], [x7], x1 st1 {v0.b}[1], [x7], x1
...@@ -513,6 +512,7 @@ function copy_narrow_neon, export=1 ...@@ -513,6 +512,7 @@ function copy_narrow_neon, export=1
b.le 0f b.le 0f
b 18b b 18b
110: 110:
add w4, w4, #8
asr x1, x1, #1 asr x1, x1, #1
11: 11:
subs w4, w4, #1 subs w4, w4, #1
...@@ -526,9 +526,8 @@ function copy_narrow_neon, export=1 ...@@ -526,9 +526,8 @@ function copy_narrow_neon, export=1
add x7, x0, x1 add x7, x0, x1
lsl x1, x1, #1 lsl x1, x1, #1
24: 24:
cmp w4, #4
b.lt 210f
subs w4, w4, #4 subs w4, w4, #4
b.lt 210f
ld1 {v0.4h}, [x2], #8 ld1 {v0.4h}, [x2], #8
st1 {v0.h}[0], [x0], x1 st1 {v0.h}[0], [x0], x1
st1 {v0.h}[1], [x7], x1 st1 {v0.h}[1], [x7], x1
...@@ -537,6 +536,7 @@ function copy_narrow_neon, export=1 ...@@ -537,6 +536,7 @@ function copy_narrow_neon, export=1
b.le 0f b.le 0f
b 24b b 24b
210: 210:
add w4, w4, #4
asr x1, x1, #1 asr x1, x1, #1
22: 22:
subs w4, w4, #1 subs w4, w4, #1
...@@ -561,9 +561,8 @@ function copy_narrow_neon, export=1 ...@@ -561,9 +561,8 @@ function copy_narrow_neon, export=1
add x7, x0, x1 add x7, x0, x1
lsl x1, x1, #1 lsl x1, x1, #1
42: 42:
cmp w4, #2
b.lt 41f
subs w4, w4, #2 subs w4, w4, #2
b.lt 41f
ld1 {v0.2s}, [x2], #8 ld1 {v0.2s}, [x2], #8
st1 {v0.s}[0], [x0], x1 st1 {v0.s}[0], [x0], x1
st1 {v0.s}[1], [x7], x1 st1 {v0.s}[1], [x7], x1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment