Commit 65a1aafd authored by Martin Storsjö
Browse files

arm: mc: Avoid an unnecessary mov in 8tap_hv w2

This matches how the same logic is written for w4 and above.
parent 458273ed
......@@ -1951,11 +1951,10 @@ L(\type\()_8tap_hv_tbl):
bl L(\type\()_8tap_filter_2)
vext.8 d18, d17, d26, #4
vmov d19, d26
vmull.s16 q2, d16, d2[0]
vmlal.s16 q2, d17, d2[1]
vmlal.s16 q2, d18, d2[2]
vmlal.s16 q2, d19, d2[3]
vmlal.s16 q2, d26, d2[3]
vqrshrn.s32 d4, q2, #\shift_hv
vqmovun.s16 d4, q2
......@@ -1964,7 +1963,7 @@ L(\type\()_8tap_hv_tbl):
vst1.16 {d4[1]}, [\ds2, :16], \d_strd
ble 0f
vmov d16, d18
vmov d17, d19
vmov d17, d26
b 2b
280: // 2x8, 2x16, 2x32 hv
......@@ -2001,7 +2000,6 @@ L(\type\()_8tap_hv_tbl):
28:
bl L(\type\()_8tap_filter_2)
vext.8 d22, d21, d26, #4
vmov d23, d26
vmull.s16 q2, d16, d2[0]
vmlal.s16 q2, d17, d2[1]
vmlal.s16 q2, d18, d2[2]
......@@ -2009,7 +2007,7 @@ L(\type\()_8tap_hv_tbl):
vmlal.s16 q2, d20, d3[0]
vmlal.s16 q2, d21, d3[1]
vmlal.s16 q2, d22, d3[2]
vmlal.s16 q2, d23, d3[3]
vmlal.s16 q2, d26, d3[3]
vqrshrn.s32 d4, q2, #\shift_hv
vqmovun.s16 d4, q2
......@@ -2022,7 +2020,7 @@ L(\type\()_8tap_hv_tbl):
vmov d18, d20
vmov d19, d21
vmov d20, d22
vmov d21, d23
vmov d21, d26
b 28b
0:
......
......@@ -1906,11 +1906,10 @@ L(\type\()_8tap_hv):
bl L(\type\()_8tap_filter_2)
ext v18.8b, v17.8b, v28.8b, #4
mov v19.8b, v28.8b
smull v2.4s, v16.4h, v1.h[0]
smlal v2.4s, v17.4h, v1.h[1]
smlal v2.4s, v18.4h, v1.h[2]
smlal v2.4s, v19.4h, v1.h[3]
smlal v2.4s, v28.4h, v1.h[3]
sqrshrn v2.4h, v2.4s, #\shift_hv
sqxtun v2.8b, v2.8h
......@@ -1919,7 +1918,7 @@ L(\type\()_8tap_hv):
st1 {v2.h}[1], [\ds2], \d_strd
b.le 0f
mov v16.8b, v18.8b
mov v17.8b, v19.8b
mov v17.8b, v28.8b
b 2b
280: // 2x8, 2x16, 2x32 hv
......@@ -1956,7 +1955,6 @@ L(\type\()_8tap_hv):
28:
bl L(\type\()_8tap_filter_2)
ext v22.8b, v21.8b, v28.8b, #4
mov v23.8b, v28.8b
smull v2.4s, v16.4h, v1.h[0]
smlal v2.4s, v17.4h, v1.h[1]
smlal v2.4s, v18.4h, v1.h[2]
......@@ -1964,7 +1962,7 @@ L(\type\()_8tap_hv):
smlal v2.4s, v20.4h, v1.h[4]
smlal v2.4s, v21.4h, v1.h[5]
smlal v2.4s, v22.4h, v1.h[6]
smlal v2.4s, v23.4h, v1.h[7]
smlal v2.4s, v28.4h, v1.h[7]
sqrshrn v2.4h, v2.4s, #\shift_hv
sqxtun v2.8b, v2.8h
......@@ -1977,7 +1975,7 @@ L(\type\()_8tap_hv):
mov v18.8b, v20.8b
mov v19.8b, v21.8b
mov v20.8b, v22.8b
mov v21.8b, v23.8b
mov v21.8b, v28.8b
b 28b
0:
......
......@@ -2057,11 +2057,10 @@ L(\type\()_8tap_hv):
bl L(\type\()_8tap_filter_2)
ext v18.16b, v17.16b, v24.16b, #8
mov v19.16b, v24.16b
mul v2.4s, v16.4s, v1.s[0]
mla v2.4s, v17.4s, v1.s[1]
mla v2.4s, v18.4s, v1.s[2]
mla v2.4s, v19.4s, v1.s[3]
mla v2.4s, v24.4s, v1.s[3]
srshl v2.4s, v2.4s, v29.4s // -(6+intermediate_bits)
sqxtun v2.4h, v2.4s
......@@ -2071,7 +2070,7 @@ L(\type\()_8tap_hv):
st1 {v2.s}[1], [\ds2], \d_strd
b.le 0f
mov v16.16b, v18.16b
mov v17.16b, v19.16b
mov v17.16b, v24.16b
b 2b
280: // 2x8, 2x16, 2x32 hv
......@@ -2109,7 +2108,6 @@ L(\type\()_8tap_hv):
28:
bl L(\type\()_8tap_filter_2)
ext v22.16b, v21.16b, v24.16b, #8
mov v23.16b, v24.16b
mul v3.4s, v16.4s, v1.s[0]
mla v3.4s, v17.4s, v1.s[1]
mla v3.4s, v18.4s, v1.s[2]
......@@ -2117,7 +2115,7 @@ L(\type\()_8tap_hv):
mla v3.4s, v20.4s, v2.s[0]
mla v3.4s, v21.4s, v2.s[1]
mla v3.4s, v22.4s, v2.s[2]
mla v3.4s, v23.4s, v2.s[3]
mla v3.4s, v24.4s, v2.s[3]
srshl v3.4s, v3.4s, v29.4s // -(6+intermediate_bits)
sqxtun v3.4h, v3.4s
......@@ -2131,7 +2129,7 @@ L(\type\()_8tap_hv):
mov v18.16b, v20.16b
mov v19.16b, v21.16b
mov v20.16b, v22.16b
mov v21.16b, v23.16b
mov v21.16b, v24.16b
b 28b
0:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment