Skip to content
Snippets Groups Projects
Commit b374b24c authored by Nathan E. Egge, committed by Luca Barbato
Browse files

riscv64/mc: Branchless vsetvl in blend function

Kendryte K230

blend_w4_8bpc_c:       204.8 ( 1.00x)
blend_w4_8bpc_rvv:      59.8 ( 3.42x)
blend_w8_8bpc_c:       608.9 ( 1.00x)
blend_w8_8bpc_rvv:      87.2 ( 6.98x)
blend_w16_8bpc_c:     2362.4 ( 1.00x)
blend_w16_8bpc_rvv:    225.2 (10.49x)
blend_w32_8bpc_c:     5990.4 ( 1.00x)
blend_w32_8bpc_rvv:    518.3 (11.56x)

SpacemiT K1

blend_w4_8bpc_c:       201.6 ( 1.00x)
blend_w4_8bpc_rvv:      58.0 ( 3.48x)
blend_w8_8bpc_c:       595.1 ( 1.00x)
blend_w8_8bpc_rvv:      82.1 ( 7.25x)
blend_w16_8bpc_c:     2308.8 ( 1.00x)
blend_w16_8bpc_rvv:    189.0 (12.22x)
blend_w32_8bpc_c:     5853.1 ( 1.00x)
blend_w32_8bpc_rvv:    339.5 (17.24x)
parent 0e3f70e8
No related branches found
No related tags found
1 merge request: !1731 "RVV Optimization batch"
......@@ -27,47 +27,19 @@
#include "src/riscv/asm.S"
/*
 * NOTE(review): this span is a rendered diff hunk whose +/- markers were
 * lost, so it appears to interleave two versions of the same entry point:
 *   - the earlier body (ext=v), which compares a3 against 4, 8 and 16 and
 *     branches to a matching vsetvli (e8 with mf8, mf4 or mf2, or m1 on
 *     the path where no compare matched);
 *   - the replacement body (ext=zbb), which derives a value in t0 from
 *     ctz(a3) and jumps to L(blend_epilog).
 * Presumably only one of the two "function" lines exists in the real file
 * at any given revision -- confirm against the repository.
 */
function blend_vl256_8bpc_rvv, export=1, ext=v
/* Earlier version: choose the vsetvli variant by comparing a3 with 4, 8, 16. */
li t0, 4
beq a3, t0, 4f
li t0, 8
beq a3, t0, 8f
li t0, 16
beq a3, t0, 16f
/* No compare matched (presumably a3 == 32): use LMUL = m1. */
32:
vsetvli zero, a3, e8, m1, ta, ma
/* L(blend_epilog) is the shared tail defined inside blend_8bpc_rvv below. */
j L(blend_epilog)
/* a3 == 16: LMUL = mf2. */
16:
vsetvli zero, a3, e8, mf2, ta, ma
j L(blend_epilog)
/* a3 == 8: LMUL = mf4. */
8:
vsetvli zero, a3, e8, mf4, ta, ma
j L(blend_epilog)
/* a3 == 4: LMUL = mf8. */
4:
vsetvli zero, a3, e8, mf8, ta, ma
/*
 * Replacement version: no branches on a3. Only ctz (Zbb) is needed here,
 * the vector instructions live in the shared tail.
 */
function blend_vl256_8bpc_rvv, export=1, ext=zbb
/* t0 = count of trailing zero bits in a3 (2, 3, 4, 5 for a3 = 4, 8, 16, 32). */
ctz t0, a3
/*
 * Add 0xc3. L(blend_epilog) then masks t0 with 0xc7 and passes it to
 * vsetvl as the vtype operand. With the RVV vtype layout (vlmul in bits
 * 2:0, vsew in bits 5:3, vta in bit 6, vma in bit 7) the masked values
 * 0xc5, 0xc6, 0xc7, 0xc0 select e8, ta, ma with LMUL = mf8, mf4, mf2, m1
 * for a3 = 4, 8, 16, 32 -- the same settings as the vsetvli lines above.
 */
addi t0, t0, 0xc3
j L(blend_epilog)
endfunc
function blend_8bpc_rvv, export=1, ext=v
li t0, 4
beq a3, t0, 4f
li t0, 8
beq a3, t0, 8f
li t0, 16
beq a3, t0, 16f
32:
vsetvli zero, a3, e8, m2, ta, ma
j L(blend_epilog)
16:
vsetvli zero, a3, e8, m1, ta, ma
j L(blend_epilog)
8:
vsetvli zero, a3, e8, mf2, ta, ma
j L(blend_epilog)
4:
vsetvli zero, a3, e8, mf4, ta, ma
function blend_8bpc_rvv, export=1, ext="v,zbb"
ctz t0, a3
addi t0, t0, 0xc4
L(blend_epilog):
csrw vxrm, zero
andi t0, t0, 0xc7
vsetvl zero, a3, t0
li t1, 64
1:
addi a4, a4, -2
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment