diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2024-05-27 18:51:27 +0300 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-05-30 18:30:52 +0300 |
commit | 4fe8f2cc435bdbcaddd6ccfc401c0246b612873b (patch) | |
tree | f83519869c8655b4c8636c3a9f3fcfd555b91512 | |
parent | add8c46215397541210a4e7bc0e401ef52843698 (diff) | |
download | ffmpeg-4fe8f2cc435bdbcaddd6ccfc401c0246b612873b.tar.gz |
riscv: allow passing addend to vtype_vli macro
A constant (-1) is added to the length value, so we can have an addend
for free, and optimise the addition away if the addend is exactly 1.
-rw-r--r-- | libavcodec/riscv/lpc_rvv.S | 2 | ||||
-rw-r--r-- | libavutil/riscv/asm.S | 9 |
2 files changed, 7 insertions, 4 deletions
diff --git a/libavcodec/riscv/lpc_rvv.S b/libavcodec/riscv/lpc_rvv.S index 8cf79963f1..fe80305d9a 100644 --- a/libavcodec/riscv/lpc_rvv.S +++ b/libavcodec/riscv/lpc_rvv.S @@ -87,8 +87,8 @@ func ff_lpc_apply_welch_window_rvv, zve64d endfunc func ff_lpc_compute_autocorr_rvv, zve64d, zbb + vtype_vli t1, a2, t2, e64, ta, ma, 1 addi a2, a2, 1 - vtype_vli t1, a2, t2, e64, ta, ma li t0, 1 vsetvl zero, a2, t1 fcvt.d.l ft0, t0 diff --git a/libavutil/riscv/asm.S b/libavutil/riscv/asm.S index 1e6358dcb5..2cf4f7b7ab 100644 --- a/libavutil/riscv/asm.S +++ b/libavutil/riscv/asm.S @@ -196,18 +196,21 @@ * @param ew element width: e8, e16, e32 or e64 * @param tp tail policy: tu or ta * @param mp mask policty: mu or ma + * @param addend optional addend for the vector length register */ - .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu + .macro vtype_vli rd, rs, tmp, ew, tp=tu, mp=mu, addend=0 parse_vtype \ew, \tp, \mp /* * The difference between the CLZ's notionally equals the VLMUL value * for 4-bit elements. But we want the value for SEW_MAX-bit elements. */ slli \tmp, \rs, 1 + VSEW_MAX + .if \addend - 1 + addi \tmp, \tmp, \addend - 1 + .endif csrr \rd, vlenb - addi \tmp, \tmp, -1 - clz \rd, \rd clz \tmp, \tmp + clz \rd, \rd sub \rd, \rd, \tmp max \rd, \rd, zero // VLMUL must be >= VSEW - VSEW_MAX .if vsew < VSEW_MAX |