aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2023-07-13 23:16:03 +0300
committer: Rémi Denis-Courmont <remi@remlab.net> 2023-07-16 23:24:00 +0300
commit: c541ecf0dc38b6da3ba3d290b0db7d147775931f (patch)
tree: fd3a6ef32d653a4155e28b7ef6cc43ef97767049
parent: a28aa0475db99cc66c37c98fd5330e313817709c (diff)
download: ffmpeg-c541ecf0dc38b6da3ba3d290b0db7d147775931f.tar.gz
lavc/alacdsp: unroll RISC-V V loops
This increases the group multiplier as per T-Head C910 benchmarks:

alac_append_extra_bits_mono_c: 803.0
alac_append_extra_bits_stereo_c: 1604.2
alac_decorrelate_stereo_c: 1077.5

LMUL=1
alac_append_extra_bits_mono_rvv_i32: 418.2
alac_append_extra_bits_stereo_rvv_i32: 693.2
alac_decorrelate_stereo_rvv_i32: 673.5

LMUL=2
alac_append_extra_bits_mono_rvv_i32: 382.2
alac_append_extra_bits_stereo_rvv_i32: 648.2
alac_decorrelate_stereo_rvv_i32: 542.7

LMUL=4
alac_append_extra_bits_mono_rvv_i32: 241.5
alac_append_extra_bits_stereo_rvv_i32: 512.7
alac_decorrelate_stereo_rvv_i32: 364.2

LMUL=8
alac_append_extra_bits_mono_rvv_i32: 239.7
alac_append_extra_bits_stereo_rvv_i32: 497.2
alac_decorrelate_stereo_rvv_i32: 426.7
-rw-r--r-- libavcodec/riscv/alacdsp_rvv.S 6
1 file changed, 3 insertions, 3 deletions
diff --git a/libavcodec/riscv/alacdsp_rvv.S b/libavcodec/riscv/alacdsp_rvv.S
index 8fbe3fbe77..8efb04e0c8 100644
--- a/libavcodec/riscv/alacdsp_rvv.S
+++ b/libavcodec/riscv/alacdsp_rvv.S
@@ -25,7 +25,7 @@ func ff_alac_decorrelate_stereo_rvv, zve32x
ld a4, 8(a0)
ld a0, 0(a0)
1:
- vsetvli t0, a1, e32, m1, ta, ma
+ vsetvli t0, a1, e32, m4, ta, ma
vle32.v v24, (a4)
sub a1, a1, t0
vle32.v v16, (a0)
@@ -47,7 +47,7 @@ func ff_alac_append_extra_bits_mono_rvv, zve32x
ld a0, (a0)
ld a1, (a1)
1:
- vsetvli t0, a4, e32, m1, ta, ma
+ vsetvli t0, a4, e32, m8, ta, ma
vle32.v v16, (a0)
sub a4, a4, t0
vle32.v v24, (a1)
@@ -67,7 +67,7 @@ func ff_alac_append_extra_bits_stereo_rvv, zve32x
ld a7, 8(a1)
ld a1, (a1)
1:
- vsetvli t0, a4, e32, m1, ta, ma
+ vsetvli t0, a4, e32, m8, ta, ma
vle32.v v16, (a0)
sub a4, a4, t0
vle32.v v0, (a6)