lavc/audiodsp: RISC-V V scalarproduct_int16

author: Rémi Denis-Courmont <[email protected]> 2022-09-26 17:52:42 +0300
committer: Lynne <[email protected]> 2022-09-27 13:19:52 +0200
commit: f41ae62f39ad1f91a3817325fdbba7304aba4641 (patch)
tree: d2ad7e36056dcba6b8d815ef7683f40808863e48
parent: f127a5d29d7aee99cb4dd4d576d375c678b3c388 (diff)
2 files changed, 23 insertions, 1 deletions
diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c
index 9c9265531d..32c3c6794d 100644
--- a/libavcodec/riscv/audiodsp_init.c
+++ b/libavcodec/riscv/audiodsp_init.c
@@ -26,6 +26,7 @@
 
 void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max);
 
+int32_t ff_scalarproduct_int16_rvv(const int16_t *v1, const int16_t *v2, int len);
 void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min,
                               int32_t max, unsigned int len);
 void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max);
@@ -37,8 +38,10 @@ av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c)
     if (flags & AV_CPU_FLAG_RVF)
         c->vector_clipf = ff_vector_clipf_rvf;
 #if HAVE_RVV
-    if (flags & AV_CPU_FLAG_RVV_I32)
+    if (flags & AV_CPU_FLAG_RVV_I32) {
+        c->scalarproduct_int16 = ff_scalarproduct_int16_rvv;
         c->vector_clip_int32 = ff_vector_clip_int32_rvv;
+    }
     if (flags & AV_CPU_FLAG_RVV_F32)
         c->vector_clipf = ff_vector_clipf_rvv;
 #endif
diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S
index 427b424cb9..f4308f27c5 100644
--- a/libavcodec/riscv/audiodsp_rvv.S
+++ b/libavcodec/riscv/audiodsp_rvv.S
@@ -20,6 +20,25 @@
 
 #include "libavutil/riscv/asm.S"
 
+func ff_scalarproduct_int16_rvv, zve32x // a0 = v1, a1 = v2, a2 = len; returns the 32-bit sum of v1[i] * v2[i] in a0
+        vsetivli    zero, 1, e32, m1, ta, ma // vl = 1, SEW = 32: clear the full 32-bit accumulator regardless of the incoming vl/vtype
+        vmv.s.x     v8, zero // v8[0] = 0, the running sum
+1:
+        vsetvli     t0, a2, e16, m1, ta, ma // t0 = number of 16-bit elements handled in this pass
+        vle16.v     v16, (a0)
+        sub         a2, a2, t0 // elements still left to process
+        vle16.v     v24, (a1)
+        sh1add      a0, t0, a0 // a0 += 2 * t0 (NOTE(review): sh1add is a Zba instruction; confirm Zba is enabled for this file)
+        vwmul.vv    v0, v16, v24 // widening multiply: 32-bit products in the v0 group
+        sh1add      a1, t0, a1 // a1 += 2 * t0
+        vsetvli     zero, t0, e32, m2, ta, ma // same vl, now viewing the products as 32-bit elements
+        vredsum.vs  v8, v0, v8 // v8[0] += sum of the products
+        bnez        a2, 1b
+
+        vmv.x.s     a0, v8 // return v8[0]
+        ret
+endfunc
+
 func ff_vector_clip_int32_rvv, zve32x
 1:
         vsetvli t0, a4, e32, m1, ta, ma
author	Rémi Denis-Courmont <[email protected]>	2022-09-26 17:52:42 +0300
committer	Lynne <[email protected]>	2022-09-27 13:19:52 +0200
commit	f41ae62f39ad1f91a3817325fdbba7304aba4641 (patch)
tree	d2ad7e36056dcba6b8d815ef7683f40808863e48
parent	f127a5d29d7aee99cb4dd4d576d375c678b3c388 (diff)