diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2022-09-26 17:52:42 +0300 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-09-27 13:19:52 +0200 |
commit | f41ae62f39ad1f91a3817325fdbba7304aba4641 (patch) | |
tree | d2ad7e36056dcba6b8d815ef7683f40808863e48 | |
parent | f127a5d29d7aee99cb4dd4d576d375c678b3c388 (diff) | |
download | ffmpeg-f41ae62f39ad1f91a3817325fdbba7304aba4641.tar.gz |
lavc/audiodsp: RISC-V V scalarproduct_int16
-rw-r--r-- | libavcodec/riscv/audiodsp_init.c | 5 | ||||
-rw-r--r-- | libavcodec/riscv/audiodsp_rvv.S | 19 |
2 files changed, 23 insertions, 1 deletions
diff --git a/libavcodec/riscv/audiodsp_init.c b/libavcodec/riscv/audiodsp_init.c index 9c9265531d..32c3c6794d 100644 --- a/libavcodec/riscv/audiodsp_init.c +++ b/libavcodec/riscv/audiodsp_init.c @@ -26,6 +26,7 @@ void ff_vector_clipf_rvf(float *dst, const float *src, int len, float min, float max); +int32_t ff_scalarproduct_int16_rvv(const int16_t *v1, const int16_t *v2, int len); void ff_vector_clip_int32_rvv(int32_t *dst, const int32_t *src, int32_t min, int32_t max, unsigned int len); void ff_vector_clipf_rvv(float *dst, const float *src, int len, float min, float max); @@ -37,8 +38,10 @@ av_cold void ff_audiodsp_init_riscv(AudioDSPContext *c) if (flags & AV_CPU_FLAG_RVF) c->vector_clipf = ff_vector_clipf_rvf; #if HAVE_RVV - if (flags & AV_CPU_FLAG_RVV_I32) + if (flags & AV_CPU_FLAG_RVV_I32) { + c->scalarproduct_int16 = ff_scalarproduct_int16_rvv; c->vector_clip_int32 = ff_vector_clip_int32_rvv; + } if (flags & AV_CPU_FLAG_RVV_F32) c->vector_clipf = ff_vector_clipf_rvv; #endif diff --git a/libavcodec/riscv/audiodsp_rvv.S b/libavcodec/riscv/audiodsp_rvv.S index 427b424cb9..f4308f27c5 100644 --- a/libavcodec/riscv/audiodsp_rvv.S +++ b/libavcodec/riscv/audiodsp_rvv.S @@ -20,6 +20,25 @@ #include "libavutil/riscv/asm.S" +func ff_scalarproduct_int16_rvv, zve32x + vsetvli zero, zero, e16, m1, ta, ma + vmv.s.x v8, zero +1: + vsetvli t0, a2, e16, m1, ta, ma + vle16.v v16, (a0) + sub a2, a2, t0 + vle16.v v24, (a1) + sh1add a0, t0, a0 + vwmul.vv v0, v16, v24 + sh1add a1, t0, a1 + vsetvli zero, t0, e32, m2, ta, ma + vredsum.vs v8, v0, v8 + bnez a2, 1b + + vmv.x.s a0, v8 + ret +endfunc + func ff_vector_clip_int32_rvv, zve32x 1: vsetvli t0, a4, e32, m1, ta, ma |