aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/riscv
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2022-09-26 17:52:44 +0300
committer: Lynne <dev@lynne.ee> 2022-09-27 13:19:52 +0200
commit: 220dfd0945ee8481d8cdbf713f515a94ceee9992 (patch)
tree: 6934c2dde32ec146143dead4091de8bd39e207bb /libavcodec/riscv
parent: 47a10b9a99130457c27b220afba7d7de4a69bb18 (diff)
download: ffmpeg-220dfd0945ee8481d8cdbf713f515a94ceee9992.tar.gz
lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r-- libavcodec/riscv/fmtconvert_init.c | 7
-rw-r--r-- libavcodec/riscv/fmtconvert_rvv.S | 28
2 files changed, 34 insertions, 1 deletions
diff --git a/libavcodec/riscv/fmtconvert_init.c b/libavcodec/riscv/fmtconvert_init.c
index b2c240c1ce..fd1a8e0ca1 100644
--- a/libavcodec/riscv/fmtconvert_init.c
+++ b/libavcodec/riscv/fmtconvert_init.c
@@ -27,13 +27,18 @@
void ff_int32_to_float_fmul_scalar_rvv(float *dst, const int32_t *src,
float mul, int len);
+void ff_int32_to_float_fmul_array8_rvv(FmtConvertContext *c, float *dst,
+ const int32_t *src, const float *mul,
+ int len); /* assembly implementation; defined in fmtconvert_rvv.S */
av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c) /* install RISC-V implementations into c when the CPU supports them */
{
#ifdef HAVE_RVV /* NOTE(review): #ifdef only tests that the macro is defined; if the build always defines HAVE_RVV (as 0 or 1), this should be "#if HAVE_RVV" - confirm */
int flags = av_get_cpu_flags(); /* runtime CPU feature flags */
- if (flags & AV_CPU_FLAG_RVV_F32)
+ if (flags & AV_CPU_FLAG_RVV_F32) { /* both overrides below are gated on the same AV_CPU_FLAG_RVV_F32 bit */
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_rvv;
+ c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_rvv; /* new in this commit */
+ }
#endif
}
diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S
index b7c78831a0..c79f80cc47 100644
--- a/libavcodec/riscv/fmtconvert_rvv.S
+++ b/libavcodec/riscv/fmtconvert_rvv.S
@@ -37,3 +37,31 @@ NOHWF mv a2, a3
ret
endfunc
+
+func ff_int32_to_float_fmul_array8_rvv, zve32f // dst[i] = (float)src[i] * mul[i / 8], handled in groups of 8 elements
+ srai a4, a4, 3 // a4 = len >> 3 = number of 8-element groups (low 3 bits of len are ignored)
+ // Registers, assuming the standard RISC-V integer calling convention for the C prototype: a0 = c (never read here), a1 = dst, a2 = src, a3 = mul, a4 = len
+1: vsetvli t0, a4, e32, m1, ta, ma // t0 = vl = number of groups handled in this pass (32-bit elements, LMUL=1)
+ vle32.v v24, (a3) // v24 = one multiplier per group
+ slli t2, t0, 2 + 3 // t2 = t0 * 32 = bytes of src/dst consumed this pass (8 elements * 4 bytes per group)
+ vlseg8e32.v v16, (a2) // segment load de-interleaves: v16+k receives element k of every group (v16..v23)
+ vsetvli t3, zero, e32, m8, ta, ma // LMUL=8 with vl=VLMAX so a single instruction spans the v16..v23 register group
+ vfcvt.f.x.v v16, v16 // signed int32 -> float32 across v16..v23 (lanes past t0 are converted too, but are not stored below)
+ vsetvli zero, a4, e32, m1, ta, ma // back to LMUL=1; NOTE(review): relies on the same AVL (a4, not yet decremented) yielding the same vl as t0
+ vfmul.vv v16, v16, v24 // scale element 0 of each group by that group's multiplier
+ sub a4, a4, t0 // groups still to process
+ vfmul.vv v17, v17, v24
+ sh2add a3, t0, a3 // mul += t0 floats; NOTE(review): sh2add is a Zba instruction while the func line names only zve32f - confirm Zba is available
+ vfmul.vv v18, v18, v24
+ add a2, a2, t2 // advance src past the groups just loaded
+ vfmul.vv v19, v19, v24
+ vfmul.vv v20, v20, v24
+ vfmul.vv v21, v21, v24
+ vfmul.vv v22, v22, v24
+ vfmul.vv v23, v23, v24 // element 7 of each group
+ vsseg8e32.v v16, (a1) // segment store re-interleaves v16..v23 back into dst
+ add a1, a1, t2 // advance dst by the same byte count as src
+ bnez a4, 1b // loop while groups remain
+ // Returns nothing; results are written to dst only
+ ret
+endfunc