diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2022-09-26 17:52:44 +0300 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-09-27 13:19:52 +0200 |
commit | 220dfd0945ee8481d8cdbf713f515a94ceee9992 (patch) | |
tree | 6934c2dde32ec146143dead4091de8bd39e207bb /libavcodec/riscv | |
parent | 47a10b9a99130457c27b220afba7d7de4a69bb18 (diff) | |
download | ffmpeg-220dfd0945ee8481d8cdbf713f515a94ceee9992.tar.gz |
lavc/fmtconvert: RISC-V V int32_to_float_fmul_array8
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r-- | libavcodec/riscv/fmtconvert_init.c | 7 | ||||
-rw-r--r-- | libavcodec/riscv/fmtconvert_rvv.S | 28 |
2 files changed, 34 insertions, 1 deletions
diff --git a/libavcodec/riscv/fmtconvert_init.c b/libavcodec/riscv/fmtconvert_init.c index b2c240c1ce..fd1a8e0ca1 100644 --- a/libavcodec/riscv/fmtconvert_init.c +++ b/libavcodec/riscv/fmtconvert_init.c @@ -27,13 +27,18 @@ void ff_int32_to_float_fmul_scalar_rvv(float *dst, const int32_t *src, float mul, int len); +void ff_int32_to_float_fmul_array8_rvv(FmtConvertContext *c, float *dst, + const int32_t *src, const float *mul, + int len); av_cold void ff_fmt_convert_init_riscv(FmtConvertContext *c) { #ifdef HAVE_RVV int flags = av_get_cpu_flags(); - if (flags & AV_CPU_FLAG_RVV_F32) + if (flags & AV_CPU_FLAG_RVV_F32) { c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_rvv; + c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_rvv; + } #endif } diff --git a/libavcodec/riscv/fmtconvert_rvv.S b/libavcodec/riscv/fmtconvert_rvv.S index b7c78831a0..c79f80cc47 100644 --- a/libavcodec/riscv/fmtconvert_rvv.S +++ b/libavcodec/riscv/fmtconvert_rvv.S @@ -37,3 +37,31 @@ NOHWF mv a2, a3 ret endfunc + +func ff_int32_to_float_fmul_array8_rvv, zve32f + srai a4, a4, 3 + +1: vsetvli t0, a4, e32, m1, ta, ma + vle32.v v24, (a3) + slli t2, t0, 2 + 3 + vlseg8e32.v v16, (a2) + vsetvli t3, zero, e32, m8, ta, ma + vfcvt.f.x.v v16, v16 + vsetvli zero, a4, e32, m1, ta, ma + vfmul.vv v16, v16, v24 + sub a4, a4, t0 + vfmul.vv v17, v17, v24 + sh2add a3, t0, a3 + vfmul.vv v18, v18, v24 + add a2, a2, t2 + vfmul.vv v19, v19, v24 + vfmul.vv v20, v20, v24 + vfmul.vv v21, v21, v24 + vfmul.vv v22, v22, v24 + vfmul.vv v23, v23, v24 + vsseg8e32.v v16, (a1) + add a1, a1, t2 + bnez a4, 1b + + ret +endfunc |