diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2023-10-30 22:03:59 +0200 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2023-11-06 19:42:49 +0200 |
commit | f68ad5d2de40e42b57b9f44ec69d9cbc0f709460 (patch) | |
tree | 9fb0133c98ec706659c8e0be7a829f5998a0c3a0 | |
parent | 44a0148fadc97798acf6c93016a14e7c956f0825 (diff) | |
download | ffmpeg-f68ad5d2de40e42b57b9f44ec69d9cbc0f709460.tar.gz |
lavc/sbrdsp: R-V V sbr_hf_g_filt
hf_g_filt_c: 1552.5
hf_g_filt_rvv_f32: 679.5
-rw-r--r-- | libavcodec/riscv/sbrdsp_init.c | 3 | ||||
-rw-r--r-- | libavcodec/riscv/sbrdsp_rvv.S | 20 |
2 files changed, 23 insertions, 0 deletions
```diff
diff --git a/libavcodec/riscv/sbrdsp_init.c b/libavcodec/riscv/sbrdsp_init.c
index 1b85b2cae9..71de681185 100644
--- a/libavcodec/riscv/sbrdsp_init.c
+++ b/libavcodec/riscv/sbrdsp_init.c
@@ -26,6 +26,8 @@
 void ff_sbr_sum64x5_rvv(float *z);
 float ff_sbr_sum_square_rvv(float (*x)[2], int n);
 void ff_sbr_neg_odd_64_rvv(float *x);
+void ff_sbr_hf_g_filt_rvv(float (*Y)[2], const float (*X_high)[40][2],
+                          const float *g_filt, int m_max, intptr_t ixh);
 
 av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
 {
@@ -35,6 +37,7 @@ av_cold void ff_sbrdsp_init_riscv(SBRDSPContext *c)
     if ((flags & AV_CPU_FLAG_RVV_F32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
         c->sum64x5 = ff_sbr_sum64x5_rvv;
         c->sum_square = ff_sbr_sum_square_rvv;
+        c->hf_g_filt = ff_sbr_hf_g_filt_rvv;
     }
 #if __riscv_xlen >= 64
     if ((flags & AV_CPU_FLAG_RVV_I64) && (flags & AV_CPU_FLAG_RVB_ADDR))
diff --git a/libavcodec/riscv/sbrdsp_rvv.S b/libavcodec/riscv/sbrdsp_rvv.S
index b510190b15..932a5dd7d1 100644
--- a/libavcodec/riscv/sbrdsp_rvv.S
+++ b/libavcodec/riscv/sbrdsp_rvv.S
@@ -84,3 +84,23 @@ func ff_sbr_neg_odd_64_rvv, zve64x
         ret
 endfunc
 #endif
+
+func ff_sbr_hf_g_filt_rvv, zve32f
+        li      t1, 40 * 2 * 4
+        sh3add  a1, a4, a1
+1:
+        vsetvli t0, a3, e32, m4, ta, ma
+        vlsseg2e32.v v16, (a1), t1
+        mul     t2, t0, t1
+        vle32.v v8, (a2)
+        sub     a3, a3, t0
+        vfmul.vv v16, v16, v8
+        add     a1, t2, a1
+        vfmul.vv v20, v20, v8
+        sh2add  a2, t0, a2
+        vsseg2e32.v v16, (a0)
+        sh3add  a0, t0, a0
+        bnez    a3, 1b
+
+        ret
+endfunc
```