diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2022-09-26 17:52:50 +0300 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-09-27 13:19:52 +0200 |
commit | a15edb0bc0108362fa3c71de3bf763072341b8b0 (patch) | |
tree | 3c358f15ed24b60ac99b8ab5248850e06424bf71 | |
parent | 09f907999f6ff4204d5848e5fd01e1143cb76d9c (diff) | |
download | ffmpeg-a15edb0bc0108362fa3c71de3bf763072341b8b0.tar.gz |
lavc/aacpsdsp: RISC-V V hybrid_synthesis_deint
-rw-r--r-- | libavcodec/riscv/aacpsdsp_init.c | 6 | ||||
-rw-r--r-- | libavcodec/riscv/aacpsdsp_rvv.S | 35 |
2 files changed, 40 insertions, 1 deletions
diff --git a/libavcodec/riscv/aacpsdsp_init.c b/libavcodec/riscv/aacpsdsp_init.c
index 1d36f89f6e..c2201ffb6a 100644
--- a/libavcodec/riscv/aacpsdsp_init.c
+++ b/libavcodec/riscv/aacpsdsp_init.c
@@ -31,6 +31,8 @@ void ff_ps_hybrid_analysis_rvv(float (*out)[2], float (*in)[2],
                                const float (*filter)[8][2], ptrdiff_t, int n);
 void ff_ps_hybrid_analysis_ileave_rvv(float (*out)[32][2], float L[2][38][64],
                                       int i, int len);
+void ff_ps_hybrid_synthesis_deint_rvv(float out[2][38][64], float (*in)[32][2],
+                                      int i, int len);
 
 av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
 {
@@ -43,7 +45,9 @@ av_cold void ff_psdsp_init_riscv(PSDSPContext *c)
         c->hybrid_analysis = ff_ps_hybrid_analysis_rvv;
     }
 
-    if (flags & AV_CPU_FLAG_RVV_I32)
+    if (flags & AV_CPU_FLAG_RVV_I32) {
         c->hybrid_analysis_ileave = ff_ps_hybrid_analysis_ileave_rvv;
+        c->hybrid_synthesis_deint = ff_ps_hybrid_synthesis_deint_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index c9cc15e73d..0cbe4c1d3c 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -184,3 +184,38 @@ func ff_ps_hybrid_analysis_ileave_rvv, zve32x /* no needs for zve32f here */
 3:
         ret
 endfunc
+
+func ff_ps_hybrid_synthesis_deint_rvv, zve32x
+        slli    t1, a2, 5 + 1 + 2
+        sh2add  a0, a2, a0
+        add     a1, a1, t1
+        addi    a2, a2, -64
+        li      t1, 38 * 64 * 4
+        li      t6, 64 * 4
+        add     a4, a0, t1
+        beqz    a2, 3f
+1:
+        mv      t0, a0
+        mv      t1, a1
+        mv      t3, a3
+        mv      t4, a4
+        addi    a2, a2, 1
+2:
+        vsetvli t5, t3, e32, m1, ta, ma
+        vlseg2e32.v v16, (t1)
+        sub     t3, t3, t5
+        vsse32.v v16, (t0), t6
+        mul     t2, t5, t6
+        vsse32.v v17, (t4), t6
+        sh3add  t1, t5, t1
+        add     t0, t0, t2
+        add     t4, t4, t2
+        bnez    t3, 2b
+
+        add     a0, a0, 4
+        add     a1, a1, 32 * 2 * 4
+        add     a4, a4, 4
+        bnez    a2, 1b
+3:
+        ret
+endfunc