diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2023-12-22 18:12:57 +0200 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-01-15 19:00:25 +0200 |
commit | 278b4b60d636ac0673524471a8ae35108d31f3d7 (patch) | |
tree | dcdb6e692de648e9e76be9b5606d9676f6222db2 /libavcodec | |
parent | 34a47b97de9d90a28ead36ca200f6a19a08ce2df (diff) | |
download | ffmpeg-278b4b60d636ac0673524471a8ae35108d31f3d7.tar.gz |
lavc/takdsp: R-V V decorrelate_sf
decorrelate_sf_c: 259.2
decorrelate_sf_rvv_i32: 45.5
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/riscv/takdsp_init.c | 2 | ||||
-rw-r--r-- | libavcodec/riscv/takdsp_rvv.S | 21 |
2 files changed, 23 insertions, 0 deletions
```diff
diff --git a/libavcodec/riscv/takdsp_init.c b/libavcodec/riscv/takdsp_init.c
index 4312c8d99d..58be83860b 100644
--- a/libavcodec/riscv/takdsp_init.c
+++ b/libavcodec/riscv/takdsp_init.c
@@ -28,6 +28,7 @@
 void ff_decorrelate_ls_rvv(const int32_t *p1, int32_t *p2, int length);
 void ff_decorrelate_sr_rvv(int32_t *p1, const int32_t *p2, int length);
 void ff_decorrelate_sm_rvv(int32_t *p1, int32_t *p2, int length);
+void ff_decorrelate_sf_rvv(int32_t *p1, const int32_t *p2, int len, int, int);
 
 av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
 {
@@ -38,6 +39,7 @@ av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
         dsp->decorrelate_ls = ff_decorrelate_ls_rvv;
         dsp->decorrelate_sr = ff_decorrelate_sr_rvv;
         dsp->decorrelate_sm = ff_decorrelate_sm_rvv;
+        dsp->decorrelate_sf = ff_decorrelate_sf_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S
index b593d9139a..fa942a3be6 100644
--- a/libavcodec/riscv/takdsp_rvv.S
+++ b/libavcodec/riscv/takdsp_rvv.S
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ * Copyright (c) 2023 Rémi Denis-Courmont
  *
  * This file is part of FFmpeg.
  *
@@ -65,3 +66,23 @@ func ff_decorrelate_sm_rvv, zve32x
 
         ret
 endfunc
+
+func ff_decorrelate_sf_rvv, zve32x
+        csrwi   vxrm, 0
+1:
+        vsetvli t0, a2, e32, m8, ta, ma
+        vle32.v v8, (a1)
+        sub     a2, a2, t0
+        vsra.vx v8, v8, a3
+        sh2add  a1, t0, a1
+        vle32.v v0, (a0)
+        vmul.vx v8, v8, a4
+        vssra.vi v8, v8, 8
+        vsll.vx v8, v8, a3
+        vsub.vv v0, v8, v0
+        vse32.v v0, (a0)
+        sh2add  a0, t0, a0
+        bnez    a2, 1b
+
+        ret
+endfunc
```