aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/riscv
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2023-12-22 18:12:57 +0200
committerRémi Denis-Courmont <remi@remlab.net>2024-01-15 19:00:25 +0200
commit278b4b60d636ac0673524471a8ae35108d31f3d7 (patch)
treedcdb6e692de648e9e76be9b5606d9676f6222db2 /libavcodec/riscv
parent34a47b97de9d90a28ead36ca200f6a19a08ce2df (diff)
downloadffmpeg-278b4b60d636ac0673524471a8ae35108d31f3d7.tar.gz
lavc/takdsp: R-V V decorrelate_sf
decorrelate_sf_c: 259.2 decorrelate_sf_rvv_i32: 45.5
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r--libavcodec/riscv/takdsp_init.c2
-rw-r--r--libavcodec/riscv/takdsp_rvv.S21
2 files changed, 23 insertions, 0 deletions
diff --git a/libavcodec/riscv/takdsp_init.c b/libavcodec/riscv/takdsp_init.c
index 4312c8d99d..58be83860b 100644
--- a/libavcodec/riscv/takdsp_init.c
+++ b/libavcodec/riscv/takdsp_init.c
@@ -28,6 +28,7 @@
void ff_decorrelate_ls_rvv(const int32_t *p1, int32_t *p2, int length);
void ff_decorrelate_sr_rvv(int32_t *p1, const int32_t *p2, int length);
void ff_decorrelate_sm_rvv(int32_t *p1, int32_t *p2, int length);
+void ff_decorrelate_sf_rvv(int32_t *p1, const int32_t *p2, int len, int, int);
av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
{
@@ -38,6 +39,7 @@ av_cold void ff_takdsp_init_riscv(TAKDSPContext *dsp)
dsp->decorrelate_ls = ff_decorrelate_ls_rvv;
dsp->decorrelate_sr = ff_decorrelate_sr_rvv;
dsp->decorrelate_sm = ff_decorrelate_sm_rvv;
+ dsp->decorrelate_sf = ff_decorrelate_sf_rvv;
}
#endif
}
diff --git a/libavcodec/riscv/takdsp_rvv.S b/libavcodec/riscv/takdsp_rvv.S
index b593d9139a..fa942a3be6 100644
--- a/libavcodec/riscv/takdsp_rvv.S
+++ b/libavcodec/riscv/takdsp_rvv.S
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2023 Institue of Software Chinese Academy of Sciences (ISCAS).
+ * Copyright (c) 2023 Rémi Denis-Courmont
*
* This file is part of FFmpeg.
*
@@ -65,3 +66,23 @@ func ff_decorrelate_sm_rvv, zve32x
ret
endfunc
+
+/*
+ * ff_decorrelate_sf_rvv(int32_t *p1, const int32_t *p2, int len, int, int)
+ *
+ * For each of the len 32-bit elements, computes in place:
+ *     p1[i] = (rshift_round((p2[i] >> a3) * a4, 8) << a3) - p1[i]
+ * where rshift_round is a right shift rounded according to vxrm.
+ *
+ * Register use (arguments per the standard RISC-V calling convention):
+ *   a0 = p1 (read and written), a1 = p2 (read only), a2 = len,
+ *   a3 = 4th argument, used as the shift amount,
+ *   a4 = 5th argument, used as the multiplier,
+ *   t0 = number of elements handled by the current iteration.
+ */
+func ff_decorrelate_sf_rvv, zve32x
+ /* Fixed-point rounding mode 0 (round-to-nearest-up) for vssra below. */
+ csrwi vxrm, 0
+1:
+ /* 32-bit elements, register groups of 8; t0 = elements this pass. */
+ vsetvli t0, a2, e32, m8, ta, ma
+ vle32.v v8, (a1)
+ /* Remaining element count; tested by the bnez at the loop end. */
+ sub a2, a2, t0
+ /* Arithmetic right shift of the p2 values by a3. */
+ vsra.vx v8, v8, a3
+ /* Advance p2 by t0 elements (t0 * 4 bytes). */
+ sh2add a1, t0, a1
+ vle32.v v0, (a0)
+ vmul.vx v8, v8, a4
+ /* Rounding arithmetic right shift by 8, rounding mode taken from vxrm. */
+ vssra.vi v8, v8, 8
+ /* Shift back left by the same amount a3 that was shifted out above. */
+ vsll.vx v8, v8, a3
+ /* v0 = v8 - v0: scaled p2 term minus the original p1 value. */
+ vsub.vv v0, v8, v0
+ vse32.v v0, (a0)
+ /* Advance p1 by t0 elements (t0 * 4 bytes). */
+ sh2add a0, t0, a0
+ bnez a2, 1b
+
+ ret
+endfunc