aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2022-10-05 19:12:56 +0300
committerLynne <dev@lynne.ee>2022-10-10 02:23:17 +0200
commitbfc69297c5d8d130b9cf1082dd1728b39a0e75f6 (patch)
treeda51678275cd33ed8ec44d51d4214b626baed1d8
parent97d34befea598d34e92ed384acb3dced5490ae8a (diff)
downloadffmpeg-bfc69297c5d8d130b9cf1082dd1728b39a0e75f6.tar.gz
lavc/opusdsp: RISC-V V (512-bit) postfilter
This adds a variant of the postfilter for use with 512-bit vectors. Half a vector is enough to perform the scalar product. Normally a whole vector would be used anyhow. Indeed fractional multipliers are no faster than the unit multiplier. But in this particular function, a full vector makes up 16 samples, which would be loaded at each iteration of the outer loop. The minimum guaranteed CELT postfilter period is only 15. Accounting for the edges, we can only safely preload up to 13 samples. The fractional multiplier is thus used to cap the selected vector length to a safe value of 8 elements or 256 bits. Likewise, we have the 1024-bit variant with the quarter multiplier. In theory, a 2048-bit one would be possible with the eighth multiplier, but that length is not even defined in the specifications as of yet, nor is it supported by any emulator - forget actual hardware.
-rw-r--r--libavcodec/riscv/opusdsp_init.c8
-rw-r--r--libavcodec/riscv/opusdsp_rvv.S10
2 files changed, 18 insertions, 0 deletions
diff --git a/libavcodec/riscv/opusdsp_init.c b/libavcodec/riscv/opusdsp_init.c
index a5e3e4748a..7fde9b1fa8 100644
--- a/libavcodec/riscv/opusdsp_init.c
+++ b/libavcodec/riscv/opusdsp_init.c
@@ -27,6 +27,8 @@
void ff_opus_postfilter_rvv_128(float *data, int period, float *g, int len);
void ff_opus_postfilter_rvv_256(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_512(float *data, int period, float *g, int len);
+void ff_opus_postfilter_rvv_1024(float *data, int period, float *g, int len);
av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
{
@@ -41,6 +43,12 @@ av_cold void ff_opus_dsp_init_riscv(OpusDSP *d)
case 32:
d->postfilter = ff_opus_postfilter_rvv_256;
break;
+ case 64:
+ d->postfilter = ff_opus_postfilter_rvv_512;
+ break;
+ case 128:
+ d->postfilter = ff_opus_postfilter_rvv_1024; /* 1024-bit vectors: use the mf4 variant; the 512-bit (mf2) one would select 16 elements here */
+ break;
}
#endif
}
diff --git a/libavcodec/riscv/opusdsp_rvv.S b/libavcodec/riscv/opusdsp_rvv.S
index 243c9a5e52..b3d23a9de5 100644
--- a/libavcodec/riscv/opusdsp_rvv.S
+++ b/libavcodec/riscv/opusdsp_rvv.S
@@ -25,6 +25,16 @@ func ff_opus_postfilter_rvv_128, zve32f
j 1f
endfunc
+func ff_opus_postfilter_rvv_512, zve32f
+ lvtypei a5, e32, mf2, ta, ma /* 32-bit elements, LMUL = 1/2: half of a 512-bit vector, i.e. 8 elements; presumably leaves the vtype value in a5 for the shared code - confirm against the lvtypei macro */
+ j 1f /* continue in the common body at label 1 inside ff_opus_postfilter_rvv_256 */
+endfunc
+
+func ff_opus_postfilter_rvv_1024, zve32f
+ lvtypei a5, e32, mf4, ta, ma /* 32-bit elements, LMUL = 1/4: a quarter of a 1024-bit vector, i.e. 8 elements; presumably leaves the vtype value in a5 for the shared code - confirm against the lvtypei macro */
+ j 1f /* continue in the common body at label 1 inside ff_opus_postfilter_rvv_256 */
+endfunc
+
func ff_opus_postfilter_rvv_256, zve32f
lvtypei a5, e32, m1, ta, ma
1: