lavc/aacpsdsp: fix clobber on RISC-V LP64D/ILP32D

Although the DSP function only uses single-precision operations from the RISC-V F extension, the caller may leave double-precision values in the spilled registers if the calling convention supports double-precision hardware floats. In that case, we need to save and restore the FS registers as double precision. Conversely, we do not need to save anything at all if an integer calling convention is in use. However, we can assume that single-precision floats are supported, since the Zve32f extension implies the F extension. So, for the sake of simplicity, we always save at least single-precision values. In theory, we should even save quadruple-precision values if the LP64Q ABI is in use; I have yet to see a compiler that supports it, though.
author: Rémi Denis-Courmont <remi@remlab.net> 2022-10-06 21:46:12 +0300
committer: Lynne <dev@lynne.ee> 2022-10-10 02:23:18 +0200
commit: 105921251ab35b870887e0c7348016f4dff3ec5b (patch)
tree: 08d2d68c6304aca51aa3b931b5ea0282334de2d5
parent: bfc69297c5d8d130b9cf1082dd1728b39a0e75f6 (diff)
download: ffmpeg-105921251ab35b870887e0c7348016f4dff3ec5b.tar.gz
1 files changed, 6 insertions, 4 deletions
diff --git a/libavcodec/riscv/aacpsdsp_rvv.S b/libavcodec/riscv/aacpsdsp_rvv.S
index 1d6e73fd2d..80bd19f6ad 100644
--- a/libavcodec/riscv/aacpsdsp_rvv.S
+++ b/libavcodec/riscv/aacpsdsp_rvv.S
@@ -55,9 +55,10 @@ endfunc
 
 func ff_ps_hybrid_analysis_rvv, zve32f
         /* We need 26 FP registers, for 20 scratch ones. Spill fs0-fs5. */
-        addi    sp, sp, -32
+        addi    sp, sp, -48
         .irp n, 0, 1, 2, 3, 4, 5
-        fsw     fs\n, (4 * \n)(sp)
+HWD     fsd     fs\n, (8 * \n)(sp)
+NOHWD   fsw     fs\n, (4 * \n)(sp)
         .endr
 
         .macro input, j, fd0, fd1, fd2, fd3
@@ -142,9 +143,10 @@ func ff_ps_hybrid_analysis_rvv, zve32f
         bnez       a4, 1b
 
         .irp n, 5, 4, 3, 2, 1, 0
-        flw     fs\n, (4 * \n)(sp)
+HWD     fld     fs\n, (8 * \n)(sp)
+NOHWD   flw     fs\n, (4 * \n)(sp)
         .endr
-        addi    sp, sp, 32
+        addi    sp, sp, 48
         ret
         .purgem input
         .purgem filter
author	Rémi Denis-Courmont <remi@remlab.net>	2022-10-06 21:46:12 +0300
committer	Lynne <dev@lynne.ee>	2022-10-10 02:23:18 +0200
commit	105921251ab35b870887e0c7348016f4dff3ec5b (patch)
tree	08d2d68c6304aca51aa3b931b5ea0282334de2d5
parent	bfc69297c5d8d130b9cf1082dd1728b39a0e75f6 (diff)
download	ffmpeg-105921251ab35b870887e0c7348016f4dff3ec5b.tar.gz