about summary refs log tree commit diff stats
path: root/libavutil/aarch64
diff options
context:
space:
mode:
author Reimar Döffinger <Reimar.Doeffinger@gmx.de> 2022-10-09 21:17:47 +0200
committer Reimar Döffinger <Reimar.Doeffinger@gmx.de> 2022-10-11 09:12:02 +0200
commit 38cd829dce7184400c944ead299a11e57c8ec7f8 (patch)
tree 1003ec1a6835e3aa61dd47aca9af74e9de7750c3 /libavutil/aarch64
parent e10e27a2ead8848648b29a1b397cc240206e9c3d (diff)
download ffmpeg-38cd829dce7184400c944ead299a11e57c8ec7f8.tar.gz
aarch64: Implement stack spilling in a consistent way.
Currently it is done in several different ways, which might cause needless dependencies or, in the case of tx_float_neon.S, is incorrect.

Reviewed-by: Martin Storsjö <martin@martin.st>
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Diffstat (limited to 'libavutil/aarch64')
-rw-r--r-- libavutil/aarch64/tx_float_neon.S 52
1 files changed, 26 insertions, 26 deletions
diff --git a/libavutil/aarch64/tx_float_neon.S b/libavutil/aarch64/tx_float_neon.S
index 4126c3b812..e5531dcc7c 100644
--- a/libavutil/aarch64/tx_float_neon.S
+++ b/libavutil/aarch64/tx_float_neon.S
@@ -866,10 +866,10 @@ FFT16_FN ns_float, 1
.macro FFT32_FN name, no_perm
function ff_tx_fft32_\name\()_neon, export=1
- stp d8, d9, [sp, #-16]
- stp d10, d11, [sp, #-32]
- stp d12, d13, [sp, #-48]
- stp d14, d15, [sp, #-64]
+ stp d14, d15, [sp, #-16*4]!
+ stp d8, d9, [sp, #16*3]
+ stp d10, d11, [sp, #16*2]
+ stp d12, d13, [sp, #16]
LOAD_SUBADD
SETUP_SR_RECOMB 32, x7, x8, x9
@@ -911,10 +911,10 @@ function ff_tx_fft32_\name\()_neon, export=1
zip2 v31.2d, v11.2d, v15.2d
st1 { v28.4s, v29.4s, v30.4s, v31.4s }, [x1]
- ldp d14, d15, [sp, #-64]
- ldp d12, d13, [sp, #-48]
- ldp d10, d11, [sp, #-32]
- ldp d8, d9, [sp, #-16]
+ ldp d12, d13, [sp, #16]
+ ldp d10, d11, [sp, #16*2]
+ ldp d8, d9, [sp, #16*3]
+ ldp d14, d15, [sp], #16*4
ret
endfunc
@@ -966,12 +966,12 @@ FFT32_FN ns_float, 1
.macro FFT_SPLIT_RADIX_FN name, no_perm
function ff_tx_fft_sr_\name\()_neon, export=1
- stp d8, d9, [sp, #-16]!
- stp d10, d11, [sp, #-16]!
- stp d12, d13, [sp, #-16]!
- stp d14, d15, [sp, #-16]!
- stp x19, x20, [sp, #-16]!
- stp x21, x22, [sp, #-16]!
+ stp x21, x22, [sp, #-16*6]!
+ stp d8, d9, [sp, #16*5]
+ stp d10, d11, [sp, #16*4]
+ stp d12, d13, [sp, #16*3]
+ stp d14, d15, [sp, #16*2]
+ stp x19, x20, [sp, #16]
ldr w19, [x0, #0] // global target
mov w20, w19 // local length
@@ -1185,12 +1185,12 @@ SR_TRANSFORM_DEF 131072
subs w19, w19, #32*4
b.gt 0b
- ldp x21, x22, [sp], #16
- ldp x19, x20, [sp], #16
- ldp d14, d15, [sp], #16
- ldp d12, d13, [sp], #16
- ldp d10, d11, [sp], #16
- ldp d8, d9, [sp], #16
+ ldp x19, x20, [sp, #16]
+ ldp d14, d15, [sp, #16*2]
+ ldp d12, d13, [sp, #16*3]
+ ldp d10, d11, [sp, #16*4]
+ ldp d8, d9, [sp, #16*5]
+ ldp x21, x22, [sp], #16*6
ret
@@ -1279,12 +1279,12 @@ SR_TRANSFORM_DEF 131072
zip2 v7.2d, v15.2d, v23.2d
st1 { v4.4s, v5.4s, v6.4s, v7.4s }, [x15]
- ldp x21, x22, [sp], #16
- ldp x19, x20, [sp], #16
- ldp d14, d15, [sp], #16
- ldp d12, d13, [sp], #16
- ldp d10, d11, [sp], #16
- ldp d8, d9, [sp], #16
+ ldp x19, x20, [sp, #16]
+ ldp d14, d15, [sp, #16*2]
+ ldp d12, d13, [sp, #16*3]
+ ldp d10, d11, [sp, #16*4]
+ ldp d8, d9, [sp, #16*5]
+ ldp x21, x22, [sp], #16*6
ret
endfunc