about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/aarch64/ac3dsp_neon.S
diff options
context:
space:
mode:
author: Geoff Hill <geoff@geoffhill.org> 2024-04-06 07:26:06 -0700
committer: Martin Storsjö <martin@martin.st> 2024-04-08 13:36:40 +0300
commit: 42e88f18f30c275f52cea001b33ca62d8a3ff2d1 (patch)
tree: 8e6a310b28be1590d02090c7963912cea44007ba /libavcodec/aarch64/ac3dsp_neon.S
parent: 69cb34f8859ed62fd1c46d3612912a334542fe28 (diff)
download: ffmpeg-42e88f18f30c275f52cea001b33ca62d8a3ff2d1.tar.gz
avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/ac3dsp_neon.S')
-rw-r--r-- libavcodec/aarch64/ac3dsp_neon.S 23
1 files changed, 23 insertions, 0 deletions
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index c350c1f173..77f9d20275 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -64,3 +64,26 @@ function ff_float_to_fixed24_neon, export=1
b.ne 1b
ret
endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1      // NOTE(review): args presumably (int64_t sum[4], const int32_t *coef0, const int32_t *coef1, int len) - confirm against the C prototype
+        movi            v3.2d, #0                       // v3 = 64-bit accumulators for (a - b)^2
+        movi            v2.2d, #0                       // v2 = 64-bit accumulators for (a + b)^2
+        movi            v1.2d, #0                       // v1 = 64-bit accumulators for b^2
+        movi            v0.2d, #0                       // v0 = 64-bit accumulators for a^2
+1:      ld1             {v4.2s}, [x1], #8               // a = next two 32-bit elements from x1, pointer advances 8 bytes
+        ld1             {v5.2s}, [x2], #8               // b = next two 32-bit elements from x2, pointer advances 8 bytes
+        subs            w3, w3, #2                      // two elements consumed; the vector ops below do not touch NZCV
+        smlal           v0.2d, v4.2s, v4.2s             // signed widening multiply-accumulate: v0 += a * a
+        smlal           v1.2d, v5.2s, v5.2s             // v1 += b * b
+        add             v6.2s, v4.2s, v5.2s             // a + b, computed in 32 bits (wraps on overflow)
+        sub             v7.2s, v4.2s, v5.2s             // a - b, computed in 32 bits (wraps on overflow)
+        smlal           v2.2d, v6.2s, v6.2s             // v2 += (a + b)^2
+        smlal           v3.2d, v7.2s, v7.2s             // v3 += (a - b)^2
+        b.gt            1b                              // test-after loop: body always runs once, two elements per pass, until w3 <= 0
+        addp            d0, v0.2d                       // fold both 64-bit lanes of each accumulator into one scalar
+        addp            d1, v1.2d
+        addp            d2, v2.2d
+        addp            d3, v3.2d
+        st1             {v0.1d, v1.1d, v2.1d, v3.1d}, [x0]      // write the four 64-bit sums to [x0]: a^2, b^2, (a+b)^2, (a-b)^2
+        ret
+endfunc