diff options
author | Geoff Hill <geoff@geoffhill.org> | 2024-04-06 07:26:06 -0700 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2024-04-08 13:36:40 +0300 |
commit | 42e88f18f30c275f52cea001b33ca62d8a3ff2d1 (patch) | |
tree | 8e6a310b28be1590d02090c7963912cea44007ba /libavcodec/aarch64/ac3dsp_neon.S | |
parent | 69cb34f8859ed62fd1c46d3612912a334542fe28 (diff) | |
download | ffmpeg-42e88f18f30c275f52cea001b33ca62d8a3ff2d1.tar.gz |
avcodec/ac3: Implement sum_square_butterfly_int32 for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/ac3dsp_neon.S')
-rw-r--r-- | libavcodec/aarch64/ac3dsp_neon.S | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index c350c1f173..77f9d20275 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -64,3 +64,26 @@ function ff_float_to_fixed24_neon, export=1
         b.ne            1b
         ret
 endfunc
+
+function ff_ac3_sum_square_butterfly_int32_neon, export=1 // In: x0 = result buffer, x1 = input a, x2 = input b, w3 = element count. Accumulates a*a, b*b, (a+b)^2, (a-b)^2. NOTE(review): presumably the NEON version of the AC-3 DSP sum_square_butterfly_int32 hook - confirm prototype with the C declaration.
+        movi            v0.2d, #0               // v0 = two 64-bit partial sums of a*a
+        movi            v1.2d, #0               // v1 = two 64-bit partial sums of b*b
+        movi            v2.2d, #0               // v2 = two 64-bit partial sums of (a+b)^2
+        movi            v3.2d, #0               // v3 = two 64-bit partial sums of (a-b)^2
+1:      ld1             {v4.2s}, [x1], #8       // v4 = next two 32-bit elements of a; x1 += 8
+        ld1             {v5.2s}, [x2], #8       // v5 = next two 32-bit elements of b; x2 += 8
+        add             v6.2s, v4.2s, v5.2s     // v6 = a + b in 32-bit lanes (non-saturating, wraps on overflow)
+        sub             v7.2s, v4.2s, v5.2s     // v7 = a - b in 32-bit lanes (non-saturating, wraps on overflow)
+        smlal           v0.2d, v4.2s, v4.2s     // signed 32x32 -> 64 multiply-accumulate: v0 += a*a
+        smlal           v1.2d, v5.2s, v5.2s     // v1 += b*b
+        smlal           v2.2d, v6.2s, v6.2s     // v2 += (a+b)*(a+b)
+        smlal           v3.2d, v7.2s, v7.2s     // v3 += (a-b)*(a-b)
+        subs            w3, w3, #2              // two elements consumed per iteration
+        b.gt            1b                      // repeat while w3 > 0 (signed). The body always runs once and steps by 2 - assumes w3 is positive and even; TODO confirm with callers.
+        addp            d0, v0.2d               // fold the two 64-bit lanes into a single total
+        addp            d1, v1.2d
+        addp            d2, v2.2d
+        addp            d3, v3.2d
+        st1             {v0.1d-v3.1d}, [x0]     // write the four 64-bit totals consecutively at x0 (32 bytes)
+        ret
+endfunc |