aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/aarch64
diff options
context:
space:
mode:
author Geoff Hill <geoff@geoffhill.org> 2024-04-06 07:25:56 -0700
committer Martin Storsjö <martin@martin.st> 2024-04-08 13:36:40 +0300
commit 6f6bd10531e439de67d1354c5fc6f78cc031c66b (patch)
tree 37c9e05d513519f37d4fd7b7ff651083528a4977 /libavcodec/aarch64
parent b69486ea18ed1a278501911c161a5f72f1622ad3 (diff)
download ffmpeg-6f6bd10531e439de67d1354c5fc6f78cc031c66b.tar.gz
avcodec/ac3: Implement ac3_exponent_min for aarch64 NEON
Signed-off-by: Geoff Hill <geoff@geoffhill.org>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r-- libavcodec/aarch64/ac3dsp_init_aarch64.c 2
-rw-r--r-- libavcodec/aarch64/ac3dsp_neon.S 16
2 files changed, 18 insertions, 0 deletions
diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index e3320de0f5..8874b41393 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -25,6 +25,7 @@
#include "libavcodec/ac3dsp.h"
#include "config.h"
+void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
@@ -32,5 +33,6 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
int cpu_flags = av_get_cpu_flags();
if (!have_neon(cpu_flags)) return;
+ c->ac3_exponent_min = ff_ac3_exponent_min_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
}
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index c4d204b51a..f916c32538 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -21,6 +21,22 @@
#include "libavutil/aarch64/asm.S"
+function ff_ac3_exponent_min_neon, export=1 // exp[i] = min(exp[i], exp[i + 256*k]), k = 1..num_reuse_blocks; per AAPCS64: x0 = exp, w1 = num_reuse_blocks, w2 = nb_coefs
+ cbz w1, 3f // num_reuse_blocks == 0: nothing to merge, return without touching exp
+1: ld1 {v0.16b}, [x0] // outer loop: v0 = 16 bytes at the current position (running minimum)
+ mov w3, w1 // w3 = inner-loop counter, reset to num_reuse_blocks for every 16-byte chunk
+ add x4, x0, #256 // x4 = same 16 bytes, 256 bytes further on (presumably the per-block stride, AC3_MAX_COEFS -- confirm against the C reference)
+2: ld1 {v1.16b}, [x4] // inner loop: v1 = the 16 bytes at x4
+ umin v0.16b, v0.16b, v1.16b // per-byte unsigned minimum, accumulated in v0
+ add x4, x4, #256 // step x4 forward by another 256 bytes
+ subs w3, w3, #1 // one fewer row to merge; sets flags for the branch below
+ b.gt 2b // repeat while w3 > 0 (signed compare)
+ st1 {v0.16b}, [x0], #16 // write the 16 minima back in place, then advance x0 by 16 bytes
+ subs w2, w2, #16 // 16 fewer coefficients remaining; sets flags for the branch below
+ b.gt 1b // repeat while w2 > 0; whole 16-byte chunks only, so a non-multiple-of-16 nb_coefs is effectively rounded up
+3: ret
+endfunc
+
function ff_float_to_fixed24_neon, export=1
1: ld1 {v0.4s, v1.4s}, [x1], #32
fcvtzs v0.4s, v0.4s, #24