aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/aarch64
diff options
context:
space:
mode:
author Ramiro Polla <ramiro.polla@gmail.com> 2024-08-21 16:55:52 +0200
committer Ramiro Polla <ramiro.polla@gmail.com> 2024-08-26 12:49:04 +0200
commit 8c203ea7c794e01efcf985ba8303f598a8180864 (patch)
tree e9c15bdb8209b200a3f23d1fe94b12b9b9f90d03 /libavcodec/aarch64
parent 9f68a3712eb47983cb0a067a4ff2e722c4c026fd (diff)
download ffmpeg-8c203ea7c794e01efcf985ba8303f598a8180864.tar.gz
avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1
                      A55               A76
pix_norm1_c:        484.3             235.2
pix_norm1_neon:     193.8 ( 2.50x)     44.7 ( 5.26x)
pix_norm1_dotprod:   91.8 ( 5.28x)     21.2 (11.09x)
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r-- libavcodec/aarch64/mpegvideoencdsp_init.c | 10
-rw-r--r-- libavcodec/aarch64/mpegvideoencdsp_neon.S | 28
2 files changed, 38 insertions, 0 deletions
diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 7eb632ed1b..d0ce07e178 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -27,6 +27,10 @@
int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
+#if HAVE_DOTPROD
+int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
+#endif
+
av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
AVCodecContext *avctx)
{
@@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
c->pix_sum = ff_pix_sum16_neon;
c->pix_norm1 = ff_pix_norm1_neon;
}
+
+#if HAVE_DOTPROD
+ if (have_dotprod(cpu_flags)) {
+ c->pix_norm1 = ff_pix_norm1_neon_dotprod;
+ }
+#endif
}
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index f562ee3eba..4944e7b7f4 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -66,3 +66,31 @@ function ff_pix_norm1_neon, export=1
ret
endfunc
+
+#if HAVE_DOTPROD
+ENABLE_DOTPROD
+
+function ff_pix_norm1_neon_dotprod, export=1
+// x0 const uint8_t *pix        (first byte of the first row read)
+// x1 int line_size             (byte distance between consecutive rows)
+// Returns in w0 the sum of squares of the 16 bytes in each of 16 rows (256 bytes total).
+ sxtw x1, w1 // sign-extend the 32-bit line_size so it can serve as a 64-bit post-increment
+ movi v0.16b, #0 // v0 = four 32-bit partial sums, initially zero
+ mov w2, #16 // w2 = rows left to process
+
+1:
+ ld1 {v1.16b}, [x0], x1 // load one 16-byte row, then advance x0 by line_size
+ ld1 {v2.16b}, [x0], x1 // load the following row the same way
+ udot v0.4s, v1.16b, v1.16b // row dotted with itself: each 32-bit lane += sum of squares of its 4 bytes
+ subs w2, w2, #2 // two rows consumed per iteration; sets the flags tested by b.ne below
+ udot v0.4s, v2.16b, v2.16b // same for the second row (udot leaves the flags untouched)
+ b.ne 1b // loop until w2 reaches zero
+
+ uaddlv d0, v0.4s // add the four 32-bit lanes into one 64-bit total in d0
+ fmov w0, s0 // return the low 32 bits of the total
+
+ ret
+endfunc
+
+DISABLE_DOTPROD
+#endif