diff options
author | Ramiro Polla <ramiro.polla@gmail.com> | 2024-08-21 16:55:52 +0200 |
---|---|---|
committer | Ramiro Polla <ramiro.polla@gmail.com> | 2024-08-26 12:49:04 +0200 |
commit | 8c203ea7c794e01efcf985ba8303f598a8180864 (patch) | |
tree | e9c15bdb8209b200a3f23d1fe94b12b9b9f90d03 /libavcodec/aarch64 | |
parent | 9f68a3712eb47983cb0a067a4ff2e722c4c026fd (diff) | |
download | ffmpeg-8c203ea7c794e01efcf985ba8303f598a8180864.tar.gz |
avcodec/aarch64/mpegvideoencdsp: add dotprod implementation for pix_norm1
```
                     A55               A76
pix_norm1_c:       484.3             235.2
pix_norm1_neon:    193.8 ( 2.50x)     44.7 ( 5.26x)
pix_norm1_dotprod:  91.8 ( 5.28x)     21.2 (11.09x)
```
Diffstat (limited to 'libavcodec/aarch64')
-rw-r--r-- | libavcodec/aarch64/mpegvideoencdsp_init.c | 10 | ||||
-rw-r--r-- | libavcodec/aarch64/mpegvideoencdsp_neon.S | 28 |
2 files changed, 38 insertions, 0 deletions
```diff
diff --git a/libavcodec/aarch64/mpegvideoencdsp_init.c b/libavcodec/aarch64/mpegvideoencdsp_init.c
index 7eb632ed1b..d0ce07e178 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_init.c
+++ b/libavcodec/aarch64/mpegvideoencdsp_init.c
@@ -27,6 +27,10 @@
 int ff_pix_sum16_neon(const uint8_t *pix, int line_size);
 int ff_pix_norm1_neon(const uint8_t *pix, int line_size);
 
+#if HAVE_DOTPROD
+int ff_pix_norm1_neon_dotprod(const uint8_t *pix, int line_size);
+#endif
+
 av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
                                              AVCodecContext *avctx)
 {
@@ -36,4 +40,10 @@ av_cold void ff_mpegvideoencdsp_init_aarch64(MpegvideoEncDSPContext *c,
         c->pix_sum = ff_pix_sum16_neon;
         c->pix_norm1 = ff_pix_norm1_neon;
     }
+
+#if HAVE_DOTPROD
+    if (have_dotprod(cpu_flags)) {
+        c->pix_norm1 = ff_pix_norm1_neon_dotprod;
+    }
+#endif
 }
diff --git a/libavcodec/aarch64/mpegvideoencdsp_neon.S b/libavcodec/aarch64/mpegvideoencdsp_neon.S
index f562ee3eba..4944e7b7f4 100644
--- a/libavcodec/aarch64/mpegvideoencdsp_neon.S
+++ b/libavcodec/aarch64/mpegvideoencdsp_neon.S
@@ -66,3 +66,31 @@ function ff_pix_norm1_neon, export=1
 
         ret
 endfunc
+
+#if HAVE_DOTPROD
+ENABLE_DOTPROD
+
+function ff_pix_norm1_neon_dotprod, export=1
+// x0 const uint8_t *pix
+// x1 int line_size
+
+        sxtw            x1, w1
+        movi            v0.16b, #0
+        mov             w2, #16
+
+1:
+        ld1             {v1.16b}, [x0], x1
+        ld1             {v2.16b}, [x0], x1
+        udot            v0.4s, v1.16b, v1.16b
+        subs            w2, w2, #2
+        udot            v0.4s, v2.16b, v2.16b
+        b.ne            1b
+
+        uaddlv          d0, v0.4s
+        fmov            w0, s0
+
+        ret
+endfunc
+
+DISABLE_DOTPROD
+#endif
```