about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Niklas Haas <git@haasn.dev> 2025-07-11 13:52:37 +0200
committer: Niklas Haas <git@haasn.dev> 2025-07-17 12:26:06 +0200
commit: 91f2d146d418d536e14b0d0c2d32f81cb95f6b7f (patch)
tree: 4039fbdc03b7fc4a9a4069cb1a8513d3a9cfef94
parent: 9251af058a0498a5b9fda72fb44433e425b1ff57 (diff)
download: ffmpeg-91f2d146d418d536e14b0d0c2d32f81cb95f6b7f.tar.gz
avfilter/x86/scene_sad: add AVX512 implementation
Trivial to add, but a lot faster (on my machine).

scene_sad8_c:      114476.4 ( 1.00x)
scene_sad8_sse2:     8644.3 (13.24x)
scene_sad8_avx2:     4520.1 (25.33x)
scene_sad8_avx512:   3153.0 (36.31x)
-rw-r--r-- libavfilter/x86/scene_sad.asm 7
-rw-r--r-- libavfilter/x86/scene_sad_init.c 7
2 files changed, 14 insertions, 0 deletions
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
index bf7236b3a3..2cd9dddb5c 100644
--- a/libavfilter/x86/scene_sad.asm
+++ b/libavfilter/x86/scene_sad.asm
@@ -72,3 +72,10 @@ INIT_YMM avx2
SAD_FRAMES
%endif
+
+%if HAVE_AVX512_EXTERNAL
+
+INIT_ZMM avx512
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
index 4a4c40195f..2d631b376a 100644
--- a/libavfilter/x86/scene_sad_init.c
+++ b/libavfilter/x86/scene_sad_init.c
@@ -41,6 +41,9 @@ SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16)
#if HAVE_AVX2_EXTERNAL
SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32)
#endif
+#if HAVE_AVX512_EXTERNAL
+SCENE_SAD_FUNC(scene_sad_avx512, ff_scene_sad_avx512, 64)
+#endif
#endif
ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
@@ -48,6 +51,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
#if HAVE_X86ASM
int cpu_flags = av_get_cpu_flags();
if (depth <= 8) {
+#if HAVE_AVX512_EXTERNAL
+ if (EXTERNAL_AVX512(cpu_flags))
+ return scene_sad_avx512;
+#endif
#if HAVE_AVX2_EXTERNAL
if (EXTERNAL_AVX2_FAST(cpu_flags))
return scene_sad_avx2;