diff options
author | Niklas Haas <git@haasn.dev> | 2025-07-11 13:52:37 +0200 |
---|---|---|
committer | Niklas Haas <git@haasn.dev> | 2025-07-17 12:26:06 +0200 |
commit | 91f2d146d418d536e14b0d0c2d32f81cb95f6b7f (patch) | |
tree | 4039fbdc03b7fc4a9a4069cb1a8513d3a9cfef94 | |
parent | 9251af058a0498a5b9fda72fb44433e425b1ff57 (diff) | |
download | ffmpeg-91f2d146d418d536e14b0d0c2d32f81cb95f6b7f.tar.gz |
avfilter/x86/scene_sad: add AVX512 implementation
Trivial to add, but a lot faster (on my machine).
scene_sad8_c: 114476.4 ( 1.00x)
scene_sad8_sse2: 8644.3 (13.24x)
scene_sad8_avx2: 4520.1 (25.33x)
scene_sad8_avx512: 3153.0 (36.31x)
-rw-r--r-- | libavfilter/x86/scene_sad.asm | 7 | ||||
-rw-r--r-- | libavfilter/x86/scene_sad_init.c | 7 |
2 files changed, 14 insertions, 0 deletions
diff --git a/libavfilter/x86/scene_sad.asm b/libavfilter/x86/scene_sad.asm
index bf7236b3a3..2cd9dddb5c 100644
--- a/libavfilter/x86/scene_sad.asm
+++ b/libavfilter/x86/scene_sad.asm
@@ -72,3 +72,10 @@ INIT_YMM avx2
 SAD_FRAMES
 
 %endif
+
+%if HAVE_AVX512_EXTERNAL
+
+INIT_ZMM avx512
+SAD_FRAMES
+
+%endif
diff --git a/libavfilter/x86/scene_sad_init.c b/libavfilter/x86/scene_sad_init.c
index 4a4c40195f..2d631b376a 100644
--- a/libavfilter/x86/scene_sad_init.c
+++ b/libavfilter/x86/scene_sad_init.c
@@ -41,6 +41,9 @@ SCENE_SAD_FUNC(scene_sad_sse2, ff_scene_sad_sse2, 16)
 #if HAVE_AVX2_EXTERNAL
 SCENE_SAD_FUNC(scene_sad_avx2, ff_scene_sad_avx2, 32)
 #endif
+#if HAVE_AVX512_EXTERNAL
+SCENE_SAD_FUNC(scene_sad_avx512, ff_scene_sad_avx512, 64)
+#endif
 #endif
 
 ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
@@ -48,6 +51,10 @@ ff_scene_sad_fn ff_scene_sad_get_fn_x86(int depth)
 #if HAVE_X86ASM
     int cpu_flags = av_get_cpu_flags();
     if (depth <= 8) {
+#if HAVE_AVX512_EXTERNAL
+        if (EXTERNAL_AVX512(cpu_flags))
+            return scene_sad_avx512;
+#endif
 #if HAVE_AVX2_EXTERNAL
         if (EXTERNAL_AVX2_FAST(cpu_flags))
             return scene_sad_avx2;