about summary refs log tree commit diff stats
path: root/libavfilter
diff options
context:
space:
mode:
author Wu Jianhua <jianhua.wu@intel.com> 2021-08-04 10:06:12 +0800
committer Paul B Mahol <onemda@gmail.com> 2021-08-29 19:58:33 +0200
commit 4a5e24721c2bd1839aec57730061884fe2c5dd3b (patch)
tree 49e830acca80992d14b822addeb0912809d95ff2 /libavfilter
parent fdc0bb78feb7a4684dd958d2538a0974c12b12cd (diff)
download ffmpeg-4a5e24721c2bd1839aec57730061884fe2c5dd3b.tar.gz
libavfilter/x86/vf_gblur: add ff_postscale_slice_avx512()
Co-authored-by: Cheng Yanfei <yanfei.cheng@intel.com>
Co-authored-by: Jin Jun <jun.i.jin@intel.com>
Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/vf_gblur.asm 21
-rw-r--r-- libavfilter/x86/vf_gblur_init.c 4
2 files changed, 16 insertions, 9 deletions
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index 4d84e6d011..276fe347f5 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -194,19 +194,17 @@ cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
VBROADCASTSS m1, minm
VBROADCASTSS m2, maxm
%elif WIN64
- SWAP 0, 2
- SWAP 1, 3
- VBROADCASTSS m0, xm0
- VBROADCASTSS m1, xm1
+ VBROADCASTSS m0, xmm2
+ VBROADCASTSS m1, xmm3
VBROADCASTSS m2, maxm
-%else ; UNIX64
- VBROADCASTSS m0, xm0
- VBROADCASTSS m1, xm1
- VBROADCASTSS m2, xm2
+%else ; UNIX
+ VBROADCASTSS m0, xmm0
+ VBROADCASTSS m1, xmm1
+ VBROADCASTSS m2, xmm2
%endif
.loop:
-%if cpuflag(avx2)
+%if cpuflag(avx2) || cpuflag(avx512)
mulps m3, m0, [ptrq + lengthq]
%else
movu m3, [ptrq + lengthq]
@@ -229,3 +227,8 @@ POSTSCALE_SLICE
INIT_YMM avx2
POSTSCALE_SLICE
%endif
+
+%if HAVE_AVX512_EXTERNAL
+INIT_ZMM avx512
+POSTSCALE_SLICE
+%endif
diff --git a/libavfilter/x86/vf_gblur_init.c b/libavfilter/x86/vf_gblur_init.c
index d80fb46fe4..34aba4ca6e 100644
--- a/libavfilter/x86/vf_gblur_init.c
+++ b/libavfilter/x86/vf_gblur_init.c
@@ -29,6 +29,7 @@ void ff_horiz_slice_avx2(float *ptr, int width, int height, int steps, float nu,
void ff_postscale_slice_sse(float *ptr, int length, float postscale, float min, float max);
void ff_postscale_slice_avx2(float *ptr, int length, float postscale, float min, float max);
+void ff_postscale_slice_avx512(float *ptr, int length, float postscale, float min, float max);
av_cold void ff_gblur_init_x86(GBlurContext *s)
{
@@ -47,5 +48,8 @@ av_cold void ff_gblur_init_x86(GBlurContext *s)
if (EXTERNAL_AVX2(cpu_flags)) {
s->horiz_slice = ff_horiz_slice_avx2;
}
+ if (EXTERNAL_AVX512(cpu_flags)) {
+ s->postscale_slice = ff_postscale_slice_avx512;
+ }
#endif
}