diff options
author | James Almer <jamrial@gmail.com> | 2021-02-17 10:46:36 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2021-02-17 13:33:20 -0300 |
commit | 2b4da1cb8c2984b37e5c912e103a1b8b734e7c1f (patch) | |
tree | f309e7984e741faddd2124847a3d211e91c516f2 | |
parent | 670051b52487cf71890909d1020a067169d743cc (diff) | |
download | ffmpeg-2b4da1cb8c2984b37e5c912e103a1b8b734e7c1f.tar.gz |
x86/vf_gblur: fix postscale_slice prologue
The x86_32 ABI does not pass float arguments in XMM registers, and the Win64
ABI passes only the first four arguments in registers, so the fifth (max) must be loaded from the stack.
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavfilter/vf_gblur.c | 3 |
-rw-r--r-- | libavfilter/x86/vf_gblur.asm | 29 |
2 files changed, 14 insertions, 18 deletions
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 109a7a95f9..40956e122d 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
 {
     s->horiz_slice = horiz_slice_c;
     s->postscale_slice = postscale_c;
-    if (ARCH_X86_64)
-        ff_gblur_init_x86(s);
+    ff_gblur_init_x86(s);
 }
 
 static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index c29ecba889..c2b2998202 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -185,27 +185,24 @@ HORIZ_SLICE
 %endif
 
 %macro POSTSCALE_SLICE 0
-%if UNIX64
-cglobal postscale_slice, 2, 2, 4, ptr, length
-%else
-cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
-%endif
+cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
     shl lengthd, 2
     add ptrq, lengthq
     neg lengthq
-%if WIN64
+%if ARCH_X86_32
+    VBROADCASTSS m0, postscalem
+    VBROADCASTSS m1, minm
+    VBROADCASTSS m2, maxm
+%elif WIN64
     SWAP 0, 2
     SWAP 1, 3
-    SWAP 2, 4
-%endif
-%if cpuflag(avx2)
-    vbroadcastss m0, xm0
-    vbroadcastss m1, xm1
-    vbroadcastss m2, xm2
-%else
-    shufps   xm0, xm0, 0
-    shufps   xm1, xm1, 0
-    shufps   xm2, xm2, 0
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, maxm
+%else ; UNIX64
+    VBROADCASTSS m0, xm0
+    VBROADCASTSS m1, xm1
+    VBROADCASTSS m2, xm2 ; max is the third float argument, passed in xmm2
 %endif
 
 .loop: