aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com>, 2021-02-17 10:46:36 -0300
committer: James Almer <jamrial@gmail.com>, 2021-02-17 13:33:20 -0300
commit: 2b4da1cb8c2984b37e5c912e103a1b8b734e7c1f (patch)
tree: f309e7984e741faddd2124847a3d211e91c516f2
parent: 670051b52487cf71890909d1020a067169d743cc (diff)
download: ffmpeg-2b4da1cb8c2984b37e5c912e103a1b8b734e7c1f.tar.gz
x86/vf_gblur: fix postscale_slice prologue
The x86_32 ABI does not pass float arguments directly in xmm registers, and the Win64 ABI uses only the first four registers for this purpose.

Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavfilter/vf_gblur.c 3
-rw-r--r-- libavfilter/x86/vf_gblur.asm 29
2 files changed, 14 insertions, 18 deletions
diff --git a/libavfilter/vf_gblur.c b/libavfilter/vf_gblur.c
index 109a7a95f9..40956e122d 100644
--- a/libavfilter/vf_gblur.c
+++ b/libavfilter/vf_gblur.c
@@ -234,8 +234,7 @@ void ff_gblur_init(GBlurContext *s)
{
s->horiz_slice = horiz_slice_c;
s->postscale_slice = postscale_c;
- if (ARCH_X86_64)
- ff_gblur_init_x86(s);
+ if (ARCH_X86) ff_gblur_init_x86(s); /* x86-only symbol: keep the call guarded so non-x86 targets still link */
}
static int config_input(AVFilterLink *inlink)
diff --git a/libavfilter/x86/vf_gblur.asm b/libavfilter/x86/vf_gblur.asm
index c29ecba889..c2b2998202 100644
--- a/libavfilter/x86/vf_gblur.asm
+++ b/libavfilter/x86/vf_gblur.asm
@@ -185,27 +185,24 @@ HORIZ_SLICE
%endif
%macro POSTSCALE_SLICE 0
-%if UNIX64
-cglobal postscale_slice, 2, 2, 4, ptr, length
-%else
-cglobal postscale_slice, 5, 5, 4, ptr, length, postscale, min, max
-%endif
+cglobal postscale_slice, 2, 2, 4, ptr, length, postscale, min, max
shl lengthd, 2
add ptrq, lengthq
neg lengthq
-%if WIN64
+%if ARCH_X86_32
+ VBROADCASTSS m0, postscalem ; x86_32: float args are not passed in xmm regs, so read them via their argument operands
+ VBROADCASTSS m1, minm
+ VBROADCASTSS m2, maxm
+%elif WIN64
SWAP 0, 2
SWAP 1, 3
- SWAP 2, 4
-%endif
-%if cpuflag(avx2)
- vbroadcastss m0, xm0
- vbroadcastss m1, xm1
- vbroadcastss m2, xm2
-%else
- shufps xm0, xm0, 0
- shufps xm1, xm1, 0
- shufps xm2, xm2, 0
+ VBROADCASTSS m0, xm0 ; Win64: after the SWAPs above, m0/m1 name the regs that carried postscale/min
+ VBROADCASTSS m1, xm1
+ VBROADCASTSS m2, maxm ; Win64: max is the fifth argument, beyond the four register slots
+%else ; UNIX64
+ VBROADCASTSS m0, xm0
+ VBROADCASTSS m1, xm1
+ VBROADCASTSS m2, xm2 ; SysV x86-64: max is the third float argument, i.e. xmm2 (xmm3 holds no argument)
%endif
.loop: