author     Andreas Rheinhardt <andreas.rheinhardt@outlook.com>   2022-06-09 16:57:34 +0200
committer  Andreas Rheinhardt <andreas.rheinhardt@outlook.com>   2022-06-22 13:36:04 +0200
commit     a05f22eaf393177b94432431c145cbc5ba10390a (patch)
tree       31a2ae01b520b2578477ea4b6b2febf1d0e6c1e6 /libswscale/x86/output.asm
parent     2831837182fe26f0a19a4d366f3f0553311f1291 (diff)
download   ffmpeg-a05f22eaf393177b94432431c145cbc5ba10390a.tar.gz
swscale/x86/swscale: Remove obsolete and harmful MMX(EXT) functions
x64 always has MMX, MMXEXT, SSE and SSE2, which means
that some functions for MMX, MMXEXT, SSE and 3dnow are always
overridden by other functions (unless one explicitly
disables SSE2, for example). So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s,
they are removed.
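For illustration only, here is a minimal C sketch of that override
pattern; it is not FFmpeg's actual init code, and the names
(select_yuv2plane1, CPU_FLAG_*) are hypothetical stand-ins. Because
candidates are assigned in ascending ISA order, on any target where
SSE2 is guaranteed the MMX/MMXEXT pointer is always replaced before
it could ever be called.

#include <stdint.h>

typedef void (*yuv2plane1_fn)(const int16_t *src, uint8_t *dst, int w);

/* Hypothetical cpu-flag bits and candidate implementations. */
enum { CPU_FLAG_MMX = 1, CPU_FLAG_MMXEXT = 2, CPU_FLAG_SSE2 = 4 };

static void yuv2plane1_8_c(const int16_t *src, uint8_t *dst, int w)    { (void)src; (void)dst; (void)w; }
static void yuv2plane1_8_mmx(const int16_t *src, uint8_t *dst, int w)  { (void)src; (void)dst; (void)w; }
static void yuv2plane1_8_sse2(const int16_t *src, uint8_t *dst, int w) { (void)src; (void)dst; (void)w; }

static yuv2plane1_fn select_yuv2plane1(int cpu_flags)
{
    yuv2plane1_fn fn = yuv2plane1_8_c;   /* portable C baseline */
    if (cpu_flags & CPU_FLAG_MMX)
        fn = yuv2plane1_8_mmx;           /* dead on x86-64 ...            */
    if (cpu_flags & CPU_FLAG_SSE2)
        fn = yuv2plane1_8_sse2;          /* ... because SSE2 is always
                                            set there and overrides it   */
    return fn;
}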
Moreover, some of the removed code was buggy/not bitexact
and led to failures involving the f32le and f32be versions of
gray, gbrp and gbrap on x86-32 when SSE2 was disabled.
See e.g.
https://fate.ffmpeg.org/report.cgi?time=20220609221253&slot=x86_32-debian-kfreebsd-gcc-4.4-cpuflags-mmx
Notice that yuv2yuvX_mmx is not removed, because it is used
by SSE3 and AVX2 as a fallback in case of unaligned data and
also for tail processing. I don't know why yuv2yuvX_mmxext
isn't being used for this; an earlier version [1] of
554c2bc7086f49ef5a6a989ad6bc4bc11807eb6f used it, but
the version that was eventually applied does not.
[1]: https://ffmpeg.org/pipermail/ffmpeg-devel/2020-November/272124.html
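A rough sketch of that fallback pattern, assuming hypothetical
wide_kernel/narrow_kernel routines and a 16-byte block width (the
real swscale code is organized differently): the wide SSE3/AVX2
kernel covers the aligned bulk, while the narrower routine handles
the unaligned prefix and the tail that is shorter than one block.

#include <stddef.h>
#include <stdint.h>

void wide_kernel(uint8_t *dst, size_t n);    /* assumed: dst 16-byte aligned, n % 16 == 0 */
void narrow_kernel(uint8_t *dst, size_t n);  /* assumed: no alignment requirement */

void process(uint8_t *dst, size_t n)
{
    /* Bytes until dst reaches 16-byte alignment. */
    size_t head = (16 - ((uintptr_t)dst & 15)) & 15;
    if (head > n)
        head = n;
    narrow_kernel(dst, head);            /* unaligned prefix */
    dst += head;
    n   -= head;

    size_t bulk = n & ~(size_t)15;       /* largest multiple of 16 */
    wide_kernel(dst, bulk);              /* fast aligned middle */
    narrow_kernel(dst + bulk, n - bulk); /* tail shorter than one block */
}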
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Diffstat (limited to 'libswscale/x86/output.asm')
-rw-r--r--  libswscale/x86/output.asm | 30
1 file changed, 3 insertions(+), 27 deletions(-)
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 1e498fddf6..84e94baaf6 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -312,11 +312,9 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %endif ; %1 == 8/9/10/16
 %endmacro
 
-%if ARCH_X86_32
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
 INIT_MMX mmxext
 yuv2planeX_fn  8, 0, 7
-yuv2planeX_fn  9, 0, 5
-yuv2planeX_fn 10, 0, 5
 %endif
 
 INIT_XMM sse2
@@ -407,19 +405,11 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
     movq                m3, [ditherq]        ; dither
     test           offsetd, offsetd
     jz                     .no_rot
-%if mmsize == 16
     punpcklqdq          m3, m3
-%endif ; mmsize == 16
     PALIGNR             m3, m3, 3, m2
 .no_rot:
-%if mmsize == 8
-    mova                m2, m3
-    punpckhbw           m3, m4             ; byte->word
-    punpcklbw           m2, m4             ; byte->word
-%else
     punpcklbw           m3, m4
     mova                m2, m3
-%endif
 %elif %1 == 9
     pxor                m4, m4
     mova                m3, [pw_512]
@@ -431,36 +421,22 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
 %else ; %1 == 16
 %if cpuflag(sse4) ; sse4/avx
     mova                m4, [pd_4]
-%else ; mmx/sse2
+%else ; sse2
     mova                m4, [pd_4min0x40000]
     mova                m5, [minshort]
-%endif ; mmx/sse2/sse4/avx
+%endif ; sse2/sse4/avx
 %endif ; %1 == ..
 
     ; actual pixel scaling
-%if mmsize == 8
-    yuv2plane1_mainloop %1, a
-%else ; mmsize == 16
     test                dstq, 15
     jnz .unaligned
     yuv2plane1_mainloop %1, a
     REP_RET
 .unaligned:
     yuv2plane1_mainloop %1, u
-%endif ; mmsize == 8/16
     REP_RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-yuv2plane1_fn  8, 0, 5
-yuv2plane1_fn 16, 0, 3
-
-INIT_MMX mmxext
-yuv2plane1_fn  9, 0, 3
-yuv2plane1_fn 10, 0, 3
-%endif
-
 INIT_XMM sse2
 yuv2plane1_fn  8, 5, 5
 yuv2plane1_fn  9, 5, 3
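For context, the "test dstq, 15 / jnz .unaligned" branch retained in
the SSE2 path above amounts to the following runtime dispatch. This C
rendering is only a sketch; aligned_loop/unaligned_loop are
hypothetical stand-ins for the two yuv2plane1_mainloop expansions.

#include <stdint.h>

void aligned_loop(uint8_t *dst, int w);    /* movdqa-style aligned stores */
void unaligned_loop(uint8_t *dst, int w);  /* movdqu-style unaligned stores */

void yuv2plane1_dispatch(uint8_t *dst, int w)
{
    if (((uintptr_t)dst & 15) == 0)        /* equivalent of "test dstq, 15" */
        aligned_loop(dst, w);
    else
        unaligned_loop(dst, w);
}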