aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/videodsp.asm
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2013-10-24 06:54:32 -0400
committer Michael Niedermayer <michaelni@gmx.at> 2013-10-24 13:36:55 +0200
commit 1b3a7e1f42c3d89253e9837ada98e6bfb0cbab2f (patch)
tree 3d69ff98c1ab4fd5d99dc651183f18f74b46e8aa /libavcodec/x86/videodsp.asm
parent 210afae0ba651a4f11468449989c6334bb856268 (diff)
download ffmpeg-1b3a7e1f42c3d89253e9837ada98e6bfb0cbab2f.tar.gz
avcodec/x86/videodsp: Properly mark sse2 instructions in emulated_edge_mc x86 simd as such.
Should fix crashes or corrupt output on pre-SSE2 CPUs when they were using SSE2-code (e.g. AMD Athlon XP 2400+ or Intel Pentium III) in hfix or hvar single-edge (left/right) extension functions.

Tested-by: Ingo Brückl <ib@wupperonline.de>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/videodsp.asm')
-rw-r--r-- libavcodec/x86/videodsp.asm | 12
1 files changed, 6 insertions, 6 deletions
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index aa865f5a2b..6d5b57e086 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -100,10 +100,10 @@ cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
; FIXME also write a ssse3 version using pshufb
movzx wd, byte [dstq+start_xq] ; w = read(1)
imul wd, 0x01010101 ; w *= 0x01010101
- movd m0, wd ; FIXME this is sse2, not sse
+ movd m0, wd
mov wq, n_wordsq ; initialize w
-%if cpuflag(sse)
- shufps m0, m0, q0000 ; splat
+%if cpuflag(sse2)
+ pshufd m0, m0, q0000 ; splat
%else ; mmx
punpckldq m0, m0 ; splat
%endif ; mmx/sse
@@ -124,7 +124,7 @@ INIT_MMX mmx
hvar_fn
%endif
-INIT_XMM sse
+INIT_XMM sse2
hvar_fn
; macro to read/write a horizontal number of pixels (%2) to/from registers
@@ -353,7 +353,7 @@ VERTICAL_EXTEND 16, 22
%if %1 >= 8
movd m0, vald
%if mmsize == 16
- shufps m0, m0, q0000
+ pshufd m0, m0, q0000
%else
punpckldq m0, m0
%endif
@@ -423,7 +423,7 @@ H_EXTEND 2, 14
H_EXTEND 16, 22
%endif
-INIT_XMM sse
+INIT_XMM sse2
H_EXTEND 16, 22
%macro PREFETCH_FN 1