dsputil_mmx: fix incorrect assembly code

In libavcodec/x86/dsputil_mmx.c, the function ff_put_pixels_clamped_mmx() contains two inline assembly blocks. In the first block (the unrolled loop), instructions such as "movq 8%3, %%mm1 \n\t" are incorrect. The programmer's intent is clear: a load from p + 8. But this assembly code does not guarantee that. It only works if the compiler puts p in a register, so that operand 3 expands to "(%edi)" and the instruction becomes "movq 8(%edi), %mm1". During optimization, however, the compiler may be able to constant-propagate into p. Suppose p = &x[10000]. Then operand 3 can become "10000(%edi)", where %edi holds &x, and because the "8" is simply concatenated in front of the operand text, the instruction becomes "movq 810000(%edi), %mm1". That is, the load uses a displacement of 810000 instead of adding 8 to the address in p, which causes a segmentation fault. This error was already fixed in the second assembly block, but not in the unrolled loop. How to reproduce: the error is exposed when ffmpeg is built with the Intel C++ Compiler using IPO+PGO optimization; ffmpeg then crashes when decoding an MJPEG video. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
author: yang <yang.y.wang@intel.com> 2012-07-24 00:51:10 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2012-07-24 00:55:05 +0200
commit: 6a2bad2c4f609c2a35672c2e5b9775b0865b4c3a (patch)
tree: c10a702318d02245f1a70273189fc5af6c120070 /libavcodec
parent: dc31b84cbfb73b31211afaacfb8d340c14282a2e (diff)
download: ffmpeg-6a2bad2c4f609c2a35672c2e5b9775b0865b4c3a.tar.gz
1 files changed, 9 insertions, 9 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index ee3243c27b..c2ee5a85e4 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -246,14 +246,14 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
     pix = pixels;
     /* unrolled loop */
     __asm__ volatile (
-        "movq        %3, %%mm0          \n\t"
-        "movq       8%3, %%mm1          \n\t"
-        "movq      16%3, %%mm2          \n\t"
-        "movq      24%3, %%mm3          \n\t"
-        "movq      32%3, %%mm4          \n\t"
-        "movq      40%3, %%mm5          \n\t"
-        "movq      48%3, %%mm6          \n\t"
-        "movq      56%3, %%mm7          \n\t"
+        "movq        (%3), %%mm0          \n\t"
+        "movq       8(%3), %%mm1          \n\t"
+        "movq      16(%3), %%mm2          \n\t"
+        "movq      24(%3), %%mm3          \n\t"
+        "movq      32(%3), %%mm4          \n\t"
+        "movq      40(%3), %%mm5          \n\t"
+        "movq      48(%3), %%mm6          \n\t"
+        "movq      56(%3), %%mm7          \n\t"
         "packuswb %%mm1, %%mm0          \n\t"
         "packuswb %%mm3, %%mm2          \n\t"
         "packuswb %%mm5, %%mm4          \n\t"
@@ -263,7 +263,7 @@ void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels,
         "movq     %%mm4, (%0, %1, 2)    \n\t"
         "movq     %%mm6, (%0, %2)       \n\t"
         :: "r"(pix), "r"((x86_reg)line_size), "r"((x86_reg)line_size * 3),
-           "m"(*p)
+           "r"(p)
         : "memory");
     pix += line_size * 4;
     p   += 32;
author	yang <yang.y.wang@intel.com>	2012-07-24 00:51:10 +0200
committer	Michael Niedermayer <michaelni@gmx.at>	2012-07-24 00:55:05 +0200
commit	6a2bad2c4f609c2a35672c2e5b9775b0865b4c3a (patch)
tree	c10a702318d02245f1a70273189fc5af6c120070 /libavcodec
parent	dc31b84cbfb73b31211afaacfb8d340c14282a2e (diff)
download	ffmpeg-6a2bad2c4f609c2a35672c2e5b9775b0865b4c3a.tar.gz