aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlex Converse <alex.converse@gmail.com>2009-04-02 21:02:42 +0000
committerAlex Converse <alex.converse@gmail.com>2009-04-02 21:02:42 +0000
commit3dd6531208f14f932d880de0661ad5163f0395a9 (patch)
tree239878fec6e404fe8b7e595111399f74d710e37a
parentdd3bbb49037b29a6a4d6567b32526779afd1020b (diff)
downloadffmpeg-3dd6531208f14f932d880de0661ad5163f0395a9.tar.gz
Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
Originally committed as revision 18319 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/x86/dsputil_mmx.c44
1 files changed, 31 insertions, 13 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 2f47f5fde9..7d64cfb584 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -28,7 +28,6 @@
#include "libavcodec/mpegvideo.h"
#include "libavcodec/simple_idct.h"
#include "dsputil_mmx.h"
-#include "mmx.h"
#include "vp3dsp_mmx.h"
#include "vp3dsp_sse2.h"
#include "vp6dsp_mmx.h"
@@ -273,22 +272,41 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
:"memory");
}
-static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
+DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) =
{ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+#define put_signed_pixels_clamped_mmx_half(off) \
+ "movq "#off"(%2), %%mm1 \n\t"\
+ "movq 16+"#off"(%2), %%mm2 \n\t"\
+ "movq 32+"#off"(%2), %%mm3 \n\t"\
+ "movq 48+"#off"(%2), %%mm4 \n\t"\
+ "packsswb 8+"#off"(%2), %%mm1 \n\t"\
+ "packsswb 24+"#off"(%2), %%mm2 \n\t"\
+ "packsswb 40+"#off"(%2), %%mm3 \n\t"\
+ "packsswb 56+"#off"(%2), %%mm4 \n\t"\
+ "paddb %%mm0, %%mm1 \n\t"\
+ "paddb %%mm0, %%mm2 \n\t"\
+ "paddb %%mm0, %%mm3 \n\t"\
+ "paddb %%mm0, %%mm4 \n\t"\
+ "movq %%mm1, (%0) \n\t"\
+ "movq %%mm2, (%0, %3) \n\t"\
+ "movq %%mm3, (%0, %3, 2) \n\t"\
+ "movq %%mm4, (%0, %1) \n\t"
+
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
{
- int i;
-
- movq_m2r(*vector128, mm1);
- for (i = 0; i < 8; i++) {
- movq_m2r(*(block), mm0);
- packsswb_m2r(*(block + 4), mm0);
- block += 8;
- paddb_r2r(mm1, mm0);
- movq_r2m(mm0, *pixels);
- pixels += line_size;
- }
+ x86_reg line_skip = line_size;
+ x86_reg av_uninit(line_skip3);
+
+ __asm__ volatile (
+ "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
+ "lea (%3, %3, 2), %1 \n\t"
+ put_signed_pixels_clamped_mmx_half(0)
+ "lea (%0, %3, 4), %0 \n\t"
+ put_signed_pixels_clamped_mmx_half(64)
+ :"+r" (pixels), "+r" (line_skip3)
+ :"r" (block), "r"(line_skip)
+ :"memory");
}
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)