diff options
author | Alex Converse <alex.converse@gmail.com> | 2009-04-02 21:02:42 +0000 |
---|---|---|
committer | Alex Converse <alex.converse@gmail.com> | 2009-04-02 21:02:42 +0000 |
commit | 3dd6531208f14f932d880de0661ad5163f0395a9 (patch) | |
tree | 239878fec6e404fe8b7e595111399f74d710e37a /libavcodec/x86 | |
parent | dd3bbb49037b29a6a4d6567b32526779afd1020b (diff) | |
download | ffmpeg-3dd6531208f14f932d880de0661ad5163f0395a9.tar.gz |
Rewrite put_signed_pixels_clamped_mmx() to eliminate mmx.h from dsputil_mmx.c.
Originally committed as revision 18319 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 44 |
1 files changed, 31 insertions, 13 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 2f47f5fde9..7d64cfb584 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -28,7 +28,6 @@ #include "libavcodec/mpegvideo.h" #include "libavcodec/simple_idct.h" #include "dsputil_mmx.h" -#include "mmx.h" #include "vp3dsp_mmx.h" #include "vp3dsp_sse2.h" #include "vp6dsp_mmx.h" @@ -273,22 +272,41 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size :"memory"); } -static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) = +DECLARE_ALIGNED_8(const unsigned char, ff_vector128[8]) = { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 }; +#define put_signed_pixels_clamped_mmx_half(off) \ + "movq "#off"(%2), %%mm1 \n\t"\ + "movq 16+"#off"(%2), %%mm2 \n\t"\ + "movq 32+"#off"(%2), %%mm3 \n\t"\ + "movq 48+"#off"(%2), %%mm4 \n\t"\ + "packsswb 8+"#off"(%2), %%mm1 \n\t"\ + "packsswb 24+"#off"(%2), %%mm2 \n\t"\ + "packsswb 40+"#off"(%2), %%mm3 \n\t"\ + "packsswb 56+"#off"(%2), %%mm4 \n\t"\ + "paddb %%mm0, %%mm1 \n\t"\ + "paddb %%mm0, %%mm2 \n\t"\ + "paddb %%mm0, %%mm3 \n\t"\ + "paddb %%mm0, %%mm4 \n\t"\ + "movq %%mm1, (%0) \n\t"\ + "movq %%mm2, (%0, %3) \n\t"\ + "movq %%mm3, (%0, %3, 2) \n\t"\ + "movq %%mm4, (%0, %1) \n\t" + void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) { - int i; - - movq_m2r(*vector128, mm1); - for (i = 0; i < 8; i++) { - movq_m2r(*(block), mm0); - packsswb_m2r(*(block + 4), mm0); - block += 8; - paddb_r2r(mm1, mm0); - movq_r2m(mm0, *pixels); - pixels += line_size; - } + x86_reg line_skip = line_size; + x86_reg av_uninit(line_skip3); + + __asm__ volatile ( + "movq "MANGLE(ff_vector128)", %%mm0 \n\t" + "lea (%3, %3, 2), %1 \n\t" + put_signed_pixels_clamped_mmx_half(0) + "lea (%0, %3, 4), %0 \n\t" + put_signed_pixels_clamped_mmx_half(64) + :"+r" (pixels), "+r" (line_skip3) + :"r" (block), "r"(line_skip) + :"memory"); } void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |