diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2008-12-10 21:35:17 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2008-12-10 21:35:17 +0000 |
commit | 5fecfb7d58a12baf326e99f2d071060f2638d93c (patch) | |
tree | c133296ab20628e90ac488b2d2e265117aaff769 /libavcodec/i386/dsputil_mmx.c | |
parent | 5fac277602ebab5990c4d20342a4e55bb7c43b93 (diff) | |
download | ffmpeg-5fecfb7d58a12baf326e99f2d071060f2638d93c.tar.gz |
clear_block mmx
Originally committed as revision 16045 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx.c')
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 50 |
1 files changed, 37 insertions, 13 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 28e0e83bbd..c3dcb77842 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -464,21 +464,42 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si ); } -static void clear_blocks_mmx(DCTELEM *blocks) +#define CLEAR_BLOCKS(name,n) \ +static void name(DCTELEM *blocks)\ +{\ + __asm__ volatile(\ + "pxor %%mm7, %%mm7 \n\t"\ + "mov %1, %%"REG_a" \n\t"\ + "1: \n\t"\ + "movq %%mm7, (%0, %%"REG_a") \n\t"\ + "movq %%mm7, 8(%0, %%"REG_a") \n\t"\ + "movq %%mm7, 16(%0, %%"REG_a") \n\t"\ + "movq %%mm7, 24(%0, %%"REG_a") \n\t"\ + "add $32, %%"REG_a" \n\t"\ + " js 1b \n\t"\ + : : "r" (((uint8_t *)blocks)+128*n),\ + "i" (-128*n)\ + : "%"REG_a\ + );\ +} +CLEAR_BLOCKS(clear_blocks_mmx, 6) +CLEAR_BLOCKS(clear_block_mmx, 1) + +static void clear_block_sse(DCTELEM *block) { __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" - "mov $-128*6, %%"REG_a" \n\t" - "1: \n\t" - "movq %%mm7, (%0, %%"REG_a") \n\t" - "movq %%mm7, 8(%0, %%"REG_a") \n\t" - "movq %%mm7, 16(%0, %%"REG_a") \n\t" - "movq %%mm7, 24(%0, %%"REG_a") \n\t" - "add $32, %%"REG_a" \n\t" - " js 1b \n\t" - : : "r" (((uint8_t *)blocks)+128*6) - : "%"REG_a - ); + "xorps %%xmm0, %%xmm0 \n" + "movaps %%xmm0, (%0) \n" + "movaps %%xmm0, 16(%0) \n" + "movaps %%xmm0, 32(%0) \n" + "movaps %%xmm0, 48(%0) \n" + "movaps %%xmm0, 64(%0) \n" + "movaps %%xmm0, 80(%0) \n" + "movaps %%xmm0, 96(%0) \n" + "movaps %%xmm0, 112(%0) \n" + :: "r"(block) + : "memory" + ); } static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ @@ -2569,7 +2590,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = put_pixels_clamped_mmx; c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; c->add_pixels_clamped = add_pixels_clamped_mmx; + c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; + if (mm_flags & FF_MM_SSE) + c->clear_block = clear_block_sse; #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ |