diff options
author | Baptiste Coudurier <baptiste.coudurier@gmail.com> | 2008-10-09 18:36:49 +0000 |
---|---|---|
committer | Baptiste Coudurier <baptiste.coudurier@gmail.com> | 2008-10-09 18:36:49 +0000 |
commit | f76543c9b9a73a386788daf91ce3eba165e5ec8e (patch) | |
tree | 8dcbf9367ff34c7379c445f3bc1e6d3907e77b56 /libavcodec/i386/dsputilenc_mmx.c | |
parent | 965530e156035305c6262dc2f3ce3842fab931a7 (diff) | |
download | ffmpeg-f76543c9b9a73a386788daf91ce3eba165e5ec8e.tar.gz |
get_pixels_sse2, ~+12% performance compared to mmx
Originally committed as revision 15591 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputilenc_mmx.c')
-rw-r--r-- | libavcodec/i386/dsputilenc_mmx.c | 35 |
1 files changed, 35 insertions, 0 deletions
```diff
diff --git a/libavcodec/i386/dsputilenc_mmx.c b/libavcodec/i386/dsputilenc_mmx.c
index be423f8f24..8c7c4f07f7 100644
--- a/libavcodec/i386/dsputilenc_mmx.c
+++ b/libavcodec/i386/dsputilenc_mmx.c
@@ -56,6 +56,40 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
     );
 }
 
+static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
+{
+    asm volatile(
+        "pxor %%xmm7, %%xmm7 \n\t"
+        "movq (%0), %%xmm0 \n\t"
+        "movq (%0, %2), %%xmm1 \n\t"
+        "movq (%0, %2,2), %%xmm2 \n\t"
+        "movq (%0, %3), %%xmm3 \n\t"
+        "lea (%0,%2,4), %0 \n\t"
+        "punpcklbw %%xmm7, %%xmm0 \n\t"
+        "punpcklbw %%xmm7, %%xmm1 \n\t"
+        "punpcklbw %%xmm7, %%xmm2 \n\t"
+        "punpcklbw %%xmm7, %%xmm3 \n\t"
+        "movdqa %%xmm0, (%1) \n\t"
+        "movdqa %%xmm1, 16(%1) \n\t"
+        "movdqa %%xmm2, 32(%1) \n\t"
+        "movdqa %%xmm3, 48(%1) \n\t"
+        "movq (%0), %%xmm0 \n\t"
+        "movq (%0, %2), %%xmm1 \n\t"
+        "movq (%0, %2,2), %%xmm2 \n\t"
+        "movq (%0, %3), %%xmm3 \n\t"
+        "punpcklbw %%xmm7, %%xmm0 \n\t"
+        "punpcklbw %%xmm7, %%xmm1 \n\t"
+        "punpcklbw %%xmm7, %%xmm2 \n\t"
+        "punpcklbw %%xmm7, %%xmm3 \n\t"
+        "movdqa %%xmm0, 64(%1) \n\t"
+        "movdqa %%xmm1, 80(%1) \n\t"
+        "movdqa %%xmm2, 96(%1) \n\t"
+        "movdqa %%xmm3, 112(%1) \n\t"
+        : "+r" (pixels)
+        : "r" (block), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3)
+    );
+}
+
 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
 {
     asm volatile(
@@ -1375,6 +1409,7 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
     }
 
     if(mm_flags & MM_SSE2){
+        c->get_pixels = get_pixels_sse2;
         c->sum_abs_dctelem= sum_abs_dctelem_sse2;
         c->hadamard8_diff[0]= hadamard8_diff16_sse2;
         c->hadamard8_diff[1]= hadamard8_diff_sse2;
```