diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2007-08-20 22:29:21 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2007-08-20 22:29:21 +0000 |
commit | 3a9f44d5d5dcc9a805aa8345d922c1c5a53b681a (patch) | |
tree | 5dec3e9f5189916a4b28c52f32381047fe838b41 /libavcodec/i386/snowdsp_mmx.c | |
parent | f29bd6fa714482f92de4115c7fd7b81058b153ff (diff) | |
download | ffmpeg-3a9f44d5d5dcc9a805aa8345d922c1c5a53b681a.tar.gz |
The unneeded double subtractions were, of course, also blindly put in the
MMX code; this commit removes them.
This also makes the affected code 4% faster.
Originally committed as revision 10156 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/snowdsp_mmx.c')
-rw-r--r-- | libavcodec/i386/snowdsp_mmx.c | 33 |
1 files changed, 15 insertions, 18 deletions
```diff
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index 1d5a4f4ff7..4d40e46f8f 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -294,9 +294,10 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
         DWTELEM * const ref = b+w2 - 1;
 
         i = 1;
-        b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS);
+        b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
         asm volatile(
-            "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
+            "pcmpeqd %%mm7, %%mm7 \n\t"
+            "psrld $29, %%mm7 \n\t"
         ::);
         for(; i<w_l-3; i+=4){
             asm volatile(
@@ -304,22 +305,18 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
                 "movq 8(%1), %%mm4 \n\t"
                 "paddd 4(%1), %%mm0 \n\t"
                 "paddd 12(%1), %%mm4 \n\t"
-                "movq %%mm7, %%mm1 \n\t"
-                "movq %%mm7, %%mm5 \n\t"
-                "psubd %%mm0, %%mm1 \n\t"
-                "psubd %%mm4, %%mm5 \n\t"
-                "movq (%0), %%mm0 \n\t"
-                "movq 8(%0), %%mm4 \n\t"
-                "pslld $2, %%mm0 \n\t"
-                "pslld $2, %%mm4 \n\t"
-                "psubd %%mm0, %%mm1 \n\t"
-                "psubd %%mm4, %%mm5 \n\t"
-                "psrad $4, %%mm1 \n\t"
-                "psrad $4, %%mm5 \n\t"
-                "movq (%0), %%mm0 \n\t"
-                "movq 8(%0), %%mm4 \n\t"
-                "psubd %%mm1, %%mm0 \n\t"
-                "psubd %%mm5, %%mm4 \n\t"
+                "paddd %%mm7, %%mm0 \n\t"
+                "paddd %%mm7, %%mm4 \n\t"
+                "psrad $2, %%mm0 \n\t"
+                "psrad $2, %%mm4 \n\t"
+                "movq (%0), %%mm1 \n\t"
+                "movq 8(%0), %%mm5 \n\t"
+                "paddd %%mm1, %%mm0 \n\t"
+                "paddd %%mm5, %%mm4 \n\t"
+                "psrad $2, %%mm0 \n\t"
+                "psrad $2, %%mm4 \n\t"
+                "paddd %%mm1, %%mm0 \n\t"
+                "paddd %%mm5, %%mm4 \n\t"
                 "movq %%mm0, (%0) \n\t"
                 "movq %%mm4, 8(%0) \n\t"
                 :: "r"(&b[i]), "r"(&ref[i])
```