aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/i386/snowdsp_mmx.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2007-08-20 22:29:21 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2007-08-20 22:29:21 +0000
commit: 3a9f44d5d5dcc9a805aa8345d922c1c5a53b681a (patch)
tree: 5dec3e9f5189916a4b28c52f32381047fe838b41 /libavcodec/i386/snowdsp_mmx.c
parent: f29bd6fa714482f92de4115c7fd7b81058b153ff (diff)
download: ffmpeg-3a9f44d5d5dcc9a805aa8345d922c1c5a53b681a.tar.gz
And of course the unneeded double subtractions were blindly put in the
MMX code. This also makes the affected code 4% faster.
Originally committed as revision 10156 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/snowdsp_mmx.c')
-rw-r--r-- libavcodec/i386/snowdsp_mmx.c 33
1 files changed, 15 insertions, 18 deletions
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c
index 1d5a4f4ff7..4d40e46f8f 100644
--- a/libavcodec/i386/snowdsp_mmx.c
+++ b/libavcodec/i386/snowdsp_mmx.c
@@ -294,9 +294,10 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
DWTELEM * const ref = b+w2 - 1;
i = 1;
- b[0] = b[0] - (((-2 * ref[1] + W_BO) - 4 * b[0]) >> W_BS);
+ b[0] = b[0] + (((2 * ref[1] + W_BO-1) + 4 * b[0]) >> W_BS);
asm volatile(
- "pslld $1, %%mm7 \n\t" /* xmm7 already holds a '4' from 2 lifts ago. */
+ "pcmpeqd %%mm7, %%mm7 \n\t"
+ "psrld $29, %%mm7 \n\t"
::);
for(; i<w_l-3; i+=4){
asm volatile(
@@ -304,22 +305,18 @@ void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width){
"movq 8(%1), %%mm4 \n\t"
"paddd 4(%1), %%mm0 \n\t"
"paddd 12(%1), %%mm4 \n\t"
- "movq %%mm7, %%mm1 \n\t"
- "movq %%mm7, %%mm5 \n\t"
- "psubd %%mm0, %%mm1 \n\t"
- "psubd %%mm4, %%mm5 \n\t"
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm4 \n\t"
- "pslld $2, %%mm0 \n\t"
- "pslld $2, %%mm4 \n\t"
- "psubd %%mm0, %%mm1 \n\t"
- "psubd %%mm4, %%mm5 \n\t"
- "psrad $4, %%mm1 \n\t"
- "psrad $4, %%mm5 \n\t"
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm4 \n\t"
- "psubd %%mm1, %%mm0 \n\t"
- "psubd %%mm5, %%mm4 \n\t"
+ "paddd %%mm7, %%mm0 \n\t"
+ "paddd %%mm7, %%mm4 \n\t"
+ "psrad $2, %%mm0 \n\t"
+ "psrad $2, %%mm4 \n\t"
+ "movq (%0), %%mm1 \n\t"
+ "movq 8(%0), %%mm5 \n\t"
+ "paddd %%mm1, %%mm0 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
+ "psrad $2, %%mm0 \n\t"
+ "psrad $2, %%mm4 \n\t"
+ "paddd %%mm1, %%mm0 \n\t"
+ "paddd %%mm5, %%mm4 \n\t"
"movq %%mm0, (%0) \n\t"
"movq %%mm4, 8(%0) \n\t"
:: "r"(&b[i]), "r"(&ref[i])