aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/i386/dsputil_mmx.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at>, 2008-07-07 21:25:18 +0000
committer: Michael Niedermayer <michaelni@gmx.at>, 2008-07-07 21:25:18 +0000
commit: 35ee72b1d72a4c8fc0ae4e76ad00a71e831b8dbe (patch)
tree: 0255103a0ced43ca8b212302f41bfb04b30f2420 /libavcodec/i386/dsputil_mmx.c
parent: 560fa9bf51e20039120ffd1bfd7bcde6974264c6 (diff)
download: ffmpeg-35ee72b1d72a4c8fc0ae4e76ad00a71e831b8dbe.tar.gz
1 c-asm loop less and 1x unroll of float_to_int16_sse()
25% faster

Originally committed as revision 14104 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx.c')
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 31
1 files changed, 19 insertions, 12 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index cef9a106a1..5d5c1b3f7a 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2045,18 +2045,25 @@ static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
);
}
static void float_to_int16_sse(int16_t *dst, const float *src, long len){
- int i;
- for(i=0; i<len; i+=4) {
- asm volatile(
- "cvtps2pi %1, %%mm0 \n\t"
- "cvtps2pi %2, %%mm1 \n\t"
- "packssdw %%mm1, %%mm0 \n\t"
- "movq %%mm0, %0 \n\t"
- :"=m"(dst[i])
- :"m"(src[i]), "m"(src[i+2])
- );
- }
- asm volatile("emms");
+ asm volatile(
+ "add %0 , %0 \n\t"
+ "lea (%2,%0,2) , %2 \n\t"
+ "add %0 , %1 \n\t"
+ "neg %0 \n\t"
+ "1: \n\t"
+ "cvtps2pi (%2,%0,2) , %%mm0 \n\t"
+ "cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
+ "cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
+ "cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
+ "packssdw %%mm1 , %%mm0 \n\t"
+ "packssdw %%mm3 , %%mm2 \n\t"
+ "movq %%mm0 , (%1,%0) \n\t"
+ "movq %%mm2 , 8(%1,%0) \n\t"
+ "add $16 , %0 \n\t"
+ " js 1b \n\t"
+ "emms \n\t"
+ :"+r"(len), "+r"(dst), "+r"(src)
+ );
}
extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);