diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2008-07-09 07:21:12 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2008-07-09 07:21:12 +0000 |
commit | e98750c3736410b451e2f4d8b5eeee830425226f (patch) | |
tree | c0f7096dd549a9d9e7285384a0fb917734091792 | |
parent | 4e999ebe038fa71e6530bfce0cbc271804a3a542 (diff) | |
download | ffmpeg-e98750c3736410b451e2f4d8b5eeee830425226f.tar.gz |
float_to_int16_sse2()
20% faster than sse
Originally committed as revision 14138 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 20 |
1 file changed, 20 insertions, 0 deletions
```diff
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 5d5c1b3f7a..dd6061cb09 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2066,6 +2066,23 @@ static void float_to_int16_sse(int16_t *dst, const float *src, long len){
     );
 }
 
+static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
+    asm volatile(
+        "add %0 , %0 \n\t"
+        "lea (%2,%0,2) , %2 \n\t"
+        "add %0 , %1 \n\t"
+        "neg %0 \n\t"
+        "1: \n\t"
+        "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
+        "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
+        "packssdw %%xmm1 , %%xmm0 \n\t"
+        "movdqa %%xmm0 , (%1,%0) \n\t"
+        "add $16 , %0 \n\t"
+        " js 1b \n\t"
+        :"+r"(len), "+r"(dst), "+r"(src)
+    );
+}
+
 extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
 extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
 extern void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
@@ -2441,6 +2458,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->vector_fmul_reverse = vector_fmul_reverse_sse;
             c->vector_fmul_add_add = vector_fmul_add_add_sse;
         }
+        if(mm_flags & MM_SSE2){
+            c->float_to_int16 = float_to_int16_sse2;
+        }
         if(mm_flags & MM_3DNOW)
             c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
     }
```