aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/fmtconvert_mmx.c
diff options
context:
space:
mode:
authorJustin Ruggles <justin.ruggles@gmail.com>2011-10-10 00:43:08 -0400
committerJustin Ruggles <justin.ruggles@gmail.com>2011-10-21 10:13:05 -0400
commitaad3429d4e34b74e4eb0b37b17f32804e217cf02 (patch)
tree8f80e3579d489b84296ec60165002a13723dc334 /libavcodec/x86/fmtconvert_mmx.c
parent4e8e2624767f4af0eaa932c543d072fed96fd586 (diff)
downloadffmpeg-aad3429d4e34b74e4eb0b37b17f32804e217cf02.tar.gz
fmtconvert: port float_to_int16_interleave() 2-channel x86 inline asm to yasm
Diffstat (limited to 'libavcodec/x86/fmtconvert_mmx.c')
-rw-r--r--libavcodec/x86/fmtconvert_mmx.c69
1 files changed, 9 insertions, 60 deletions
diff --git a/libavcodec/x86/fmtconvert_mmx.c b/libavcodec/x86/fmtconvert_mmx.c
index 86957b45ff..17079d3c82 100644
--- a/libavcodec/x86/fmtconvert_mmx.c
+++ b/libavcodec/x86/fmtconvert_mmx.c
@@ -35,13 +35,17 @@ void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
+void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
+void ff_float_to_int16_interleave2_sse (int16_t *dst, const float **src, long len);
+void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
+
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
-#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
+#define FLOAT_TO_INT16_INTERLEAVE(cpu) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
DECLARE_ALIGNED(16, int16_t, tmp)[len];\
@@ -57,71 +61,16 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
if(channels==1)\
ff_float_to_int16_##cpu(dst, src[0], len);\
else if(channels==2){\
- x86_reg reglen = len; \
- const float *src0 = src[0];\
- const float *src1 = src[1];\
- __asm__ volatile(\
- "shl $2, %0 \n"\
- "add %0, %1 \n"\
- "add %0, %2 \n"\
- "add %0, %3 \n"\
- "neg %0 \n"\
- body\
- :"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
- );\
+ ff_float_to_int16_interleave2_##cpu(dst, src, len);\
}else if(channels==6){\
ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
-FLOAT_TO_INT16_INTERLEAVE(3dnow,
- "1: \n"
- "pf2id (%2,%0), %%mm0 \n"
- "pf2id 8(%2,%0), %%mm1 \n"
- "pf2id (%3,%0), %%mm2 \n"
- "pf2id 8(%3,%0), %%mm3 \n"
- "packssdw %%mm1, %%mm0 \n"
- "packssdw %%mm3, %%mm2 \n"
- "movq %%mm0, %%mm1 \n"
- "punpcklwd %%mm2, %%mm0 \n"
- "punpckhwd %%mm2, %%mm1 \n"
- "movq %%mm0, (%1,%0)\n"
- "movq %%mm1, 8(%1,%0)\n"
- "add $16, %0 \n"
- "js 1b \n"
- "femms \n"
-)
-
-FLOAT_TO_INT16_INTERLEAVE(sse,
- "1: \n"
- "cvtps2pi (%2,%0), %%mm0 \n"
- "cvtps2pi 8(%2,%0), %%mm1 \n"
- "cvtps2pi (%3,%0), %%mm2 \n"
- "cvtps2pi 8(%3,%0), %%mm3 \n"
- "packssdw %%mm1, %%mm0 \n"
- "packssdw %%mm3, %%mm2 \n"
- "movq %%mm0, %%mm1 \n"
- "punpcklwd %%mm2, %%mm0 \n"
- "punpckhwd %%mm2, %%mm1 \n"
- "movq %%mm0, (%1,%0)\n"
- "movq %%mm1, 8(%1,%0)\n"
- "add $16, %0 \n"
- "js 1b \n"
- "emms \n"
-)
-
-FLOAT_TO_INT16_INTERLEAVE(sse2,
- "1: \n"
- "cvtps2dq (%2,%0), %%xmm0 \n"
- "cvtps2dq (%3,%0), %%xmm1 \n"
- "packssdw %%xmm1, %%xmm0 \n"
- "movhlps %%xmm0, %%xmm1 \n"
- "punpcklwd %%xmm1, %%xmm0 \n"
- "movdqa %%xmm0, (%1,%0) \n"
- "add $16, %0 \n"
- "js 1b \n"
-)
+FLOAT_TO_INT16_INTERLEAVE(3dnow)
+FLOAT_TO_INT16_INTERLEAVE(sse)
+FLOAT_TO_INT16_INTERLEAVE(sse2)
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
if(channels==6)