aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Loren Merritt <lorenm@u.washington.edu> 2008-08-14 04:40:46 +0000
committer Loren Merritt <lorenm@u.washington.edu> 2008-08-14 04:40:46 +0000
commit ebceaa1cd5f17e7fea0ac87dde0f45fd360ff457 (patch)
tree 6a350d3953e3e919ea1f428b434c7ad57e79fb33
parent ee4675373988bccc96a39f48ffad200926778ad2 (diff)
download ffmpeg-ebceaa1cd5f17e7fea0ac87dde0f45fd360ff457.tar.gz
gcc chokes on the 7 registers needed for float_to_int16_interleave6 (even inside HAVE_7REGS), so write it in yasm
Originally committed as revision 14749 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/Makefile 1
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 62
2 files changed, 20 insertions, 43 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index df08e02605..c8d8c63739 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -393,6 +393,7 @@ OBJS-$(HAVE_YASM) += i386/fft_mmx.o \
i386/fft_sse.o \
i386/fft_3dn.o \
i386/fft_3dn2.o \
+ i386/dsputil_yasm.o \
OBJS-$(CONFIG_GPL) += i386/idct_mmx.o
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 58d6f09f7a..0ff06d48cb 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2297,50 +2297,16 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
);
}
-#ifdef HAVE_7REGS
-#define FLOAT_TO_INT16_INTERLEAVE6(cpu, cvtps2pi, pswapd) \
-static void float_to_int16_interleave6_##cpu(int16_t *dst, const float **src, int len){\
- const float *src0 = src[0];\
- asm volatile(\
- "1: \n"\
- cvtps2pi" (%2), %%mm0 \n"\
- cvtps2pi" (%2,%3), %%mm1 \n"\
- cvtps2pi" (%2,%4), %%mm2 \n"\
- cvtps2pi" (%2,%5), %%mm3 \n"\
- cvtps2pi" (%2,%6), %%mm4 \n"\
- cvtps2pi" (%2,%7), %%mm5 \n"\
- "packssdw %%mm3, %%mm0 \n"\
- "packssdw %%mm4, %%mm1 \n"\
- "packssdw %%mm5, %%mm2 \n"\
- pswapd" %%mm0, %%mm3 \n"\
- "punpcklwd %%mm1, %%mm0 \n"\
- "punpckhwd %%mm2, %%mm1 \n"\
- "punpcklwd %%mm3, %%mm2 \n"\
- pswapd" %%mm0, %%mm3 \n"\
- "punpckldq %%mm2, %%mm0 \n"\
- "punpckhdq %%mm1, %%mm2 \n"\
- "punpckldq %%mm3, %%mm1 \n"\
- "movq %%mm0, (%1) \n"\
- "movq %%mm2, 16(%1) \n"\
- "movq %%mm1, 8(%1) \n"\
- "add $8, %2 \n"\
- "add $24, %1 \n"\
- "sub $2, %0 \n"\
- "jg 1b \n"\
- "emms \n"\
- :"+g"(len), "+r"(dst), "+r"(src0)\
- :"r"(4*(src[1]-src0)), "r"(4*(src[2]-src0)),\
- "r"(4*(src[3]-src0)), "r"(4*(src[4]-src0)),\
- "r"(4*(src[5]-src0))\
- );\
-}
-FLOAT_TO_INT16_INTERLEAVE6(sse, "cvtps2pi", "pshufw $0x4e,")
-FLOAT_TO_INT16_INTERLEAVE6(3dnow, "pf2id", "pswapd")
+#ifdef HAVE_YASM /* prototypes only: no C body for these three appears in this patch (presumably they come from i386/dsputil_yasm.o, which the Makefile hunk adds -- confirm) */
+void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
#else /* without yasm the same names are macros that forward to the float_to_int16_interleave_misc_* converters with the channel count fixed to 6 */
-#define float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
-#define float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
+#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
+#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
+#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) /* the 3dn2 name reuses the 3dnow misc converter in this fallback */
#endif
-#define float_to_int16_interleave6_sse2 float_to_int16_interleave6_sse
+#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
@@ -2370,7 +2336,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
:"+r"(len), "+r"(dst), "+r"(src0), "+r"(src1)\
);\
}else if(channels==6){\
- float_to_int16_interleave6_##cpu(dst, src, len);\
+ ff_float_to_int16_interleave6_##cpu(dst, src, len);\
}else\
float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
}
@@ -2423,6 +2389,13 @@ FLOAT_TO_INT16_INTERLEAVE(sse2,
"js 1b \n"
)
+static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ /* dispatcher: only the 6-channel case gets a dedicated 3dn2 routine; dsputil_init_mmx installs this when MM_3DNOWEXT is set and CODEC_FLAG_BITEXACT is not */
+ if(channels==6)
+ ff_float_to_int16_interleave6_3dn2(dst, src, len); /* NOTE(review): len is long here but the prototype for this routine takes int, so the value is narrowed at the call */
+ else
+ float_to_int16_interleave_3dnow(dst, src, len, channels); /* every other channel count is handed to the plain 3dnow interleaver unchanged */
+}
+
extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
@@ -2868,6 +2841,9 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if(mm_flags & MM_3DNOWEXT){
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
c->vector_fmul_window = vector_fmul_window_3dnow2;
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
+ }
}
if(mm_flags & MM_SSE){
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;