| field | value | date |
|---|---|---|
| author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
| committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
| commit | fef906c77c09940a2fdad155b2adc05080e17eda (patch) | |
| tree | 04fe0b67be6917b07bfb94a6af45b669f3a66107 /libavcodec/x86 | |
| parent | aeaf268e52fc11c1f64914a319e0edddf1346d6a (diff) | |
| download | ffmpeg-fef906c77c09940a2fdad155b2adc05080e17eda.tar.gz | |
Move vorbis_inverse_coupling from dsputil to vorbisdspcontext.
Conveniently (together with Justin's earlier patches), this makes
our vorbis decoder entirely independent of dsputil.
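For context, the decoder now reaches this routine through the small VorbisDSPContext added by this series instead of DSPContext. Below is a minimal usage sketch, assuming a generic ff_vorbisdsp_init() that installs the C fallback and then calls the per-arch init added in this patch; that init entry point and the fallback live outside this diff, so their exact wiring here is an assumption, not part of the change itself.

```c
#include "libavcodec/vorbisdsp.h"

/* Sketch only: VorbisDSPContext, vorbis_inverse_coupling and
 * ff_vorbisdsp_init_x86() come from the header/init added by this series;
 * ff_vorbisdsp_init() is assumed to set the C fallback and then apply the
 * x86 overrides on x86 builds. */
static VorbisDSPContext dsp;

static void vorbis_decoder_setup(void)
{
    ff_vorbisdsp_init(&dsp);
}

static void couple_channels(float *mag, float *ang, int blocksize)
{
    /* Formerly DSPContext.vorbis_inverse_coupling; now owned by vorbisdsp. */
    dsp.vorbis_inverse_coupling(mag, ang, blocksize);
}
```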
Diffstat (limited to 'libavcodec/x86')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | libavcodec/x86/Makefile | 1 |
| -rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 63 |
| -rw-r--r-- | libavcodec/x86/vorbisdsp_init.c | 101 |

3 files changed, 102 insertions(+), 63 deletions(-)
```diff
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b5a7694bcf..6069968a09 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER)            += x86/rv34dsp_init.o \
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 OBJS-$(CONFIG_VC1_DECODER)             += x86/vc1dsp_init.o
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
+OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
 OBJS-$(CONFIG_VP5_DECODER)             += x86/vp56dsp_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp56dsp_init.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 13f215135a..74f7df5002 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1829,65 +1829,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
     avg_pixels8_mmxext(dst, src, stride, 8);
 }
 
-static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
-{
-    int i;
-    __asm__ volatile ("pxor %%mm7, %%mm7":);
-    for (i = 0; i < blocksize; i += 2) {
-        __asm__ volatile (
-            "movq       %0, %%mm0    \n\t"
-            "movq       %1, %%mm1    \n\t"
-            "movq    %%mm0, %%mm2    \n\t"
-            "movq    %%mm1, %%mm3    \n\t"
-            "pfcmpge %%mm7, %%mm2    \n\t" // m <= 0.0
-            "pfcmpge %%mm7, %%mm3    \n\t" // a <= 0.0
-            "pslld     $31, %%mm2    \n\t" // keep only the sign bit
-            "pxor    %%mm2, %%mm1    \n\t"
-            "movq    %%mm3, %%mm4    \n\t"
-            "pand    %%mm1, %%mm3    \n\t"
-            "pandn   %%mm1, %%mm4    \n\t"
-            "pfadd   %%mm0, %%mm3    \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
-            "pfsub   %%mm4, %%mm0    \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
-            "movq    %%mm3, %1       \n\t"
-            "movq    %%mm0, %0       \n\t"
-            : "+m"(mag[i]), "+m"(ang[i])
-            :: "memory"
-        );
-    }
-    __asm__ volatile ("femms");
-}
-
-static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
-{
-    int i;
-
-    __asm__ volatile (
-        "movaps  %0, %%xmm5 \n\t"
-        :: "m"(ff_pdw_80000000[0])
-    );
-    for (i = 0; i < blocksize; i += 4) {
-        __asm__ volatile (
-            "movaps      %0, %%xmm0 \n\t"
-            "movaps      %1, %%xmm1 \n\t"
-            "xorps   %%xmm2, %%xmm2 \n\t"
-            "xorps   %%xmm3, %%xmm3 \n\t"
-            "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
-            "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
-            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
-            "xorps   %%xmm2, %%xmm1 \n\t"
-            "movaps  %%xmm3, %%xmm4 \n\t"
-            "andps   %%xmm1, %%xmm3 \n\t"
-            "andnps  %%xmm1, %%xmm4 \n\t"
-            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
-            "subps   %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
-            "movaps  %%xmm3, %1     \n\t"
-            "movaps  %%xmm0, %0     \n\t"
-            : "+m"(mag[i]), "+m"(ang[i])
-            :: "memory"
-        );
-    }
-}
-
 static void vector_clipf_sse(float *dst, const float *src,
                              float min, float max, int len)
 {
@@ -2238,8 +2179,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
         c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
         c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
     }
-
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
 #endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
@@ -2263,8 +2202,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
         }
     }
 
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
-
     c->vector_clipf = vector_clipf_sse;
 #endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
new file mode 100644
index 0000000000..5243095003
--- /dev/null
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+#include "dsputil_mmx.h" // for ff_pdw_80000000
+
+#if HAVE_INLINE_ASM
+#if ARCH_X86_32
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+    int i;
+    __asm__ volatile ("pxor %%mm7, %%mm7":);
+    for (i = 0; i < blocksize; i += 2) {
+        __asm__ volatile (
+            "movq       %0, %%mm0    \n\t"
+            "movq       %1, %%mm1    \n\t"
+            "movq    %%mm0, %%mm2    \n\t"
+            "movq    %%mm1, %%mm3    \n\t"
+            "pfcmpge %%mm7, %%mm2    \n\t" // m <= 0.0
+            "pfcmpge %%mm7, %%mm3    \n\t" // a <= 0.0
+            "pslld     $31, %%mm2    \n\t" // keep only the sign bit
+            "pxor    %%mm2, %%mm1    \n\t"
+            "movq    %%mm3, %%mm4    \n\t"
+            "pand    %%mm1, %%mm3    \n\t"
+            "pandn   %%mm1, %%mm4    \n\t"
+            "pfadd   %%mm0, %%mm3    \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+            "pfsub   %%mm4, %%mm0    \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+            "movq    %%mm3, %1       \n\t"
+            "movq    %%mm0, %0       \n\t"
+            : "+m"(mag[i]), "+m"(ang[i])
+            :: "memory"
+        );
+    }
+    __asm__ volatile ("femms");
+}
+#endif
+
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+    int i;
+
+    __asm__ volatile (
+        "movaps  %0, %%xmm5 \n\t"
+        :: "m"(ff_pdw_80000000[0])
+    );
+    for (i = 0; i < blocksize; i += 4) {
+        __asm__ volatile (
+            "movaps      %0, %%xmm0 \n\t"
+            "movaps      %1, %%xmm1 \n\t"
+            "xorps   %%xmm2, %%xmm2 \n\t"
+            "xorps   %%xmm3, %%xmm3 \n\t"
+            "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+            "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+            "xorps   %%xmm2, %%xmm1 \n\t"
+            "movaps  %%xmm3, %%xmm4 \n\t"
+            "andps   %%xmm1, %%xmm3 \n\t"
+            "andnps  %%xmm1, %%xmm4 \n\t"
+            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+            "subps   %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+            "movaps  %%xmm3, %1     \n\t"
+            "movaps  %%xmm0, %0     \n\t"
+            : "+m"(mag[i]), "+m"(ang[i])
+            :: "memory"
+        );
+    }
+}
+#endif
+
+void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
+{
+#if HAVE_INLINE_ASM
+    int mm_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+    if (mm_flags & AV_CPU_FLAG_3DNOW)
+        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+#endif /* ARCH_X86_32 */
+    if (mm_flags & AV_CPU_FLAG_SSE)
+        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+#endif /* HAVE_INLINE_ASM */
+}
```
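For readers less at home in inline asm, here is a plain-C sketch of the coupling step the 3DNow!/SSE routines above implement, reconstructed from the comments in the asm. The real C fallback lives in libavcodec/vorbisdsp.c and is not part of this diff, so the function name below is illustrative only.

```c
/* Illustrative scalar version of Vorbis inverse channel coupling:
 * mag[] holds the magnitude vector, ang[] the angle vector, and the pair
 * is rewritten in place. The SIMD code above computes the same result
 * branchlessly: it XORs the sign of mag into ang and then masks with the
 * (ang <= 0) comparison result to pick the add or subtract path. */
static void vorbis_inverse_coupling_c(float *mag, float *ang, int blocksize)
{
    int i;
    for (i = 0; i < blocksize; i++) {
        if (mag[i] > 0.0f) {
            if (ang[i] > 0.0f) {
                ang[i] = mag[i] - ang[i];
            } else {
                float tmp = ang[i];
                ang[i]  = mag[i];
                mag[i] += tmp;
            }
        } else {
            if (ang[i] > 0.0f) {
                ang[i] += mag[i];
            } else {
                float tmp = ang[i];
                ang[i]  = mag[i];
                mag[i] -= tmp;
            }
        }
    }
}
```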