| field | value | date |
|---|---|---|
| author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
| committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-19 22:21:10 -0800 |
| commit | fef906c77c09940a2fdad155b2adc05080e17eda (patch) | |
| tree | 04fe0b67be6917b07bfb94a6af45b669f3a66107 /libavcodec/x86 | |
| parent | aeaf268e52fc11c1f64914a319e0edddf1346d6a (diff) | |
| download | ffmpeg-fef906c77c09940a2fdad155b2adc05080e17eda.tar.gz | |
Move vorbis_inverse_coupling from dsputil to vorbisdspcontext.
Conveniently (together with Justin's earlier patches), this makes
our vorbis decoder entirely independent of dsputil.
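For context, the decoder now reaches this routine through the small VorbisDSPContext added by this series instead of DSPContext. Below is a minimal usage sketch, assuming a generic ff_vorbisdsp_init() that installs the C fallback and then calls the per-arch init added in this patch; that init entry point and the fallback live outside this diff, so their exact wiring here is an assumption, not part of the change itself.

```c
#include "libavcodec/vorbisdsp.h"

/* Sketch only: VorbisDSPContext, vorbis_inverse_coupling and
 * ff_vorbisdsp_init_x86() come from the header/init added by this series;
 * ff_vorbisdsp_init() is assumed to set the C fallback and then apply the
 * x86 overrides on x86 builds. */
static VorbisDSPContext dsp;

static void vorbis_decoder_setup(void)
{
    ff_vorbisdsp_init(&dsp);
}

static void couple_channels(float *mag, float *ang, int blocksize)
{
    /* Formerly DSPContext.vorbis_inverse_coupling; now owned by vorbisdsp. */
    dsp.vorbis_inverse_coupling(mag, ang, blocksize);
}
```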
Diffstat (limited to 'libavcodec/x86')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | libavcodec/x86/Makefile | 1 |
| -rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 63 |
| -rw-r--r-- | libavcodec/x86/vorbisdsp_init.c | 101 |

3 files changed, 102 insertions(+), 63 deletions(-)
```diff
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b5a7694bcf..6069968a09 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER)            += x86/rv34dsp_init.o \
 OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
 OBJS-$(CONFIG_VC1_DECODER)             += x86/vc1dsp_init.o
 OBJS-$(CONFIG_VIDEODSP)                += x86/videodsp_init.o
+OBJS-$(CONFIG_VORBIS_DECODER)          += x86/vorbisdsp_init.o
 OBJS-$(CONFIG_VP3DSP)                  += x86/vp3dsp_init.o
 OBJS-$(CONFIG_VP5_DECODER)             += x86/vp56dsp_init.o
 OBJS-$(CONFIG_VP6_DECODER)             += x86/vp56dsp_init.o
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 13f215135a..74f7df5002 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1829,65 +1829,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src,
     avg_pixels8_mmxext(dst, src, stride, 8);
 }
 
-static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
-{
-    int i;
-    __asm__ volatile ("pxor %%mm7, %%mm7":);
-    for (i = 0; i < blocksize; i += 2) {
-        __asm__ volatile (
-            "movq       %0, %%mm0    \n\t"
-            "movq       %1, %%mm1    \n\t"
-            "movq    %%mm0, %%mm2    \n\t"
-            "movq    %%mm1, %%mm3    \n\t"
-            "pfcmpge %%mm7, %%mm2    \n\t" // m <= 0.0
-            "pfcmpge %%mm7, %%mm3    \n\t" // a <= 0.0
-            "pslld     $31, %%mm2    \n\t" // keep only the sign bit
-            "pxor    %%mm2, %%mm1    \n\t"
-            "movq    %%mm3, %%mm4    \n\t"
-            "pand    %%mm1, %%mm3    \n\t"
-            "pandn   %%mm1, %%mm4    \n\t"
-            "pfadd   %%mm0, %%mm3    \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
-            "pfsub   %%mm4, %%mm0    \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
-            "movq    %%mm3, %1       \n\t"
-            "movq    %%mm0, %0       \n\t"
-            : "+m"(mag[i]), "+m"(ang[i])
-            :: "memory"
-        );
-    }
-    __asm__ volatile ("femms");
-}
-
-static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
-{
-    int i;
-
-    __asm__ volatile (
-        "movaps  %0, %%xmm5 \n\t"
-        :: "m"(ff_pdw_80000000[0])
-    );
-    for (i = 0; i < blocksize; i += 4) {
-        __asm__ volatile (
-            "movaps      %0, %%xmm0 \n\t"
-            "movaps      %1, %%xmm1 \n\t"
-            "xorps   %%xmm2, %%xmm2 \n\t"
-            "xorps   %%xmm3, %%xmm3 \n\t"
-            "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
-            "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
-            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
-            "xorps   %%xmm2, %%xmm1 \n\t"
-            "movaps  %%xmm3, %%xmm4 \n\t"
-            "andps   %%xmm1, %%xmm3 \n\t"
-            "andnps  %%xmm1, %%xmm4 \n\t"
-            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
-            "subps   %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
-            "movaps  %%xmm3, %1     \n\t"
-            "movaps  %%xmm0, %0     \n\t"
-            : "+m"(mag[i]), "+m"(ang[i])
-            :: "memory"
-        );
-    }
-}
-
 static void vector_clipf_sse(float *dst, const float *src,
                              float min, float max, int len)
 {
@@ -2238,8 +2179,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
         c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
         c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
     }
-
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
 #endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
@@ -2263,8 +2202,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
         }
     }
 
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
-
     c->vector_clipf = vector_clipf_sse;
 #endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
new file mode 100644
index 0000000000..5243095003
--- /dev/null
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavcodec/vorbisdsp.h"
+#include "dsputil_mmx.h" // for ff_pdw_80000000
+
+#if HAVE_INLINE_ASM
+#if ARCH_X86_32
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+    int i;
+    __asm__ volatile ("pxor %%mm7, %%mm7":);
+    for (i = 0; i < blocksize; i += 2) {
+        __asm__ volatile (
+            "movq       %0, %%mm0    \n\t"
+            "movq       %1, %%mm1    \n\t"
+            "movq    %%mm0, %%mm2    \n\t"
+            "movq    %%mm1, %%mm3    \n\t"
+            "pfcmpge %%mm7, %%mm2    \n\t" // m <= 0.0
+            "pfcmpge %%mm7, %%mm3    \n\t" // a <= 0.0
+            "pslld     $31, %%mm2    \n\t" // keep only the sign bit
+            "pxor    %%mm2, %%mm1    \n\t"
+            "movq    %%mm3, %%mm4    \n\t"
+            "pand    %%mm1, %%mm3    \n\t"
+            "pandn   %%mm1, %%mm4    \n\t"
+            "pfadd   %%mm0, %%mm3    \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+            "pfsub   %%mm4, %%mm0    \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+            "movq    %%mm3, %1       \n\t"
+            "movq    %%mm0, %0       \n\t"
+            : "+m"(mag[i]), "+m"(ang[i])
+            :: "memory"
+        );
+    }
+    __asm__ volatile ("femms");
+}
+#endif
+
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+    int i;
+
+    __asm__ volatile (
+        "movaps  %0, %%xmm5 \n\t"
+        :: "m"(ff_pdw_80000000[0])
+    );
+    for (i = 0; i < blocksize; i += 4) {
+        __asm__ volatile (
+            "movaps      %0, %%xmm0 \n\t"
+            "movaps      %1, %%xmm1 \n\t"
+            "xorps   %%xmm2, %%xmm2 \n\t"
+            "xorps   %%xmm3, %%xmm3 \n\t"
+            "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+            "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+            "andps   %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+            "xorps   %%xmm2, %%xmm1 \n\t"
+            "movaps  %%xmm3, %%xmm4 \n\t"
+            "andps   %%xmm1, %%xmm3 \n\t"
+            "andnps  %%xmm1, %%xmm4 \n\t"
+            "addps   %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+            "subps   %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+            "movaps  %%xmm3, %1     \n\t"
+            "movaps  %%xmm0, %0     \n\t"
+            : "+m"(mag[i]), "+m"(ang[i])
+            :: "memory"
+        );
+    }
+}
+#endif
+
+void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
+{
+#if HAVE_INLINE_ASM
+    int mm_flags = av_get_cpu_flags();
+
+#if ARCH_X86_32
+    if (mm_flags & AV_CPU_FLAG_3DNOW)
+        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+#endif /* ARCH_X86_32 */
+    if (mm_flags & AV_CPU_FLAG_SSE)
+        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+#endif /* HAVE_INLINE_ASM */
+}
```
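For readers less at home in inline asm, here is a plain-C sketch of the coupling step the 3DNow!/SSE routines above implement, reconstructed from the comments in the asm. The real C fallback lives in libavcodec/vorbisdsp.c and is not part of this diff, so the function name below is illustrative only.

```c
/* Illustrative scalar version of Vorbis inverse channel coupling:
 * mag[] holds the magnitude vector, ang[] the angle vector, and the pair
 * is rewritten in place. The SIMD code above computes the same result
 * branchlessly: it XORs the sign of mag into ang and then masks with the
 * (ang <= 0) comparison result to pick the add or subtract path. */
static void vorbis_inverse_coupling_c(float *mag, float *ang, int blocksize)
{
    int i;
    for (i = 0; i < blocksize; i++) {
        if (mag[i] > 0.0f) {
            if (ang[i] > 0.0f) {
                ang[i] = mag[i] - ang[i];
            } else {
                float tmp = ang[i];
                ang[i]  = mag[i];
                mag[i] += tmp;
            }
        } else {
            if (ang[i] > 0.0f) {
                ang[i] += mag[i];
            } else {
                float tmp = ang[i];
                ang[i]  = mag[i];
                mag[i] -= tmp;
            }
        }
    }
}
```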