diff options
-rw-r--r-- | libavcodec/dcadec.c | 3 | ||||
-rw-r--r-- | libavcodec/dcadsp.c | 1 | ||||
-rw-r--r-- | libavcodec/dcadsp.h | 1 | ||||
-rw-r--r-- | libavcodec/x86/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/x86/dca.h | 52 | ||||
-rw-r--r-- | libavcodec/x86/dcadsp.asm | 90 | ||||
-rw-r--r-- | libavcodec/x86/dcadsp_init.c | 47 |
7 files changed, 196 insertions, 0 deletions
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c index b6df3b9f2f..6ffb040aaa 100644 --- a/libavcodec/dcadec.c +++ b/libavcodec/dcadec.c @@ -50,6 +50,9 @@ #if ARCH_ARM # include "arm/dca.h" #endif +#if ARCH_X86 +# include "x86/dca.h" +#endif //#define TRACE diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c index b98486422b..148f6dd607 100644 --- a/libavcodec/dcadsp.c +++ b/libavcodec/dcadsp.c @@ -88,4 +88,5 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) s->qmf_32_subbands = dca_qmf_32_subbands; s->int8x8_fmul_int32 = int8x8_fmul_int32_c; if (ARCH_ARM) ff_dcadsp_init_arm(s); + if (ARCH_X86) ff_dcadsp_init_x86(s); } diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h index 0f79dd643e..e2ad09adf6 100644 --- a/libavcodec/dcadsp.h +++ b/libavcodec/dcadsp.h @@ -36,5 +36,6 @@ typedef struct DCADSPContext { void ff_dcadsp_init(DCADSPContext *s); void ff_dcadsp_init_arm(DCADSPContext *s); +void ff_dcadsp_init_x86(DCADSPContext *s); #endif /* AVCODEC_DCADSP_H */ diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 6f4935bc3e..f985525061 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -4,6 +4,7 @@ OBJS += x86/constants.o \ OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp_init.o OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp.o +OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp_init.o OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \ @@ -54,6 +55,7 @@ YASM-OBJS += x86/deinterlace.o \ YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o +YASM-OBJS-$(CONFIG_DCA_DECODER) += x86/dcadsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \ x86/fpel.o \ diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h new file mode 100644 index 0000000000..c14e94f4aa --- /dev/null +++ b/libavcodec/x86/dca.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012-2014 Christophe 
Gisquet <christophe.gisquet@gmail.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#if ARCH_X86_64 +# include "libavutil/x86/asm.h" +# include "libavutil/mem.h" + +# define int8x8_fmul_int32 int8x8_fmul_int32 +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, + float *dst, const int8_t *src, int scale) +{ + DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000; + __asm__ volatile ( + "cvtsi2ss %2, %%xmm0 \n\t" + "mulss %3, %%xmm0 \n\t" + "movq (%1), %%xmm1 \n\t" + "punpcklbw %%xmm1, %%xmm1 \n\t" + "movaps %%xmm1, %%xmm2 \n\t" + "punpcklwd %%xmm1, %%xmm1 \n\t" + "punpckhwd %%xmm2, %%xmm2 \n\t" + "psrad $24, %%xmm1 \n\t" + "psrad $24, %%xmm2 \n\t" + "shufps $0, %%xmm0, %%xmm0 \n\t" + "cvtdq2ps %%xmm1, %%xmm1 \n\t" + "cvtdq2ps %%xmm2, %%xmm2 \n\t" + "mulps %%xmm0, %%xmm1 \n\t" + "mulps %%xmm0, %%xmm2 \n\t" + "movaps %%xmm1, 0(%0) \n\t" + "movaps %%xmm2, 16(%0) \n\t" + :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16) + XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2") + ); +} + +#endif /* ARCH_X86_64 */ diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm new file mode 100644 index 0000000000..214f5146aa --- /dev/null +++ b/libavcodec/x86/dcadsp.asm @@ -0,0 +1,90 @@ 
+;******************************************************************************
+;* SSE-optimized functions for the DCA decoder
+;* Copyright (C) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pf_inv16:  times 4 dd 0x3D800000 ; 1/16
+
+SECTION_TEXT
+
+; void ff_int8x8_fmul_int32_{sse,sse2,sse4}(float *dst, const int8_t *src,
+;                                           int scale)
+;
+; Converts the 8 signed bytes at src to float, multiplies each of them by
+; scale * (1/16) and stores the 8 results at dst. dst is written with mova,
+; so it has to be 16-byte aligned.
+;
+; Register use: m0 = scale factor broadcast to all lanes,
+;               m1 = elements 0-3, m2 = elements 4-7,
+;               m3/m4 and mm0-mm3 are scratch for the plain-SSE variant only.
+%macro INT8X8_FMUL_INT32 0
+cglobal int8x8_fmul_int32, 3,3,5, dst, src, scale
+    ; m0 = (float)scale * (1/16), broadcast to all four lanes
+    cvtsi2ss    m0, scalem
+    mulss       m0, [pf_inv16]
+    shufps      m0, m0, 0
+%if cpuflag(sse2)
+%if cpuflag(sse4)
+    ; SSE4: sign-extend 4 bytes to 4 dwords in one instruction
+    pmovsxbd    m1, [srcq+0]
+    pmovsxbd    m2, [srcq+4]
+%else
+    ; SSE2: replicate every byte into all four bytes of a dword, then an
+    ; arithmetic shift by 24 leaves the sign-extended value
+    movq        m1, [srcq]
+    punpcklbw   m1, m1
+    mova        m2, m1
+    punpcklwd   m1, m1
+    punpckhwd   m2, m2
+    psrad       m1, 24
+    psrad       m2, 24
+%endif
+    cvtdq2ps    m1, m1
+    cvtdq2ps    m2, m2
+%else
+    ; Plain SSE has no integer operations on XMM registers, so the same
+    ; replicate-and-shift sign extension is done in MMX registers:
+    ; mm0 = elements 0-1, mm2 = 2-3, mm1 = 4-5, mm3 = 6-7
+    movd       mm0, [srcq+0]
+    movd       mm1, [srcq+4]
+    punpcklbw  mm0, mm0
+    punpcklbw  mm1, mm1
+    movq       mm2, mm0
+    movq       mm3, mm1
+    punpcklwd  mm0, mm0
+    punpcklwd  mm1, mm1
+    punpckhwd  mm2, mm2
+    punpckhwd  mm3, mm3
+    psrad      mm0, 24
+    psrad      mm1, 24
+    psrad      mm2, 24
+    psrad      mm3, 24
+    ; each cvtpi2ps converts two dwords into the low two lanes
+    cvtpi2ps    m1, mm0
+    cvtpi2ps    m2, mm1
+    cvtpi2ps    m3, mm2
+    cvtpi2ps    m4, mm3
+    ; m0 was already broadcast above and has not been modified since, so no
+    ; second shufps on it is needed here
+    emms                            ; done with the MMX registers
+    ; merge the low halves: m1 = elements 0-3, m2 = elements 4-7
+    shufps      m1, m3, q1010
+    shufps      m2, m4, q1010
+%endif
+    mulps       m1, m0
+    mulps       m2, m0
+    mova [dstq+ 0], m1
+    mova [dstq+16], m2
+    REP_RET
+%endmacro
+
+; The plain-SSE variant is only built for 32-bit x86
+%if ARCH_X86_32
+INIT_XMM sse
+INT8X8_FMUL_INT32
+%endif
+
+INIT_XMM sse2
+INT8X8_FMUL_INT32
+
+INIT_XMM sse4
+INT8X8_FMUL_INT32
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
new file mode 100644
index 0000000000..976d8a3ba3
--- /dev/null
+++ b/libavcodec/x86/dcadsp_init.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2012-2014 Christophe Gisquet <christophe.gisquet@gmail.com>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/dcadsp.h" + +void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale); +void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale); +void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale); + +av_cold void ff_dcadsp_init_x86(DCADSPContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_SSE(cpu_flags)) { +#if ARCH_X86_32 + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse; +#endif + } + + if (EXTERNAL_SSE2(cpu_flags)) { + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2; + } + + if (EXTERNAL_SSE4(cpu_flags)) { + s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4; + } +} |