aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Anton Khirnov <anton@khirnov.net> 2016-08-09 20:20:00 +0200
committer Anton Khirnov <anton@khirnov.net> 2016-09-22 09:47:52 +0200
commit 12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5 (patch)
tree 41d8ae02dbf5ac8d31abfc3332398d2153d366c1
parent 683da86aabb4fbeddc3ead5fce737c63c0ee762c (diff)
download ffmpeg-12004a9a7f20e44f4da2ee6c372d5e1794c8d6c5.tar.gz
audiodsp/x86: yasmify vector_clipf_sse
-rw-r--r-- libavcodec/x86/Makefile 1
-rw-r--r-- libavcodec/x86/audiodsp.asm 43
-rw-r--r-- libavcodec/x86/audiodsp_init.c 2
-rw-r--r-- libavcodec/x86/audiodsp_mmx.c 58
4 files changed, 44 insertions, 60 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 204c856340..872b7faddb 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
# GCC inline assembly optimizations
# subsystems
-MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o
MMX-OBJS-$(CONFIG_FDCTDSP) += x86/fdct.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o
diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index d7e63eb0cb..1bc7e32a68 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -135,3 +135,46 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
%else
VECTOR_CLIP_INT32 6, 1, 0, 0
%endif
+
+; void ff_vector_clipf_sse(float *dst, const float *src,
+;                          int len, float min, float max)
+;
+; Clamps every float of src to the range [min, max] and stores the result
+; in dst. The loop walks from the end of the buffers towards the start and
+; always executes at least once, so len is assumed to be a positive
+; multiple of 16 (TODO confirm against callers). Loads and stores use mova,
+; i.e. both buffers are assumed to be mmsize-aligned.
+INIT_XMM sse
+cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
+    ; Broadcast min into every lane of m0 and max into every lane of m1.
+    ; Where the two float arguments arrive depends on the calling convention.
+%if ARCH_X86_32
+    ; both floats are read from their memory (stack) slots
+    VBROADCASTSS m0, minm
+    VBROADCASTSS m1, maxm
+%elif WIN64
+    ; min is the 4th argument and arrives in xmm3; max is read from memory.
+    ; m3 is consumed here, before the loop reuses it as a data register.
+    VBROADCASTSS m0, m3
+    VBROADCASTSS m1, maxm
+%else ; 64bit sysv
+    ; min and max already sit in the low lanes of xmm0 and xmm1
+    VBROADCASTSS m0, m0
+    VBROADCASTSS m1, m1
+%endif
+
+    movsxdifnidn lenq, lend
+
+    ; Each pass handles four registers = 4 * mmsize bytes = mmsize floats,
+    ; addressed backwards from element index lenq.
+.loop:
+    mova    m2, [srcq + 4 * lenq - 4 * mmsize]
+    mova    m3, [srcq + 4 * lenq - 3 * mmsize]
+    mova    m4, [srcq + 4 * lenq - 2 * mmsize]
+    mova    m5, [srcq + 4 * lenq - 1 * mmsize]
+
+    ; raise anything below min up to min
+    maxps   m2, m0
+    maxps   m3, m0
+    maxps   m4, m0
+    maxps   m5, m0
+
+    ; cap anything above max at max
+    minps   m2, m1
+    minps   m3, m1
+    minps   m4, m1
+    minps   m5, m1
+
+    mova    [dstq + 4 * lenq - 4 * mmsize], m2
+    mova    [dstq + 4 * lenq - 3 * mmsize], m3
+    mova    [dstq + 4 * lenq - 2 * mmsize], m4
+    mova    [dstq + 4 * lenq - 1 * mmsize], m5
+
+    ; lenq counts floats: 4 * mmsize bytes / 4 bytes per float = mmsize
+    sub     lenq, mmsize
+    jg .loop
+
+    RET
diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c
index 8eb2e56bdd..23731158e5 100644
--- a/libavcodec/x86/audiodsp_init.c
+++ b/libavcodec/x86/audiodsp_init.c
@@ -49,7 +49,7 @@ av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
if (EXTERNAL_MMXEXT(cpu_flags))
c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext;
- if (INLINE_SSE(cpu_flags))
+ if (EXTERNAL_SSE(cpu_flags))
c->vector_clipf = ff_vector_clipf_sse;
if (EXTERNAL_SSE2(cpu_flags)) {
diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c
deleted file mode 100644
index 04cbb90706..0000000000
--- a/libavcodec/x86/audiodsp_mmx.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "libavutil/x86/asm.h"
-#include "audiodsp.h"
-
-#if HAVE_INLINE_ASM
-
-void ff_vector_clipf_sse(float *dst, const float *src, // clamp each float of src to [min, max] and store it in dst
- int len, float min, float max) // len counts floats; the loop below consumes 16 of them per pass
-{ // NOTE(review): the loop body runs before its exit test, so len is presumably a multiple of 16 and >= 16 - confirm against callers
- x86_reg i = (len - 16) * 4; // byte offset of the last group of 16 floats (64 bytes)
- __asm__ volatile (
- "movss %3, %%xmm4 \n\t" // load min into the low lane of xmm4
- "movss %4, %%xmm5 \n\t" // load max into the low lane of xmm5
- "shufps $0, %%xmm4, %%xmm4 \n\t" // broadcast min to all four lanes
- "shufps $0, %%xmm5, %%xmm5 \n\t" // broadcast max to all four lanes
- "1: \n\t" // loop top: process 64 bytes at byte offset %0
- "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel
- "movaps 16(%2, %0), %%xmm1 \n\t"
- "movaps 32(%2, %0), %%xmm2 \n\t"
- "movaps 48(%2, %0), %%xmm3 \n\t" // movaps is the aligned move: src and dst must be 16-byte aligned
- "maxps %%xmm4, %%xmm0 \n\t" // raise values below min up to min
- "maxps %%xmm4, %%xmm1 \n\t"
- "maxps %%xmm4, %%xmm2 \n\t"
- "maxps %%xmm4, %%xmm3 \n\t"
- "minps %%xmm5, %%xmm0 \n\t" // cap values above max at max
- "minps %%xmm5, %%xmm1 \n\t"
- "minps %%xmm5, %%xmm2 \n\t"
- "minps %%xmm5, %%xmm3 \n\t"
- "movaps %%xmm0, (%1, %0) \n\t" // store the clamped group to dst at the same offset
- "movaps %%xmm1, 16(%1, %0) \n\t"
- "movaps %%xmm2, 32(%1, %0) \n\t"
- "movaps %%xmm3, 48(%1, %0) \n\t"
- "sub $64, %0 \n\t" // step back by 64 bytes (16 floats)
- "jge 1b \n\t" // keep going while the offset is still >= 0
- : "+&r" (i) // %0: running byte offset, read and written by the asm
- : "r" (dst), "r" (src), "m" (min), "m" (max) // %1 = dst, %2 = src, %3 = min, %4 = max
- : "memory"); // the asm stores through dst, which the compiler cannot see
-}
-
-#endif /* HAVE_INLINE_ASM */