aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
authorChristophe Gisquet <christophe.gisquet@gmail.com>2013-04-12 21:07:01 +0200
committerAnton Khirnov <anton@khirnov.net>2013-05-03 08:08:02 +0200
commit566b7a20fd0cab44d344329538d314454a0bcc2f (patch)
treef2ca45002f7479bdaa9e1cb4abc92c794b42e338 /libavutil
parentb333f3a22a4db4cf65d6a0457ac82ecbe7c7ac44 (diff)
downloadffmpeg-566b7a20fd0cab44d344329538d314454a0bcc2f.tar.gz
x86: float dsp: butterflies_float SSE
97c -> 49c. Some codecs could benefit from more unrolling, but AAC doesn't.
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/x86/float_dsp.asm26
-rw-r--r--libavutil/x86/float_dsp_init.c3
2 files changed, 29 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 779339c575..10330ff336 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -252,3 +252,29 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
fld dword r0m
%endif
RET
+
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse
+cglobal butterflies_float, 3,3,3, src0, src1, len ; in place: src0[i] <- src0[i] + src1[i], src1[i] <- src0[i] - src1[i]
+%if ARCH_X86_64
+ movsxd lenq, lend ; len is a 32-bit int: sign-extend it before using the full 64-bit register
+%endif
+ test lenq, lenq
+ jz .end ; len == 0: nothing to process
+ shl lenq, 2 ; element count -> byte count (len * 4, per the float* prototype)
+ lea src0q, [src0q + lenq] ; point src0 one past its last element
+ lea src1q, [src1q + lenq] ; point src1 one past its last element
+ neg lenq ; lenq becomes a negative byte offset that counts up towards zero
+.loop:
+ mova m0, [src0q + lenq] ; NOTE(review): mova is presumably the aligned x86inc move, so both buffers would need vector alignment - confirm with callers
+ mova m1, [src1q + lenq]
+ subps m2, m0, m1 ; m2 = src0 - src1
+ addps m0, m0, m1 ; m0 = src0 + src1
+ mova [src1q + lenq], m2 ; the difference is written back to src1
+ mova [src0q + lenq], m0 ; the sum is written back to src0
+ add lenq, mmsize ; advance by one vector; NOTE(review): assumes len is a positive multiple of the vector width in floats - confirm
+ jl .loop ; keep going while the offset is still negative
+.end:
+ REP_RET
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index b5e9af935f..34863013cc 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -53,6 +53,8 @@ void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
+void ff_butterflies_float_sse(float *src0, float *src1, int len);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -138,6 +140,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
+ fdsp->butterflies_float = ff_butterflies_float_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;