diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2013-04-12 21:07:01 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-04-17 00:03:25 +0200 |
commit | 1a4007964c106d01f46a5a7f03c1c41fd869b35c (patch) | |
tree | 61e4cfb2b459089c084b358947ad6588b01fd14b /libavutil/x86/float_dsp.asm | |
parent | 295ce83e2f06c352e11ac1918c3f1119f8b276ab (diff) | |
download | ffmpeg-1a4007964c106d01f46a5a7f03c1c41fd869b35c.tar.gz |
x86: float dsp: butterflies_float SSE
97c -> 49c
Some codecs could benefit from more unrolling, but AAC doesn't.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86/float_dsp.asm')
-rw-r--r-- | libavutil/x86/float_dsp.asm | 23 |
1 files changed, 23 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 004e6cf1fe..f0310ef1b8 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
 %endif
     RET
 
+;-----------------------------------------------------------------------------
+; void ff_butterflies_float(float *src0, float *src1, int len);
+;-----------------------------------------------------------------------------
+INIT_XMM sse                                       ; XMM registers / SSE instruction set for the code below
+cglobal butterflies_float, 3,3,3, src0, src1, len  ; in place: src0[i] <- src0[i] + src1[i], src1[i] <- src0[i] - src1[i] (both from the old values)
+    movsxdifnidn lenq, lend                        ; widen the int len to a full register where lenq and lend differ (x86inc helper)
+    test         lenq, lenq
+    jz           .end                              ; len == 0: nothing to do (only zero is filtered; NOTE(review): negative len is not rejected here)
+    shl          lenq, 2                           ; element count -> byte count (4 bytes per float)
+    lea          src0q, [src0q + lenq]             ; move both pointers to the end of their arrays ...
+    lea          src1q, [src1q + lenq]
+    neg          lenq                              ; ... and index them with a negative byte offset that rises towards 0
+.loop:
+    mova         m0, [src0q + lenq]                ; m0 = one vector from src0 (mova is the aligned move; presumably buffers are vector-aligned - confirm)
+    mova         m1, [src1q + lenq]                ; m1 = matching vector from src1
+    subps        m2, m0, m1                        ; m2 = src0 - src1
+    addps        m0, m0, m1                        ; m0 = src0 + src1
+    mova         [src1q + lenq], m2                ; difference goes back to src1
+    mova         [src0q + lenq], m0                ; sum goes back to src0
+    add          lenq, mmsize                      ; next vector; NOTE(review): a len that is not a whole number of vectors makes the last pass run past the end - confirm callers guarantee it
+    jl           .loop                             ; keep going while the offset is still negative
+.end:
+    REP_RET                                        ; NOTE(review): presumably REP_RET instead of RET because .end is a branch target - confirm against x86inc
|