diff options
author | Fabrice Bellard <fabrice@bellard.org> | 2002-10-28 00:34:08 +0000 |
---|---|---|
committer | Fabrice Bellard <fabrice@bellard.org> | 2002-10-28 00:34:08 +0000 |
commit | bb6f5690728486b71e280a295aef4c49d25ee758 (patch) | |
tree | bb4def001175ea9c928da56cc2eb8cbcc77488b1 /libavcodec/i386 | |
parent | 6d291820978ee1058f7154ed0b8cb7377c8bed51 (diff) | |
download | ffmpeg-bb6f5690728486b71e280a295aef4c49d25ee758.tar.gz |
new generic FFT/MDCT code for audio codecs
Originally committed as revision 1088 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386')
-rw-r--r-- | libavcodec/i386/fft_sse.c | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c new file mode 100644 index 0000000000..8e8e36b0f7 --- /dev/null +++ b/libavcodec/i386/fft_sse.c @@ -0,0 +1,128 @@ +/* + * FFT/MDCT transform with SSE optimizations + * Copyright (c) 2002 Fabrice Bellard. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include "../dsputil.h" +#include <math.h> + +#include <xmmintrin.h> + +static const float p1p1p1m1[4] __attribute__((aligned(16))) = + { 1.0, 1.0, 1.0, -1.0 }; + +static const float p1p1m1m1[4] __attribute__((aligned(16))) = + { 1.0, 1.0, -1.0, -1.0 }; + +#if 0 +static void print_v4sf(const char *str, __m128 a) +{ + float *p = (float *)&a; + printf("%s: %f %f %f %f\n", + str, p[0], p[1], p[2], p[3]); +} +#endif + +/* XXX: handle reverse case */ +void fft_calc_sse(FFTContext *s, FFTComplex *z) +{ + int ln = s->nbits; + int j, np, np2; + int nblocks, nloops; + register FFTComplex *p, *q; + FFTComplex *cptr, *cptr1; + int k; + + np = 1 << ln; + + { + __m128 *r, a, b, a1, c1, c2; + + r = (__m128 *)&z[0]; + c1 = *(__m128 *)p1p1m1m1; + c2 = *(__m128 *)p1p1p1m1; + j = (np >> 2); + do { + a = r[0]; + b = _mm_shuffle_ps(a, a, _MM_SHUFFLE(1, 0, 3, 2)); + a = _mm_mul_ps(a, c1); + /* do the pass 0 butterfly */ + a = _mm_add_ps(a, b); + + a1 = r[1]; + b = _mm_shuffle_ps(a1, a1, _MM_SHUFFLE(1, 0, 3, 2)); + a1 = _mm_mul_ps(a1, c1); + /* do the pass 0 butterfly */ + b = _mm_add_ps(a1, b); + + /* multiply third by -i */ + b = _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 3, 1, 0)); + b = _mm_mul_ps(b, c2); + + /* do the pass 1 butterfly */ + r[0] = _mm_add_ps(a, b); + r[1] = _mm_sub_ps(a, b); + r += 2; + } while (--j != 0); + } + /* pass 2 .. ln-1 */ + + nblocks = np >> 3; + nloops = 1 << 2; + np2 = np >> 1; + + cptr1 = s->exptab1; + do { + p = z; + q = z + nloops; + j = nblocks; + do { + cptr = cptr1; + k = nloops >> 1; + do { + __m128 a, b, c, t1, t2; + + a = *(__m128 *)p; + b = *(__m128 *)q; + + /* complex mul */ + c = *(__m128 *)cptr; + /* cre*re cim*re */ + t1 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0))); + c = *(__m128 *)(cptr + 2); + /* -cim*im cre*im */ + t2 = _mm_mul_ps(c, + _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1))); + b = _mm_add_ps(t1, t2); + + /* butterfly */ + *(__m128 *)p = _mm_add_ps(a, b); + *(__m128 *)q = _mm_sub_ps(a, b); + + p += 2; + q += 2; + cptr += 4; + } while (--k); + + p += nloops; + q += nloops; + } while (--j); + cptr1 += nloops * 2; + nblocks = nblocks >> 1; + nloops = nloops << 1; + } while (nblocks != 0); +} |