diff options
author | Janne Grunau <janne-libav@jannau.net> | 2014-04-28 17:56:43 +0200 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2014-05-15 18:17:02 +0200 |
commit | d3f5b94762fb803c0f3b29f9ad6c5eaa813998ba (patch) | |
tree | 62d2b426d38f8540bdcfb5e126666c89e4cdb279 /libavcodec/opus_imdct.c | |
parent | 7c5ca546a0747a20c7f7fb5550455c3042699ee9 (diff) | |
download | ffmpeg-d3f5b94762fb803c0f3b29f9ad6c5eaa813998ba.tar.gz |
aarch64: opus NEON iMDCT and FFT
Opus celt decoding 11% faster and the iMDCT over 2.5 times faster on
Apple's A7.
Diffstat (limited to 'libavcodec/opus_imdct.c')
-rw-r--r-- | libavcodec/opus_imdct.c | 42 |
1 files changed, 23 insertions, 19 deletions
diff --git a/libavcodec/opus_imdct.c b/libavcodec/opus_imdct.c index 7bbaa35227..38674edd32 100644 --- a/libavcodec/opus_imdct.c +++ b/libavcodec/opus_imdct.c @@ -25,12 +25,19 @@ #include <float.h> #include <math.h> +#include <stddef.h> + +#include "config.h" #include "libavutil/attributes.h" #include "libavutil/common.h" -#include "fft.h" +#include "avfft.h" #include "opus.h" +#include "opus_imdct.h" + +// minimal iMDCT size to make SIMD opts easier +#define CELT_MIN_IMDCT_SIZE 120 // complex c = a * b #define CMUL3(cre, cim, are, aim, bre, bim) \ @@ -59,18 +66,6 @@ do { \ (d).im = -ri + ir; \ } while (0) -struct CeltIMDCTContext { - int fft_n; - int len2; - int len4; - - FFTComplex *tmp; - - FFTComplex *twiddle_exptab; - - FFTComplex *exptab[6]; -}; - av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps) { CeltIMDCTContext *s = *ps; @@ -89,6 +84,9 @@ av_cold void ff_celt_imdct_uninit(CeltIMDCTContext **ps) av_freep(ps); } +static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src, + ptrdiff_t stride, float scale); + av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N) { CeltIMDCTContext *s; @@ -96,7 +94,7 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N) int len = 2 * len2; int i, j; - if (len2 > CELT_MAX_FRAME_SIZE) + if (len2 > CELT_MAX_FRAME_SIZE || len2 < CELT_MIN_IMDCT_SIZE) return AVERROR(EINVAL); s = av_mallocz(sizeof(*s)); @@ -136,6 +134,11 @@ av_cold int ff_celt_imdct_init(CeltIMDCTContext **ps, int N) for (j = 15; j < 19; j++) s->exptab[0][j] = s->exptab[0][j - 15]; + s->imdct_half = celt_imdct_half; + + if (ARCH_AARCH64) + ff_celt_imdct_init_aarch64(s); + *ps = s; return 0; @@ -144,7 +147,7 @@ fail: return AVERROR(ENOMEM); } -static void fft5(FFTComplex *out, const FFTComplex *in, int stride) +static void fft5(FFTComplex *out, const FFTComplex *in, ptrdiff_t stride) { // [0] = exp(2 * i * pi / 5), [1] = exp(2 * i * pi * 2 / 5) static const FFTComplex fact[] = { { 0.30901699437494745, 0.95105651629515353 }, @@ -177,7 +180,7 @@ static void fft5(FFTComplex *out, const FFTComplex *in, int stride) out[4].im = in[0].im + z[0][3].im + z[1][2].im + z[2][1].im + z[3][0].im; } -static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int stride) +static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, ptrdiff_t stride) { const FFTComplex *exptab = s->exptab[0]; FFTComplex tmp[5]; @@ -212,7 +215,8 @@ static void fft15(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, in /* * FFT of the length 15 * (2^N) */ -static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, int N, int stride) +static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, + int N, ptrdiff_t stride) { if (N) { const FFTComplex *exptab = s->exptab[N]; @@ -237,8 +241,8 @@ static void fft_calc(CeltIMDCTContext *s, FFTComplex *out, const FFTComplex *in, fft15(s, out, in, stride); } -void ff_celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src, - int stride, float scale) +static void celt_imdct_half(CeltIMDCTContext *s, float *dst, const float *src, + ptrdiff_t stride, float scale) { FFTComplex *z = (FFTComplex *)dst; const int len8 = s->len4 / 2; |