diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-04-04 02:15:12 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-04-04 02:15:12 +0200 |
commit | 2cae9809e2d59c7336fc2cccb97b82c7f764868a (patch) | |
tree | 823d962b7237e515e14e2679084e5981d5b808a5 /libavcodec | |
parent | 3c9bfb336867ccd32a6e8490930961bcc14b3fdc (diff) | |
parent | 906fd03070c7dc39b4c937befa2c3559bccf7ba7 (diff) | |
download | ffmpeg-2cae9809e2d59c7336fc2cccb97b82c7f764868a.tar.gz |
Merge remote branch 'qatar/master'
* qatar/master:
fate: fix partial run when no samples path is specified
ARM: NEON fixed-point forward MDCT
ARM: NEON fixed-point FFT
lavf: bump minor version and add an APIChanges entry for avio changes
avio: simplify url_open_dyn_buf_internal by using avio_alloc_context()
avio: make url_fdopen internal.
avio: make url_open_dyn_packet_buf internal.
avio: avio_ prefix for url_close_dyn_buf
avio: avio_ prefix for url_open_dyn_buf
avio: introduce an AVIOContext.seekable field
ac3enc: use generic fixed-point mdct
lavfi: add fade filter
Change yadif to not use out of picture lines.
lavc: deprecate AVCodecContext.antialias_algo
lavc: mark mb_qmin/mb_qmax for removal on next major bump.
Conflicts:
doc/filters.texi
libavcodec/ac3enc_fixed.h
libavcodec/ac3enc_float.h
libavfilter/Makefile
libavfilter/allfilters.c
libavfilter/vf_fade.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/ac3enc.c | 26 | ||||
-rw-r--r-- | libavcodec/ac3enc_fixed.c | 298 | ||||
-rw-r--r-- | libavcodec/ac3enc_fixed.h | 61 | ||||
-rw-r--r-- | libavcodec/ac3enc_float.c | 11 | ||||
-rw-r--r-- | libavcodec/ac3enc_float.h | 47 | ||||
-rw-r--r-- | libavcodec/arm/Makefile | 3 | ||||
-rw-r--r-- | libavcodec/arm/fft_fixed_init_arm.c | 42 | ||||
-rw-r--r-- | libavcodec/arm/fft_fixed_neon.S | 261 | ||||
-rw-r--r-- | libavcodec/arm/mdct_fixed_neon.S | 195 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 11 | ||||
-rw-r--r-- | libavcodec/fft.c | 1 | ||||
-rw-r--r-- | libavcodec/fft.h | 4 | ||||
-rw-r--r-- | libavcodec/options.c | 4 | ||||
-rw-r--r-- | libavcodec/version.h | 6 |
14 files changed, 545 insertions, 425 deletions
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index bc6b9c8766..5f2868f5e6 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -29,6 +29,8 @@ //#define DEBUG //#define ASSERT_LEVEL 2 +#include <stdint.h> + #include "libavutil/audioconvert.h" #include "libavutil/avassert.h" #include "libavutil/crc.h" @@ -39,6 +41,7 @@ #include "ac3dsp.h" #include "ac3.h" #include "audioconvert.h" +#include "fft.h" #ifndef CONFIG_AC3ENC_FLOAT @@ -55,16 +58,22 @@ #define AC3_REMATRIXING_NONE 1 #define AC3_REMATRIXING_ALWAYS 3 -/** Scale a float value by 2^bits and convert to an integer. */ -#define SCALE_FLOAT(a, bits) lrintf((a) * (float)(1 << (bits))) - - #if CONFIG_AC3ENC_FLOAT -#include "ac3enc_float.h" +#define MAC_COEF(d,a,b) ((d)+=(a)*(b)) +typedef float SampleType; +typedef float CoefType; +typedef float CoefSumType; #else -#include "ac3enc_fixed.h" +#define MAC_COEF(d,a,b) MAC64(d,a,b) +typedef int16_t SampleType; +typedef int32_t CoefType; +typedef int64_t CoefSumType; #endif +typedef struct AC3MDCTContext { + const SampleType *window; ///< MDCT window function + FFTContext fft; ///< FFT context for MDCT calculation +} AC3MDCTContext; /** * Encoding Options used by AVOption. @@ -279,8 +288,6 @@ static av_cold void mdct_end(AC3MDCTContext *mdct); static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, int nbits); -static void mdct512(AC3MDCTContext *mdct, CoefType *out, SampleType *in); - static void apply_window(DSPContext *dsp, SampleType *output, const SampleType *input, const SampleType *window, unsigned int len); @@ -386,7 +393,8 @@ static void apply_mdct(AC3EncodeContext *s) block->coeff_shift[ch] = normalize_samples(s); - mdct512(&s->mdct, block->mdct_coef[ch], s->windowed_samples); + s->mdct.fft.mdct_calcw(&s->mdct.fft, block->mdct_coef[ch], + s->windowed_samples); } } } diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index e7942abe99..4d8b756b9b 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -26,54 +26,17 @@ * fixed-point AC-3 encoder. */ +#define CONFIG_FFT_FLOAT 0 #undef CONFIG_AC3ENC_FLOAT #include "ac3enc.c" -/** Scale a float value by 2^15, convert to an integer, and clip to range -32767..32767. */ -#define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767) - - /** * Finalize MDCT and free allocated memory. */ static av_cold void mdct_end(AC3MDCTContext *mdct) { - mdct->nbits = 0; - av_freep(&mdct->costab); - av_freep(&mdct->sintab); - av_freep(&mdct->xcos1); - av_freep(&mdct->xsin1); - av_freep(&mdct->rot_tmp); - av_freep(&mdct->cplx_tmp); -} - - -/** - * Initialize FFT tables. - * @param ln log2(FFT size) - */ -static av_cold int fft_init(AVCodecContext *avctx, AC3MDCTContext *mdct, int ln) -{ - int i, n, n2; - float alpha; - - n = 1 << ln; - n2 = n >> 1; - - FF_ALLOC_OR_GOTO(avctx, mdct->costab, n2 * sizeof(*mdct->costab), fft_alloc_fail); - FF_ALLOC_OR_GOTO(avctx, mdct->sintab, n2 * sizeof(*mdct->sintab), fft_alloc_fail); - - for (i = 0; i < n2; i++) { - alpha = 2.0 * M_PI * i / n; - mdct->costab[i] = FIX15(cos(alpha)); - mdct->sintab[i] = FIX15(sin(alpha)); - } - - return 0; -fft_alloc_fail: - mdct_end(mdct); - return AVERROR(ENOMEM); + ff_fft_end(&mdct->fft); } @@ -84,167 +47,9 @@ fft_alloc_fail: static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, int nbits) { - int i, n, n4, ret; - - n = 1 << nbits; - n4 = n >> 2; - - mdct->nbits = nbits; - - ret = fft_init(avctx, mdct, nbits - 2); - if (ret) - return ret; - + int ret = ff_mdct_init(&mdct->fft, nbits, 0, 1.0); mdct->window = ff_ac3_window; - - FF_ALLOC_OR_GOTO(avctx, mdct->xcos1, n4 * sizeof(*mdct->xcos1), mdct_alloc_fail); - FF_ALLOC_OR_GOTO(avctx, mdct->xsin1, n4 * sizeof(*mdct->xsin1), mdct_alloc_fail); - FF_ALLOC_OR_GOTO(avctx, mdct->rot_tmp, n * sizeof(*mdct->rot_tmp), mdct_alloc_fail); - FF_ALLOC_OR_GOTO(avctx, mdct->cplx_tmp, n4 * sizeof(*mdct->cplx_tmp), mdct_alloc_fail); - - for (i = 0; i < n4; i++) { - float alpha = 2.0 * M_PI * (i + 1.0 / 8.0) / n; - mdct->xcos1[i] = FIX15(-cos(alpha)); - mdct->xsin1[i] = FIX15(-sin(alpha)); - } - - return 0; -mdct_alloc_fail: - mdct_end(mdct); - return AVERROR(ENOMEM); -} - - -/** Butterfly op */ -#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ -{ \ - int ax, ay, bx, by; \ - bx = pre1; \ - by = pim1; \ - ax = qre1; \ - ay = qim1; \ - pre = (bx + ax) >> 1; \ - pim = (by + ay) >> 1; \ - qre = (bx - ax) >> 1; \ - qim = (by - ay) >> 1; \ -} - - -/** Complex multiply */ -#define CMUL(pre, pim, are, aim, bre, bim, rshift) \ -{ \ - pre = (MUL16(are, bre) - MUL16(aim, bim)) >> rshift; \ - pim = (MUL16(are, bim) + MUL16(bre, aim)) >> rshift; \ -} - - -/** - * Calculate a 2^n point complex FFT on 2^ln points. - * @param z complex input/output samples - * @param ln log2(FFT size) - */ -static void fft(AC3MDCTContext *mdct, IComplex *z, int ln) -{ - int j, l, np, np2; - int nblocks, nloops; - register IComplex *p,*q; - int tmp_re, tmp_im; - - np = 1 << ln; - - /* reverse */ - for (j = 0; j < np; j++) { - int k = av_reverse[j] >> (8 - ln); - if (k < j) - FFSWAP(IComplex, z[k], z[j]); - } - - /* pass 0 */ - - p = &z[0]; - j = np >> 1; - do { - BF(p[0].re, p[0].im, p[1].re, p[1].im, - p[0].re, p[0].im, p[1].re, p[1].im); - p += 2; - } while (--j); - - /* pass 1 */ - - p = &z[0]; - j = np >> 2; - do { - BF(p[0].re, p[0].im, p[2].re, p[2].im, - p[0].re, p[0].im, p[2].re, p[2].im); - BF(p[1].re, p[1].im, p[3].re, p[3].im, - p[1].re, p[1].im, p[3].im, -p[3].re); - p+=4; - } while (--j); - - /* pass 2 .. ln-1 */ - - nblocks = np >> 3; - nloops = 1 << 2; - np2 = np >> 1; - do { - p = z; - q = z + nloops; - for (j = 0; j < nblocks; j++) { - BF(p->re, p->im, q->re, q->im, - p->re, p->im, q->re, q->im); - p++; - q++; - for(l = nblocks; l < np2; l += nblocks) { - CMUL(tmp_re, tmp_im, mdct->costab[l], -mdct->sintab[l], q->re, q->im, 15); - BF(p->re, p->im, q->re, q->im, - p->re, p->im, tmp_re, tmp_im); - p++; - q++; - } - p += nloops; - q += nloops; - } - nblocks = nblocks >> 1; - nloops = nloops << 1; - } while (nblocks); -} - - -/** - * Calculate a 512-point MDCT - * @param out 256 output frequency coefficients - * @param in 512 windowed input audio samples - */ -static void mdct512(AC3MDCTContext *mdct, int32_t *out, int16_t *in) -{ - int i, re, im, n, n2, n4; - int16_t *rot = mdct->rot_tmp; - IComplex *x = mdct->cplx_tmp; - - n = 1 << mdct->nbits; - n2 = n >> 1; - n4 = n >> 2; - - /* shift to simplify computations */ - for (i = 0; i <n4; i++) - rot[i] = -in[i + 3*n4]; - memcpy(&rot[n4], &in[0], 3*n4*sizeof(*in)); - - /* pre rotation */ - for (i = 0; i < n4; i++) { - re = ((int)rot[ 2*i] - (int)rot[ n-1-2*i]) >> 1; - im = -((int)rot[n2+2*i] - (int)rot[n2-1-2*i]) >> 1; - CMUL(x[i].re, x[i].im, re, im, -mdct->xcos1[i], mdct->xsin1[i], 15); - } - - fft(mdct, x, mdct->nbits - 2); - - /* post rotation */ - for (i = 0; i < n4; i++) { - re = x[i].re; - im = x[i].im; - CMUL(out[n2-1-2*i], out[2*i], re, im, mdct->xsin1[i], mdct->xcos1[i], 0); - } + return ret; } @@ -304,101 +109,6 @@ static void scale_coefficients(AC3EncodeContext *s) } -#ifdef TEST -/*************************************************************************/ -/* TEST */ - -#include "libavutil/lfg.h" - -#define MDCT_NBITS 9 -#define MDCT_SAMPLES (1 << MDCT_NBITS) -#define FN (MDCT_SAMPLES/4) - - -static void fft_test(AC3MDCTContext *mdct, AVLFG *lfg) -{ - IComplex in[FN], in1[FN]; - int k, n, i; - float sum_re, sum_im, a; - - for (i = 0; i < FN; i++) { - in[i].re = av_lfg_get(lfg) % 65535 - 32767; - in[i].im = av_lfg_get(lfg) % 65535 - 32767; - in1[i] = in[i]; - } - fft(mdct, in, 7); - - /* do it by hand */ - for (k = 0; k < FN; k++) { - sum_re = 0; - sum_im = 0; - for (n = 0; n < FN; n++) { - a = -2 * M_PI * (n * k) / FN; - sum_re += in1[n].re * cos(a) - in1[n].im * sin(a); - sum_im += in1[n].re * sin(a) + in1[n].im * cos(a); - } - av_log(NULL, AV_LOG_DEBUG, "%3d: %6d,%6d %6.0f,%6.0f\n", - k, in[k].re, in[k].im, sum_re / FN, sum_im / FN); - } -} - - -static void mdct_test(AC3MDCTContext *mdct, AVLFG *lfg) -{ - int16_t input[MDCT_SAMPLES]; - int32_t output[AC3_MAX_COEFS]; - float input1[MDCT_SAMPLES]; - float output1[AC3_MAX_COEFS]; - float s, a, err, e, emax; - int i, k, n; - - for (i = 0; i < MDCT_SAMPLES; i++) { - input[i] = (av_lfg_get(lfg) % 65535 - 32767) * 9 / 10; - input1[i] = input[i]; - } - - mdct512(mdct, output, input); - - /* do it by hand */ - for (k = 0; k < AC3_MAX_COEFS; k++) { - s = 0; - for (n = 0; n < MDCT_SAMPLES; n++) { - a = (2*M_PI*(2*n+1+MDCT_SAMPLES/2)*(2*k+1) / (4 * MDCT_SAMPLES)); - s += input1[n] * cos(a); - } - output1[k] = -2 * s / MDCT_SAMPLES; - } - - err = 0; - emax = 0; - for (i = 0; i < AC3_MAX_COEFS; i++) { - av_log(NULL, AV_LOG_DEBUG, "%3d: %7d %7.0f\n", i, output[i], output1[i]); - e = output[i] - output1[i]; - if (e > emax) - emax = e; - err += e * e; - } - av_log(NULL, AV_LOG_DEBUG, "err2=%f emax=%f\n", err / AC3_MAX_COEFS, emax); -} - - -int main(void) -{ - AVLFG lfg; - AC3MDCTContext mdct; - - mdct.avctx = NULL; - av_log_set_level(AV_LOG_DEBUG); - mdct_init(&mdct, 9); - - fft_test(&mdct, &lfg); - mdct_test(&mdct, &lfg); - - return 0; -} -#endif /* TEST */ - - AVCodec ff_ac3_fixed_encoder = { "ac3_fixed", AVMEDIA_TYPE_AUDIO, diff --git a/libavcodec/ac3enc_fixed.h b/libavcodec/ac3enc_fixed.h deleted file mode 100644 index bad2306321..0000000000 --- a/libavcodec/ac3enc_fixed.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * The simplest AC-3 encoder - * Copyright (c) 2000 Fabrice Bellard - * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com> - * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * @file - * fixed-point AC-3 encoder header. - */ - -#ifndef AVCODEC_AC3ENC_FIXED_H -#define AVCODEC_AC3ENC_FIXED_H - -#include <stdint.h> - - -typedef int16_t SampleType; -typedef int32_t CoefType; -typedef int64_t CoefSumType; - -#define MAC_COEF(d,a,b) MAC64(d,a,b) - - -/** - * Compex number. - * Used in fixed-point MDCT calculation. - */ -typedef struct IComplex { - int16_t re,im; -} IComplex; - -typedef struct AC3MDCTContext { - const int16_t *window; ///< MDCT window function - int nbits; ///< log2(transform size) - int16_t *costab; ///< FFT cos table - int16_t *sintab; ///< FFT sin table - int16_t *xcos1; ///< MDCT cos table - int16_t *xsin1; ///< MDCT sin table - int16_t *rot_tmp; ///< temp buffer for pre-rotated samples - IComplex *cplx_tmp; ///< temp buffer for complex pre-rotated samples -} AC3MDCTContext; - -#endif /* AVCODEC_AC3ENC_FIXED_H */ diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index faed30da50..d89880535e 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -69,17 +69,6 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct, /** - * Calculate a 512-point MDCT - * @param out 256 output frequency coefficients - * @param in 512 windowed input audio samples - */ -static void mdct512(AC3MDCTContext *mdct, float *out, float *in) -{ - mdct->fft.mdct_calc(&mdct->fft, out, in); -} - - -/** * Apply KBD window to input samples prior to MDCT. */ static void apply_window(DSPContext *dsp, float *output, const float *input, diff --git a/libavcodec/ac3enc_float.h b/libavcodec/ac3enc_float.h deleted file mode 100644 index a4702bb51a..0000000000 --- a/libavcodec/ac3enc_float.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * The simplest AC-3 encoder - * Copyright (c) 2000 Fabrice Bellard - * Copyright (c) 2006-2010 Justin Ruggles <justin.ruggles@gmail.com> - * Copyright (c) 2006-2010 Prakash Punnoor <prakash@punnoor.de> - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/** - * @file - * floating-point AC-3 encoder header. - */ - -#ifndef AVCODEC_AC3ENC_FLOAT_H -#define AVCODEC_AC3ENC_FLOAT_H - -#include "fft.h" - - -typedef float SampleType; -typedef float CoefType; -typedef float CoefSumType; - -#define MAC_COEF(d,a,b) ((d)+=(a)*(b)) - - -typedef struct AC3MDCTContext { - const float *window; ///< MDCT window function - FFTContext fft; ///< FFT context for MDCT calculation -} AC3MDCTContext; - -#endif /* AVCODEC_AC3ENC_FLOAT_H */ diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 3b77a5548d..a5abfdd128 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -16,6 +16,7 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o OBJS += arm/dsputil_init_arm.o \ arm/dsputil_arm.o \ arm/fft_init_arm.o \ + arm/fft_fixed_init_arm.o \ arm/fmtconvert_init_arm.o \ arm/jrevdct_arm.o \ arm/mpegvideo_arm.o \ @@ -41,8 +42,10 @@ OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \ arm/mpegvideo_iwmmxt.o \ NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ + arm/fft_fixed_neon.o \ NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \ + arm/mdct_fixed_neon.o \ NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o \ diff --git a/libavcodec/arm/fft_fixed_init_arm.c b/libavcodec/arm/fft_fixed_init_arm.c new file mode 100644 index 0000000000..df71e7fe09 --- /dev/null +++ b/libavcodec/arm/fft_fixed_init_arm.c @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2009 Mans Rullgard <mans@mansr.com> + * + * This file is part of FFmpeg. + * + * FFMpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FFT_FLOAT 0 +#include "libavcodec/fft.h" + +void ff_fft_fixed_calc_neon(FFTContext *s, FFTComplex *z); +void ff_mdct_fixed_calc_neon(FFTContext *s, FFTSample *o, const FFTSample *i); +void ff_mdct_fixed_calcw_neon(FFTContext *s, FFTDouble *o, const FFTSample *i); + +av_cold void ff_fft_fixed_init_arm(FFTContext *s) +{ + if (HAVE_NEON) { + s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; + s->fft_calc = ff_fft_fixed_calc_neon; + +#if CONFIG_MDCT + if (!s->inverse && s->mdct_bits >= 5) { + s->mdct_permutation = FF_MDCT_PERM_INTERLEAVE; + s->mdct_calc = ff_mdct_fixed_calc_neon; + s->mdct_calcw = ff_mdct_fixed_calcw_neon; + } +#endif + } +} diff --git a/libavcodec/arm/fft_fixed_neon.S b/libavcodec/arm/fft_fixed_neon.S new file mode 100644 index 0000000000..14884d3736 --- /dev/null +++ b/libavcodec/arm/fft_fixed_neon.S @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +.macro bflies d0, d1, r0, r1 + vrev64.32 \r0, \d1 @ t5, t6, t1, t2 + vhsub.s16 \r1, \d1, \r0 @ t1-t5, t2-t6, t5-t1, t6-t2 + vhadd.s16 \r0, \d1, \r0 @ t1+t5, t2+t6, t5+t1, t6+t2 + vext.16 \r1, \r1, \r1, #1 @ t2-t6, t5-t1, t6-t2, t1-t5 + vtrn.32 \r0, \r1 @ t1+t5, t2+t6, t2-t6, t5-t1 + @ t5, t6, t4, t3 + vhsub.s16 \d1, \d0, \r0 + vhadd.s16 \d0, \d0, \r0 +.endm + +.macro transform01 q0, q1, d3, c0, c1, r0, w0, w1 + vrev32.16 \r0, \d3 + vmull.s16 \w0, \d3, \c0 + vmlal.s16 \w0, \r0, \c1 + vshrn.s32 \d3, \w0, #15 + bflies \q0, \q1, \w0, \w1 +.endm + +.macro transform2 d0, d1, d2, d3, q0, q1, c0, c1, c2, c3, \ + r0, r1, w0, w1 + vrev32.16 \r0, \d1 + vrev32.16 \r1, \d3 + vmull.s16 \w0, \d1, \c0 + vmlal.s16 \w0, \r0, \c1 + vmull.s16 \w1, \d3, \c2 + vmlal.s16 \w1, \r1, \c3 + vshrn.s32 \d1, \w0, #15 + vshrn.s32 \d3, \w1, #15 + bflies \q0, \q1, \w0, \w1 +.endm + +.macro fft4 d0, d1, r0, r1 + vhsub.s16 \r0, \d0, \d1 @ t3, t4, t8, t7 + vhsub.s16 \r1, \d1, \d0 + vhadd.s16 \d0, \d0, \d1 @ t1, t2, t6, t5 + vmov.i64 \d1, #0xffff<<32 + vbit \r0, \r1, \d1 + vrev64.16 \r1, \r0 @ t7, t8, t4, t3 + vtrn.32 \r0, \r1 @ t3, t4, t7, t8 + vtrn.32 \d0, \r0 @ t1, t2, t3, t4, t6, t5, t8, t7 + vhsub.s16 \d1, \d0, \r0 @ r2, i2, r3, i1 + vhadd.s16 \d0, \d0, \r0 @ r0, i0, r1, i3 +.endm + +.macro fft8 d0, d1, d2, d3, q0, q1, c0, c1, r0, r1, w0, w1 + fft4 \d0, \d1, \r0, \r1 + vtrn.32 \d0, \d1 @ z0, z2, z1, z3 + vhadd.s16 \r0, \d2, \d3 @ t1, t2, t3, t4 + vhsub.s16 \d3, \d2, \d3 @ z5, z7 + vmov \d2, \r0 + transform01 \q0, \q1, \d3, \c0, \c1, \r0, \w0, \w1 +.endm + +function fft4_neon + vld1.16 {d0-d1}, [r0,:128] + fft4 d0, d1, d2, d3 + vst1.16 {d0-d1}, [r0,:128] + bx lr +endfunc + +function fft8_neon + vld1.16 {d0-d3}, [r0,:128] + movrel r1, coefs + vld1.16 {d30}, [r1,:64] + vdup.16 d31, d30[0] + fft8 d0, d1, d2, d3, q0, q1, d31, d30, d20, d21, q8, q9 + vtrn.32 d0, d1 + vtrn.32 d2, d3 + vst1.16 {d0-d3}, [r0,:128] + bx lr +endfunc + +function fft16_neon + vld1.16 {d0-d3}, [r0,:128]! + vld1.16 {d4-d7}, [r0,:128] + movrel r1, coefs + sub r0, r0, #32 + vld1.16 {d28-d31},[r1,:128] + vdup.16 d31, d28[0] + fft8 d0, d1, d2, d3, q0, q1, d31, d28, d20, d21, q8, q9 + vswp d5, d6 + fft4 q2, q3, q8, q9 + vswp d5, d6 + vtrn.32 q0, q1 @ z0, z4, z2, z6, z1, z5, z3, z7 + vtrn.32 q2, q3 @ z8, z12,z10,z14,z9, z13,z11,z15 + vswp d1, d2 + vdup.16 d31, d28[0] + transform01 q0, q2, d5, d31, d28, d20, q8, q9 + vdup.16 d26, d29[0] + vdup.16 d27, d30[0] + transform2 d2, d6, d3, d7, q1, q3, d26, d30, d27, d29, \ + d20, d21, q8, q9 + vtrn.32 q0, q1 + vtrn.32 q2, q3 + vst1.16 {d0-d3}, [r0,:128]! + vst1.16 {d4-d7}, [r0,:128] + bx lr +endfunc + +function fft_pass_neon + push {r4,lr} + movrel lr, coefs + 24 + vld1.16 {d30}, [lr,:64] + lsl r12, r2, #3 + vmov d31, d30 + add r3, r1, r2, lsl #2 + mov lr, #-8 + sub r3, r3, #2 + mov r4, r0 + vld1.16 {d27[]}, [r3,:16] + sub r3, r3, #6 + vld1.16 {q0}, [r4,:128], r12 + vld1.16 {q1}, [r4,:128], r12 + vld1.16 {q2}, [r4,:128], r12 + vld1.16 {q3}, [r4,:128], r12 + vld1.16 {d28}, [r1,:64]! + vld1.16 {d29}, [r3,:64], lr + vswp d1, d2 + vswp d5, d6 + vtrn.32 d0, d1 + vtrn.32 d4, d5 + vdup.16 d25, d28[1] + vmul.s16 d27, d27, d31 + transform01 q0, q2, d5, d25, d27, d20, q8, q9 + b 2f +1: + mov r4, r0 + vdup.16 d26, d29[0] + vld1.16 {q0}, [r4,:128], r12 + vld1.16 {q1}, [r4,:128], r12 + vld1.16 {q2}, [r4,:128], r12 + vld1.16 {q3}, [r4,:128], r12 + vld1.16 {d28}, [r1,:64]! + vld1.16 {d29}, [r3,:64], lr + vswp d1, d2 + vswp d5, d6 + vtrn.32 d0, d1 + vtrn.32 d4, d5 + vdup.16 d24, d28[0] + vdup.16 d25, d28[1] + vdup.16 d27, d29[3] + vmul.s16 q13, q13, q15 + transform2 d0, d4, d1, d5, q0, q2, d24, d26, d25, d27, \ + d16, d17, q9, q10 +2: + vtrn.32 d2, d3 + vtrn.32 d6, d7 + vdup.16 d24, d28[2] + vdup.16 d26, d29[2] + vdup.16 d25, d28[3] + vdup.16 d27, d29[1] + vmul.s16 q13, q13, q15 + transform2 d2, d6, d3, d7, q1, q3, d24, d26, d25, d27, \ + d16, d17, q9, q10 + vtrn.32 d0, d1 + vtrn.32 d2, d3 + vtrn.32 d4, d5 + vtrn.32 d6, d7 + vswp d1, d2 + vswp d5, d6 + mov r4, r0 + vst1.16 {q0}, [r4,:128], r12 + vst1.16 {q1}, [r4,:128], r12 + vst1.16 {q2}, [r4,:128], r12 + vst1.16 {q3}, [r4,:128], r12 + add r0, r0, #16 + subs r2, r2, #2 + bgt 1b + pop {r4,pc} +endfunc + +#define F_SQRT1_2 23170 +#define F_COS_16_1 30274 +#define F_COS_16_3 12540 + +const coefs, align=4 + .short F_SQRT1_2, -F_SQRT1_2, -F_SQRT1_2, F_SQRT1_2 + .short F_COS_16_1,-F_COS_16_1,-F_COS_16_1, F_COS_16_1 + .short F_COS_16_3,-F_COS_16_3,-F_COS_16_3, F_COS_16_3 + .short 1, -1, -1, 1 +endconst + +.macro def_fft n, n2, n4 +function fft\n\()_neon + push {r4, lr} + mov r4, r0 + bl fft\n2\()_neon + add r0, r4, #\n4*2*4 + bl fft\n4\()_neon + add r0, r4, #\n4*3*4 + bl fft\n4\()_neon + mov r0, r4 + pop {r4, lr} + movrel r1, X(ff_cos_\n\()_fixed) + mov r2, #\n4/2 + b fft_pass_neon +endfunc +.endm + + def_fft 32, 16, 8 + def_fft 64, 32, 16 + def_fft 128, 64, 32 + def_fft 256, 128, 64 + def_fft 512, 256, 128 + def_fft 1024, 512, 256 + def_fft 2048, 1024, 512 + def_fft 4096, 2048, 1024 + def_fft 8192, 4096, 2048 + def_fft 16384, 8192, 4096 + def_fft 32768, 16384, 8192 + def_fft 65536, 32768, 16384 + +function ff_fft_fixed_calc_neon, export=1 + ldr r2, [r0] + sub r2, r2, #2 + movrel r3, fft_fixed_tab_neon + ldr r3, [r3, r2, lsl #2] + mov r0, r1 + bx r3 +endfunc + +const fft_fixed_tab_neon + .word fft4_neon + .word fft8_neon + .word fft16_neon + .word fft32_neon + .word fft64_neon + .word fft128_neon + .word fft256_neon + .word fft512_neon + .word fft1024_neon + .word fft2048_neon + .word fft4096_neon + .word fft8192_neon + .word fft16384_neon + .word fft32768_neon + .word fft65536_neon +endconst diff --git a/libavcodec/arm/mdct_fixed_neon.S b/libavcodec/arm/mdct_fixed_neon.S new file mode 100644 index 0000000000..d219216a20 --- /dev/null +++ b/libavcodec/arm/mdct_fixed_neon.S @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + + preserve8 + +.macro prerot dst, rt + lsr r3, r6, #2 @ n4 + add \rt, r4, r6, lsr #1 @ revtab + n4 + add r9, r3, r3, lsl #1 @ n3 + add r8, r7, r6 @ tcos + n4 + add r3, r2, r6, lsr #1 @ in + n4 + add r9, r2, r9, lsl #1 @ in + n3 + sub r8, r8, #16 + sub r10, r3, #16 + sub r11, r9, #16 + mov r12, #-16 +1: + vld2.16 {d0,d1}, [r9, :128]! + vld2.16 {d2,d3}, [r11,:128], r12 + vld2.16 {d4,d5}, [r3, :128]! + vld2.16 {d6,d7}, [r10,:128], r12 + vld2.16 {d16,d17},[r7, :128]! @ cos, sin + vld2.16 {d18,d19},[r8, :128], r12 + vrev64.16 q1, q1 + vrev64.16 q3, q3 + vrev64.16 q9, q9 + vneg.s16 d0, d0 + vneg.s16 d2, d2 + vneg.s16 d16, d16 + vneg.s16 d18, d18 + vhsub.s16 d0, d0, d3 @ re + vhsub.s16 d4, d7, d4 @ im + vhsub.s16 d6, d6, d5 + vhsub.s16 d2, d2, d1 + vmull.s16 q10, d0, d16 + vmlsl.s16 q10, d4, d17 + vmull.s16 q11, d0, d17 + vmlal.s16 q11, d4, d16 + vmull.s16 q12, d6, d18 + vmlsl.s16 q12, d2, d19 + vmull.s16 q13, d6, d19 + vmlal.s16 q13, d2, d18 + vshrn.s32 d0, q10, #15 + vshrn.s32 d1, q11, #15 + vshrn.s32 d2, q12, #15 + vshrn.s32 d3, q13, #15 + vzip.16 d0, d1 + vzip.16 d2, d3 + ldrh lr, [r4], #2 + ldrh r2, [\rt, #-2]! + add lr, \dst, lr, lsl #2 + add r2, \dst, r2, lsl #2 + vst1.32 {d0[0]}, [lr,:32] + vst1.32 {d2[0]}, [r2,:32] + ldrh lr, [r4], #2 + ldrh r2, [\rt, #-2]! + add lr, \dst, lr, lsl #2 + add r2, \dst, r2, lsl #2 + vst1.32 {d0[1]}, [lr,:32] + vst1.32 {d2[1]}, [r2,:32] + ldrh lr, [r4], #2 + ldrh r2, [\rt, #-2]! + add lr, \dst, lr, lsl #2 + add r2, \dst, r2, lsl #2 + vst1.32 {d1[0]}, [lr,:32] + vst1.32 {d3[0]}, [r2,:32] + ldrh lr, [r4], #2 + ldrh r2, [\rt, #-2]! + add lr, \dst, lr, lsl #2 + add r2, \dst, r2, lsl #2 + vst1.32 {d1[1]}, [lr,:32] + vst1.32 {d3[1]}, [r2,:32] + subs r6, r6, #32 + bgt 1b +.endm + +function ff_mdct_fixed_calc_neon, export=1 + push {r1,r4-r11,lr} + + ldr r4, [r0, #8] @ revtab + ldr r6, [r0, #16] @ mdct_size; n + ldr r7, [r0, #24] @ tcos + + prerot r1, r5 + + mov r4, r0 + bl X(ff_fft_fixed_calc_neon) + + pop {r5} + mov r12, #-16 + ldr r6, [r4, #16] @ mdct_size; n + ldr r7, [r4, #24] @ tcos + add r5, r5, r6, lsr #1 + add r7, r7, r6, lsr #1 + sub r1, r5, #16 + sub r2, r7, #16 +1: + vld2.16 {d4,d5}, [r7,:128]! + vld2.16 {d6,d7}, [r2,:128], r12 + vld2.16 {d0,d1}, [r5,:128] + vld2.16 {d2,d3}, [r1,:128] + vrev64.16 q3, q3 + vrev64.16 q1, q1 + vneg.s16 q3, q3 + vneg.s16 q2, q2 + vmull.s16 q11, d2, d6 + vmlal.s16 q11, d3, d7 + vmull.s16 q8, d0, d5 + vmlsl.s16 q8, d1, d4 + vmull.s16 q9, d0, d4 + vmlal.s16 q9, d1, d5 + vmull.s16 q10, d2, d7 + vmlsl.s16 q10, d3, d6 + vshrn.s32 d0, q11, #15 + vshrn.s32 d1, q8, #15 + vshrn.s32 d2, q9, #15 + vshrn.s32 d3, q10, #15 + vrev64.16 q0, q0 + vst2.16 {d2,d3}, [r5,:128]! + vst2.16 {d0,d1}, [r1,:128], r12 + subs r6, r6, #32 + bgt 1b + + pop {r4-r11,pc} +endfunc + +function ff_mdct_fixed_calcw_neon, export=1 + push {r1,r4-r11,lr} + + ldrd r4, r5, [r0, #8] @ revtab, tmp_buf + ldr r6, [r0, #16] @ mdct_size; n + ldr r7, [r0, #24] @ tcos + + prerot r5, r1 + + mov r4, r0 + mov r1, r5 + bl X(ff_fft_fixed_calc_neon) + + pop {r7} + mov r12, #-16 + ldr r6, [r4, #16] @ mdct_size; n + ldr r9, [r4, #24] @ tcos + add r5, r5, r6, lsr #1 + add r7, r7, r6 + add r9, r9, r6, lsr #1 + sub r3, r5, #16 + sub r1, r7, #16 + sub r2, r9, #16 +1: + vld2.16 {d4,d5}, [r9,:128]! + vld2.16 {d6,d7}, [r2,:128], r12 + vld2.16 {d0,d1}, [r5,:128]! + vld2.16 {d2,d3}, [r3,:128], r12 + vrev64.16 q3, q3 + vrev64.16 q1, q1 + vneg.s16 q3, q3 + vneg.s16 q2, q2 + vmull.s16 q8, d2, d6 + vmlal.s16 q8, d3, d7 + vmull.s16 q9, d0, d5 + vmlsl.s16 q9, d1, d4 + vmull.s16 q10, d0, d4 + vmlal.s16 q10, d1, d5 + vmull.s16 q11, d2, d7 + vmlsl.s16 q11, d3, d6 + vrev64.32 q8, q8 + vrev64.32 q9, q9 + vst2.32 {q10,q11},[r7,:128]! + vst2.32 {d16,d18},[r1,:128], r12 + vst2.32 {d17,d19},[r1,:128], r12 + subs r6, r6, #32 + bgt 1b + + pop {r4-r11,pc} +endfunc diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 4659719a42..c6fb2efaf4 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -1829,19 +1829,21 @@ typedef struct AVCodecContext { */ uint64_t error[4]; +#if FF_API_MB_Q /** * minimum MB quantizer * - encoding: unused * - decoding: unused */ - int mb_qmin; + attribute_deprecated int mb_qmin; /** * maximum MB quantizer * - encoding: unused * - decoding: unused */ - int mb_qmax; + attribute_deprecated int mb_qmax; +#endif /** * motion estimation comparison function @@ -2162,16 +2164,19 @@ typedef struct AVCodecContext { */ int error_rate; +#if FF_API_ANTIALIAS_ALGO /** * MP3 antialias algorithm, see FF_AA_* below. * - encoding: unused * - decoding: Set by user. */ - int antialias_algo; + attribute_deprecated int antialias_algo; #define FF_AA_AUTO 0 #define FF_AA_FASTINT 1 //not implemented yet #define FF_AA_INT 2 #define FF_AA_FLOAT 3 +#endif + /** * quantizer noise shaping * - encoding: Set by user. diff --git a/libavcodec/fft.c b/libavcodec/fft.c index c8f74a2d89..58484e047b 100644 --- a/libavcodec/fft.c +++ b/libavcodec/fft.c @@ -126,6 +126,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; #else if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; + if (ARCH_ARM) ff_fft_fixed_init_arm(s); #endif for(j=4; j<=nbits; j++) { diff --git a/libavcodec/fft.h b/libavcodec/fft.h index f3f0f18289..8280a361cc 100644 --- a/libavcodec/fft.h +++ b/libavcodec/fft.h @@ -132,9 +132,13 @@ void ff_init_ff_cos_tabs(int index); */ int ff_fft_init(FFTContext *s, int nbits, int inverse); +#if CONFIG_FFT_FLOAT void ff_fft_init_altivec(FFTContext *s); void ff_fft_init_mmx(FFTContext *s); void ff_fft_init_arm(FFTContext *s); +#else +void ff_fft_fixed_init_arm(FFTContext *s); +#endif void ff_fft_end(FFTContext *s); diff --git a/libavcodec/options.c b/libavcodec/options.c index a5fa5eb606..f6010bc638 100644 --- a/libavcodec/options.c +++ b/libavcodec/options.c @@ -259,8 +259,10 @@ static const AVOption options[]={ {"pf", "forward predicted MVs of P-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_P_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bf", "forward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_FOR, INT_MIN, INT_MAX, V|D, "debug_mv"}, {"bb", "backward predicted MVs of B-frames", 0, FF_OPT_TYPE_CONST, FF_DEBUG_VIS_MV_B_BACK, INT_MIN, INT_MAX, V|D, "debug_mv"}, +#if FF_API_MB_Q {"mb_qmin", "obsolete, use qmin", OFFSET(mb_qmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"mb_qmax", "obsolete, use qmax", OFFSET(mb_qmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +#endif {"cmp", "full pel me compare function", OFFSET(me_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"subcmp", "sub pel me compare function", OFFSET(me_sub_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, {"mbcmp", "macroblock compare function", OFFSET(mb_cmp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "cmp_func"}, @@ -317,7 +319,9 @@ static const AVOption options[]={ {"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, {"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, CODEC_FLAG2_FASTPSKIP|CODEC_FLAG2_BIT_RESERVOIR|CODEC_FLAG2_PSY|CODEC_FLAG2_MBTREE, 0, UINT_MAX, V|A|E|D, "flags2"}, {"error", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E}, +#if FF_API_ANTIALIAS_ALGO {"antialias", "MP3 antialias algorithm", OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"}, +#endif {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"}, {"fastint", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_FASTINT, INT_MIN, INT_MAX, V|D, "aa"}, {"int", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_INT, INT_MIN, INT_MAX, V|D, "aa"}, diff --git a/libavcodec/version.h b/libavcodec/version.h index 9d211b90fe..b93e7534bb 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -77,5 +77,11 @@ #ifndef FF_API_RATE_EMU #define FF_API_RATE_EMU (LIBAVCODEC_VERSION_MAJOR < 53) #endif +#ifndef FF_API_MB_Q +#define FF_API_MB_Q (LIBAVCODEC_VERSION_MAJOR < 53) +#endif +#ifndef FF_API_ANTIALIAS_ALGO +#define FF_API_ANTIALIAS_ALGO (LIBAVCODEC_VERSION_MAJOR < 54) +#endif #endif /* AVCODEC_VERSION_H */ |