diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-04-02 01:51:44 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-04-02 01:54:27 +0200 |
commit | 4defa68fe25eae4d7c27341e3b35811c047dcd3f (patch) | |
tree | 9d4d37343ec4c99801c1b76b813b0a5b2b04576b /libavcodec | |
parent | a2f5e14a867768019b49b830e29801f1bfb2abb7 (diff) | |
parent | aa05f2126e18d23432bde77e6f44e41691472fef (diff) | |
download | ffmpeg-4defa68fe25eae4d7c27341e3b35811c047dcd3f.tar.gz |
Merge remote branch 'qatar/master'
* qatar/master:
ac3enc: ARM optimised ac3_compute_mantissa_size
ac3: armv6 optimised bit_alloc_calc_bap
fate: simplify fft test rules
avio: document avio_alloc_context.
lavf: make compute_chapters_end less picky.
sierravmd: fix Indeo3 videos
FFT: simplify fft8()
fate: add fixed-point fft/mdct tests
Fixed-point support in fft-test
ape: check that number of seektable entries is equal to number of frames
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/Makefile | 2 | ||||
-rw-r--r-- | libavcodec/arm/Makefile | 6 | ||||
-rw-r--r-- | libavcodec/arm/ac3dsp_arm.S | 52 | ||||
-rw-r--r-- | libavcodec/arm/ac3dsp_armv6.S | 83 | ||||
-rw-r--r-- | libavcodec/arm/ac3dsp_init_arm.c | 13 | ||||
-rw-r--r-- | libavcodec/fft-fixed-test.c | 20 | ||||
-rw-r--r-- | libavcodec/fft-test.c | 73 | ||||
-rw-r--r-- | libavcodec/fft.c | 13 |
8 files changed, 229 insertions, 33 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 7f771be2b1..0730124870 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -661,7 +661,7 @@ SKIPHEADERS += mpegaudio3.h EXAMPLES = api -TESTPROGS = cabac dct eval fft h264 iirfilter rangecoder snow +TESTPROGS = cabac dct eval fft fft-fixed h264 iirfilter rangecoder snow TESTPROGS-$(HAVE_MMX) += motion TESTOBJS = dctref.o diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 08697da29b..3b77a5548d 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -1,6 +1,10 @@ -OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o +OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ + arm/ac3dsp_arm.o + OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ +ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o + OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o OBJS-$(CONFIG_VP8_DECODER) += arm/vp8dsp_init_arm.o diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S new file mode 100644 index 0000000000..d7d498e41f --- /dev/null +++ b/libavcodec/arm/ac3dsp_arm.S @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +function ff_ac3_compute_mantissa_size_arm, export=1 + push {r4-r8,lr} + ldm r0, {r4-r8} + mov r3, r0 + mov r0, #0 +1: + ldrb lr, [r1], #1 + subs r2, r2, #1 + blt 2f + cmp lr, #4 + bgt 3f + subs lr, lr, #1 + addlt r4, r4, #1 + addeq r5, r5, #1 + ble 1b + subs lr, lr, #2 + addlt r6, r6, #1 + addeq r7, r7, #1 + addgt r8, r8, #1 + b 1b +3: + cmp lr, #14 + sublt lr, lr, #1 + addgt r0, r0, #16 + addle r0, r0, lr + b 1b +2: + stm r3, {r4-r8} + pop {r4-r8,pc} +endfunc diff --git a/libavcodec/arm/ac3dsp_armv6.S b/libavcodec/arm/ac3dsp_armv6.S new file mode 100644 index 0000000000..7f01addbde --- /dev/null +++ b/libavcodec/arm/ac3dsp_armv6.S @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2011 Mans Rullgard <mans@mansr.com> + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "asm.S" + +function ff_ac3_bit_alloc_calc_bap_armv6, export=1 + ldr r12, [sp] + cmp r12, #-960 + beq 4f + push {r4-r11,lr} + add r5, sp, #40 + movrel r4, X(ff_ac3_bin_to_band_tab) + movrel lr, X(ff_ac3_band_start_tab) + ldm r5, {r5-r7} + ldrb r4, [r4, r2] + add r1, r1, r2, lsl #1 @ psd + start + add r0, r0, r4, lsl #1 @ mask + band + add r4, lr, r4 + add r7, r7, r2 @ bap + start + ldrb r10, [r4], #1 +1: + ldrsh r9, [r0], #2 @ mask[band] + movw r8, #0x1fe0 + sub r9, r9, r12 @ - snr_offset + mov r11, r10 + ldrb r10, [r4], #1 @ band_start_tab[band++] + subs r9, r9, r5 @ - floor + movlt r9, #0 + cmp r10, r3 @ - end + and r9, r9, r8 @ & 0x1fe0 + subgt r8, r3, r11 + suble r8, r10, r11 + add r9, r9, r5 @ + floor => m + tst r8, #1 + add r2, r7, r8 + bne 3f + b 5f +2: + ldrsh r8, [r1], #2 + ldrsh lr, [r1], #2 + sub r8, r8, r9 + sub lr, lr, r9 + usat r8, #6, r8, asr #5 @ address + usat lr, #6, lr, asr #5 + ldrb r8, [r6, r8] @ bap_tab[address] + ldrb lr, [r6, lr] + strb r8, [r7], #1 @ bap[bin] + strb lr, [r7], #1 +5: cmp r7, r2 + blo 2b + cmp r3, r11 + bgt 1b + pop {r4-r11,pc} +3: + ldrsh r8, [r1], #2 @ psd[bin] + sub r8, r8, r9 @ - m + usat r8, #6, r8, asr #5 @ address + ldrb r8, [r6, r8] @ bap_tab[address] + strb r8, [r7], #1 @ bap[bin] + b 5b +4: + ldr r0, [sp, #12] + mov r1, #0 + mov r2, #256 + b memset +endfunc diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c index 0dfeaa9c80..8534c9b97c 100644 --- a/libavcodec/arm/ac3dsp_init_arm.c +++ b/libavcodec/arm/ac3dsp_init_arm.c @@ -29,8 +29,21 @@ void ff_ac3_lshift_int16_neon(int16_t *src, unsigned len, unsigned shift); void ff_ac3_rshift_int32_neon(int32_t *src, unsigned len, unsigned shift); void ff_float_to_fixed24_neon(int32_t *dst, const float *src, unsigned 
int len); +void ff_ac3_bit_alloc_calc_bap_armv6(int16_t *mask, int16_t *psd, + int start, int end, + int snr_offset, int floor, + const uint8_t *bap_tab, uint8_t *bap); + +int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs); + av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact) { + c->compute_mantissa_size = ff_ac3_compute_mantissa_size_arm; + + if (HAVE_ARMV6) { + c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6; + } + if (HAVE_NEON) { c->ac3_exponent_min = ff_ac3_exponent_min_neon; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon; diff --git a/libavcodec/fft-fixed-test.c b/libavcodec/fft-fixed-test.c new file mode 100644 index 0000000000..fa750b6326 --- /dev/null +++ b/libavcodec/fft-fixed-test.c @@ -0,0 +1,20 @@ +/* + * This file is part of FFmpeg. + * + * FFMpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFMpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#define CONFIG_FFT_FLOAT 0 +#include "fft-test.c" diff --git a/libavcodec/fft-test.c b/libavcodec/fft-test.c index 9ad8793c2b..a676627de2 100644 --- a/libavcodec/fft-test.c +++ b/libavcodec/fft-test.c @@ -27,8 +27,10 @@ #include "libavutil/lfg.h" #include "libavutil/log.h" #include "fft.h" +#if CONFIG_FFT_FLOAT #include "dct.h" #include "rdft.h" +#endif #include <math.h> #include <unistd.h> #include <sys/time.h> @@ -47,7 +49,19 @@ pim += (MUL16(are, bim) + MUL16(bre, aim));\ } -FFTComplex *exptab; +#if CONFIG_FFT_FLOAT +# define RANGE 1.0 +# define REF_SCALE(x, bits) (x) +# define FMT "%10.6f" +#else +# define RANGE 16384 +# define REF_SCALE(x, bits) ((x) / (1<<(bits))) +# define FMT "%6d" +#endif + +struct { + float re, im; +} *exptab; static void fft_ref_init(int nbits, int inverse) { @@ -55,7 +69,7 @@ static void fft_ref_init(int nbits, int inverse) double c1, s1, alpha; n = 1 << nbits; - exptab = av_malloc((n / 2) * sizeof(FFTComplex)); + exptab = av_malloc((n / 2) * sizeof(*exptab)); for (i = 0; i < (n/2); i++) { alpha = 2 * M_PI * (float)i / (float)n; @@ -92,12 +106,12 @@ static void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits) CMAC(tmp_re, tmp_im, c, s, q->re, q->im); q++; } - tabr[i].re = tmp_re; - tabr[i].im = tmp_im; + tabr[i].re = REF_SCALE(tmp_re, nbits); + tabr[i].im = REF_SCALE(tmp_im, nbits); } } -static void imdct_ref(float *out, float *in, int nbits) +static void imdct_ref(FFTSample *out, FFTSample *in, int nbits) { int n = 1<<nbits; int k, i, a; @@ -110,12 +124,12 @@ static void imdct_ref(float *out, float *in, int nbits) f = cos(M_PI * a / (double)(2 * n)); sum += f * in[k]; } - out[i] = -sum; + out[i] = REF_SCALE(-sum, nbits - 2); } } /* NOTE: no normalisation by 1 / N is done */ -static void mdct_ref(float 
*output, float *input, int nbits) +static void mdct_ref(FFTSample *output, FFTSample *input, int nbits) { int n = 1<<nbits; int k, i; @@ -128,10 +142,11 @@ static void mdct_ref(float *output, float *input, int nbits) a = (2*M_PI*(2*i+1+n/2)*(2*k+1) / (4 * n)); s += input[i] * cos(a); } - output[k] = s; + output[k] = REF_SCALE(s, nbits - 1); } } +#if CONFIG_FFT_FLOAT static void idct_ref(float *output, float *input, int nbits) { int n = 1<<nbits; @@ -164,11 +179,12 @@ static void dct_ref(float *output, float *input, int nbits) output[k] = s; } } +#endif -static float frandom(AVLFG *prng) +static FFTSample frandom(AVLFG *prng) { - return (int16_t)av_lfg_get(prng) / 32768.0; + return (int16_t)av_lfg_get(prng) / 32768.0 * RANGE; } static int64_t gettime(void) @@ -178,7 +194,7 @@ static int64_t gettime(void) return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; } -static int check_diff(float *tab1, float *tab2, int n, double scale) +static int check_diff(FFTSample *tab1, FFTSample *tab2, int n, double scale) { int i; double max= 0; @@ -186,9 +202,9 @@ static int check_diff(float *tab1, float *tab2, int n, double scale) int err = 0; for (i = 0; i < n; i++) { - double e= fabsf(tab1[i] - (tab2[i] / scale)); + double e = fabsf(tab1[i] - (tab2[i] / scale)) / RANGE; if (e >= 1e-3) { - av_log(NULL, AV_LOG_ERROR, "ERROR %5d: %10.6f %10.6f\n", + av_log(NULL, AV_LOG_ERROR, "ERROR %5d: "FMT" "FMT"\n", i, tab1[i], tab2[i]); err = 1; } @@ -233,8 +249,10 @@ int main(int argc, char **argv) int do_inverse = 0; FFTContext s1, *s = &s1; FFTContext m1, *m = &m1; +#if CONFIG_FFT_FLOAT RDFTContext r1, *r = &r1; DCTContext d1, *d = &d1; +#endif int fft_nbits, fft_size, fft_size_2; double scale = 1.0; AVLFG prng; @@ -297,6 +315,7 @@ int main(int argc, char **argv) ff_fft_init(s, fft_nbits, do_inverse); fft_ref_init(fft_nbits, do_inverse); break; +#if CONFIG_FFT_FLOAT case TRANSFORM_RDFT: if (do_inverse) av_log(NULL, AV_LOG_INFO,"IDFT_C2R"); @@ -312,6 +331,10 @@ int main(int argc, char **argv) 
av_log(NULL, AV_LOG_INFO,"DCT_II"); ff_dct_init(d, fft_nbits, do_inverse ? DCT_III : DCT_II); break; +#endif + default: + av_log(NULL, AV_LOG_ERROR, "Requested transform not supported\n"); + return 1; } av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); @@ -328,15 +351,15 @@ int main(int argc, char **argv) switch (transform) { case TRANSFORM_MDCT: if (do_inverse) { - imdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); - m->imdct_calc(m, tab2, (float *)tab1); - err = check_diff((float *)tab_ref, tab2, fft_size, scale); + imdct_ref((FFTSample *)tab_ref, (FFTSample *)tab1, fft_nbits); + m->imdct_calc(m, tab2, (FFTSample *)tab1); + err = check_diff((FFTSample *)tab_ref, tab2, fft_size, scale); } else { - mdct_ref((float *)tab_ref, (float *)tab1, fft_nbits); + mdct_ref((FFTSample *)tab_ref, (FFTSample *)tab1, fft_nbits); - m->mdct_calc(m, tab2, (float *)tab1); + m->mdct_calc(m, tab2, (FFTSample *)tab1); - err = check_diff((float *)tab_ref, tab2, fft_size / 2, scale); + err = check_diff((FFTSample *)tab_ref, tab2, fft_size / 2, scale); } break; case TRANSFORM_FFT: @@ -345,8 +368,9 @@ int main(int argc, char **argv) s->fft_calc(s, tab); fft_ref(tab_ref, tab1, fft_nbits); - err = check_diff((float *)tab_ref, (float *)tab, fft_size * 2, 1.0); + err = check_diff((FFTSample *)tab_ref, (FFTSample *)tab, fft_size * 2, 1.0); break; +#if CONFIG_FFT_FLOAT case TRANSFORM_RDFT: if (do_inverse) { tab1[ 0].im = 0; @@ -387,6 +411,7 @@ int main(int argc, char **argv) } err = check_diff((float *)tab_ref, (float *)tab, fft_size, 1.0); break; +#endif } /* do a speed test */ @@ -404,15 +429,16 @@ int main(int argc, char **argv) switch (transform) { case TRANSFORM_MDCT: if (do_inverse) { - m->imdct_calc(m, (float *)tab, (float *)tab1); + m->imdct_calc(m, (FFTSample *)tab, (FFTSample *)tab1); } else { - m->mdct_calc(m, (float *)tab, (float *)tab1); + m->mdct_calc(m, (FFTSample *)tab, (FFTSample *)tab1); } break; case TRANSFORM_FFT: memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); 
s->fft_calc(s, tab); break; +#if CONFIG_FFT_FLOAT case TRANSFORM_RDFT: memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); r->rdft_calc(r, tab2); @@ -421,6 +447,7 @@ int main(int argc, char **argv) memcpy(tab2, tab1, fft_size * sizeof(FFTSample)); d->dct_calc(d, tab2); break; +#endif } } duration = gettime() - time_start; @@ -441,12 +468,14 @@ int main(int argc, char **argv) case TRANSFORM_FFT: ff_fft_end(s); break; +#if CONFIG_FFT_FLOAT case TRANSFORM_RDFT: ff_rdft_end(r); break; case TRANSFORM_DCT: ff_dct_end(d); break; +#endif } av_free(tab); diff --git a/libavcodec/fft.c b/libavcodec/fft.c index 6f08662ddc..d12d9f7f99 100644 --- a/libavcodec/fft.c +++ b/libavcodec/fft.c @@ -246,21 +246,16 @@ static void fft4(FFTComplex *z) static void fft8(FFTComplex *z) { - FFTDouble t1, t2, t3, t4, t5, t6, t7, t8; + FFTDouble t1, t2, t3, t4, t5, t6; fft4(z); BF(t1, z[5].re, z[4].re, -z[5].re); BF(t2, z[5].im, z[4].im, -z[5].im); - BF(t3, z[7].re, z[6].re, -z[7].re); - BF(t4, z[7].im, z[6].im, -z[7].im); - BF(t8, t1, t3, t1); - BF(t7, t2, t2, t4); - BF(z[4].re, z[0].re, z[0].re, t1); - BF(z[4].im, z[0].im, z[0].im, t2); - BF(z[6].re, z[2].re, z[2].re, t7); - BF(z[6].im, z[2].im, z[2].im, t8); + BF(t5, z[7].re, z[6].re, -z[7].re); + BF(t6, z[7].im, z[6].im, -z[7].im); + BUTTERFLIES(z[0],z[2],z[4],z[6]); TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); } |