diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-04-27 03:51:04 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-04-27 03:51:04 +0200 |
commit | d7e5aebae7652ac766034f1d90e5a4f62677fb3c (patch) | |
tree | b77ee45f34455cf9aa6e28105a7533ecc204b898 /libavcodec/fft.c | |
parent | 93c28a55fd84280d97c3c0dd7b0d546043242c34 (diff) | |
parent | 79ee8977c25eee2408ef7b2822f377a983e4d65b (diff) | |
download | ffmpeg-d7e5aebae7652ac766034f1d90e5a4f62677fb3c.tar.gz |
Merge remote branch 'qatar/master'
* qatar/master: (23 commits)
ac3enc: correct the flipped sign in the ac3_fixed encoder
Eliminate pointless '#if 1' statements without matching '#else'.
Add AVX FFT implementation.
Increase alignment of av_malloc() as needed by AVX ASM.
Update x86inc.asm from x264 to allow AVX emulation using SSE and MMX.
mjpeg: Detect overreads in mjpeg_decode_scan() and error out.
documentation: extend documentation for ffmpeg -aspect option
APIChanges: update commit hashes for recent additions.
lavc: deprecate FF_*_TYPE macros in favor of AV_PICTURE_TYPE_* enums
aac: add headers needed for log2f()
lavc: remove FF_API_MB_Q cruft
lavc: remove FF_API_RATE_EMU cruft
lavc: remove FF_API_HURRY_UP cruft
pad: make the filter parametric
vsrc_movie: add key_frame and pict_type.
vsrc_movie: fix leak in request_frame()
lavfi: add key_frame and pict_type to AVFilterBufferRefVideo.
vsrc_buffer: add sample_aspect_ratio fields to arguments.
lavfi: add fieldorder filter
scale: make the filter parametric
...
Conflicts:
Changelog
doc/filters.texi
ffmpeg.c
libavcodec/ac3dec.h
libavcodec/dsputil.c
libavfilter/avfilter.h
libavfilter/vf_scale.c
libavfilter/vf_yadif.c
libavfilter/vsrc_buffer.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/fft.c')
-rw-r--r-- | libavcodec/fft.c | 53 |
1 files changed, 48 insertions, 5 deletions
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 58484e047b..4eb46f14c8 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -93,6 +93,44 @@ av_cold void ff_init_ff_cos_tabs(int index)
 #endif
 }
 
+static const int avx_tab[] = {
+    0, 4, 1, 5, 8, 12, 9, 13, 2, 6, 3, 7, 10, 14, 11, 15
+};
+
+static int is_second_half_of_fft32(int i, int n)
+{
+    if (n <= 32)
+        return i >= 16;
+    else if (i < n/2)
+        return is_second_half_of_fft32(i, n/2);
+    else if (i < 3*n/4)
+        return is_second_half_of_fft32(i - n/2, n/4);
+    else
+        return is_second_half_of_fft32(i - 3*n/4, n/4);
+}
+
+static av_cold void fft_perm_avx(FFTContext *s)
+{
+    int i;
+    int n = 1 << s->nbits;
+
+    for (i = 0; i < n; i += 16) {
+        int k;
+        if (is_second_half_of_fft32(i, n)) {
+            for (k = 0; k < 16; k++)
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] =
+                    i + avx_tab[k];
+
+        } else {
+            for (k = 0; k < 16; k++) {
+                int j = i + k;
+                j = (j & ~7) | ((j >> 1) & 3) | ((j << 2) & 4);
+                s->revtab[-split_radix_permutation(i + k, n, s->inverse) & (n - 1)] = j;
+            }
+        }
+    }
+}
+
 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
     int i, j, n;
@@ -132,11 +170,16 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     for(j=4; j<=nbits; j++) {
         ff_init_ff_cos_tabs(j);
     }
-    for(i=0; i<n; i++) {
-        int j = i;
-        if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
-            j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
-        s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+
+    if (s->fft_permutation == FF_FFT_PERM_AVX) {
+        fft_perm_avx(s);
+    } else {
+        for(i=0; i<n; i++) {
+            int j = i;
+            if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
+                j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+        }
     }
 
     return 0;