about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Michael Niedermayer <michael@niedermayer.cc> 2016-03-04 15:39:55 +0100
committer: Michael Niedermayer <michael@niedermayer.cc> 2016-03-04 16:05:47 +0100
commit: 305344d89e21ed11c74274167cf597f151778c42 (patch)
tree: 56d6dd9315aa040e71a8b229f3215b5d9821acf1
parent: ae76b842213380758adf4828b8602ac57a7492e4 (diff)
download: ffmpeg-305344d89e21ed11c74274167cf597f151778c42.tar.gz
avcodec/fft: Add revtab32 for FFTs with more than 65536 samples
x86 optimizations are used only for the cases they support (<=65536 samples)

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
-rw-r--r-- libavcodec/fft.h 1
-rw-r--r-- libavcodec/fft_template.c 31
-rw-r--r-- libavcodec/x86/fft_init.c 3
3 files changed, 30 insertions, 5 deletions
diff --git a/libavcodec/fft.h b/libavcodec/fft.h
index 60df239416..c858570a21 100644
--- a/libavcodec/fft.h
+++ b/libavcodec/fft.h
@@ -110,6 +110,7 @@ struct FFTContext {
void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input);
enum fft_permutation_type fft_permutation;
enum mdct_permutation_type mdct_permutation;
+ uint32_t *revtab32;
};
#if CONFIG_HARDCODED_TABLES
diff --git a/libavcodec/fft_template.c b/libavcodec/fft_template.c
index 2781a332c6..480557f49f 100644
--- a/libavcodec/fft_template.c
+++ b/libavcodec/fft_template.c
@@ -143,14 +143,23 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
{
int i, j, n;
+ s->revtab = NULL;
+ s->revtab32 = NULL;
+
if (nbits < 2 || nbits > 17)
goto fail;
s->nbits = nbits;
n = 1 << nbits;
- s->revtab = av_malloc(n * sizeof(uint16_t));
- if (!s->revtab)
- goto fail;
+ if (nbits <= 16) {
+ s->revtab = av_malloc(n * sizeof(uint16_t));
+ if (!s->revtab)
+ goto fail;
+ } else {
+ s->revtab32 = av_malloc(n * sizeof(uint32_t));
+ if (!s->revtab32)
+ goto fail;
+ }
s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
if (!s->tmp_buf)
goto fail;
@@ -192,16 +201,22 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
fft_perm_avx(s);
} else {
for(i=0; i<n; i++) {
+ int k;
j = i;
if (s->fft_permutation == FF_FFT_PERM_SWAP_LSBS)
j = (j&~3) | ((j>>1)&1) | ((j<<1)&2);
- s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = j;
+ k = -split_radix_permutation(i, n, s->inverse) & (n-1);
+ if (s->revtab)
+ s->revtab[k] = j;
+ if (s->revtab32)
+ s->revtab32[k] = j;
}
}
return 0;
fail:
av_freep(&s->revtab);
+ av_freep(&s->revtab32);
av_freep(&s->tmp_buf);
return -1;
}
@@ -210,15 +225,21 @@ static void fft_permute_c(FFTContext *s, FFTComplex *z)
{
int j, np;
const uint16_t *revtab = s->revtab;
+ const uint32_t *revtab32 = s->revtab32;
np = 1 << s->nbits;
/* TODO: handle split-radix permute in a more optimal way, probably in-place */
- for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+ if (revtab) {
+ for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+ } else
+ for(j=0;j<np;j++) s->tmp_buf[revtab32[j]] = z[j];
+
memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
}
av_cold void ff_fft_end(FFTContext *s)
{
av_freep(&s->revtab);
+ av_freep(&s->revtab32);
av_freep(&s->tmp_buf);
}
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 5085f11380..337f32d6f7 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -26,6 +26,9 @@ av_cold void ff_fft_init_x86(FFTContext *s)
{
int cpu_flags = av_get_cpu_flags();
+ if (s->nbits > 16)
+ return;
+
#if ARCH_X86_32
if (EXTERNAL_AMD3DNOW(cpu_flags)) {
/* 3DNow! for K6-2/3 */