diff options
author | Måns Rullgård <mans@mansr.com> | 2009-09-14 21:37:41 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2009-09-14 21:37:41 +0000 |
commit | 9ecc414195f4ef931e9dcfb9e6017fb7d757f124 (patch) | |
tree | 516d62e6f8f781518e2b38997e015cf79168a5bd | |
parent | 89c4e176f6b22ddfadaea2ff698a7c479ede474e (diff) | |
download | ffmpeg-9ecc414195f4ef931e9dcfb9e6017fb7d757f124.tar.gz |
ARM: 10l: fix large FFTs
Originally committed as revision 19846 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/arm/fft_neon.S | 6 |
-rw-r--r-- | libavcodec/arm/mdct_neon.S | 18 |
-rw-r--r-- | libavcodec/fft.c | 5 |
3 files changed, 17 insertions, 12 deletions
diff --git a/libavcodec/arm/fft_neon.S b/libavcodec/arm/fft_neon.S
index 6ed5789fb7..fcd835ab88 100644
--- a/libavcodec/arm/fft_neon.S
+++ b/libavcodec/arm/fft_neon.S
@@ -327,8 +327,10 @@ function ff_fft_permute_neon, export=1
 1:
         vld1.32 {d0-d1}, [r1,:128]!
         ldr r4, [r0], #4
-        uxtah lr, r3, r4
-        uxtah r4, r3, r4, ror #16
+        uxth lr, r4
+        uxth r4, r4, ror #16
+        add lr, r3, lr, lsl #3
+        add r4, r3, r4, lsl #3
         vst1.32 {d0}, [lr,:64]
         vst1.32 {d1}, [r4,:64]
         subs r12, r12, #2
diff --git a/libavcodec/arm/mdct_neon.S b/libavcodec/arm/mdct_neon.S
index 26ac199457..5cd46476e8 100644
--- a/libavcodec/arm/mdct_neon.S
+++ b/libavcodec/arm/mdct_neon.S
@@ -52,8 +52,10 @@ function ff_imdct_half_neon, export=1
         vmul.f32 d5, d17, d3
         vsub.f32 d4, d6, d4
         vadd.f32 d5, d5, d7
-        uxtah r8, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r8, r6, ror #16
+        uxth r6, r6
+        add r8, r1, r8, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16-d17},[r7,:128],r12
         vld2.32 {d0-d1}, [r2,:128]!
@@ -198,8 +200,10 @@ function ff_mdct_calc_neon, export=1
         subs lr, lr, #16
         vsub.f32 d6, d6, d7 @ -R*c-I*s
         vadd.f32 d7, d4, d5 @ -R*s+I*c
-        uxtah r10, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r10, r6, ror #16
+        uxth r6, r6
+        add r10, r1, r10, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0
         vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0
@@ -245,8 +249,10 @@ function ff_mdct_calc_neon, export=1
         subs lr, lr, #16
         vsub.f32 d6, d7, d6 @ I*s-R*c
         vadd.f32 d7, d4, d5 @ R*s-I*c
-        uxtah r10, r1, r6, ror #16
-        uxtah r6, r1, r6
+        uxth r10, r6, ror #16
+        uxth r6, r6
+        add r10, r1, r10, lsl #3
+        add r6, r1, r6, lsl #3
         beq 1f
         vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0
         vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0
diff --git a/libavcodec/fft.c b/libavcodec/fft.c
index 93b4eb0434..538f66858b 100644
--- a/libavcodec/fft.c
+++ b/libavcodec/fft.c
@@ -64,7 +64,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     float alpha, c1, s1, s2;
     int split_radix = 1;
     int av_unused has_vectors;
-    int revtab_shift = 0;
 
     if (nbits < 2 || nbits > 16)
         goto fail;
@@ -120,7 +119,6 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
     s->imdct_calc = ff_imdct_calc_neon;
     s->imdct_half = ff_imdct_half_neon;
     s->mdct_calc = ff_mdct_calc_neon;
-    revtab_shift = 3;
 #endif
 
     if (split_radix) {
@@ -134,8 +132,7 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
             tab[m/2-i] = tab[i];
         }
         for(i=0; i<n; i++)
-            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] =
-                i << revtab_shift;
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
         s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
     } else {
         int np, nblocks, np2, l;