diff options
author | Måns Rullgård <mans@mansr.com> | 2010-04-12 11:14:55 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2010-04-12 11:14:55 +0000 |
commit | 766fefe8e275c693c5acd7b2d7555b2df6e7751a (patch) | |
tree | 5e48c2dd2cb9c67c8f377094c14d867a67ef8ed3 /libavcodec/dca.c | |
parent | b92d483bac3c833430a48eefbd0bfbe636772c27 (diff) | |
download | ffmpeg-766fefe8e275c693c5acd7b2d7555b2df6e7751a.tar.gz |
DCA: simplify lfe_interpolation_fir()
This reorders the lfe_fir tables, and drops the mirrored half,
such that the loops in lfe_interpolation_fir() can be simplified.
The new loop structure should be easier to implement with SIMD.
Static data size is reduced by 2kB.
3% faster on Cortex-A8.
Originally committed as revision 22849 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dca.c')
-rw-r--r-- | libavcodec/dca.c | 29 |
1 file changed, 19 insertions, 10 deletions
```diff
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 7a6841d49c..79ea37316c 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -802,28 +802,37 @@ static void lfe_interpolation_fir(int decimation_select,
 
     int decifactor, k, j;
     const float *prCoeff;
-
-    int interp_index = 0; /* Index to the interpolated samples */
     int deciindex;
 
     /* Select decimation filter */
     if (decimation_select == 1) {
-        decifactor = 128;
+        decifactor = 64;
         prCoeff = lfe_fir_128;
     } else {
-        decifactor = 64;
+        decifactor = 32;
         prCoeff = lfe_fir_64;
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        /* One decimated sample generates decifactor interpolated ones */
+        float *samples_out2 = samples_out + decifactor;
+        const float *cf0 = prCoeff;
+        const float *cf1 = prCoeff + 256;
+
+        /* One decimated sample generates 2*decifactor interpolated ones */
         for (k = 0; k < decifactor; k++) {
-            float rTmp = 0.0;
-            //FIXME the coeffs are symetric, fix that
-            for (j = 0; j < 512 / decifactor; j++)
-                rTmp += samples_in[deciindex - j] * prCoeff[k + j * decifactor];
-            samples_out[interp_index++] = (rTmp * scale) + bias;
+            float v0 = 0.0;
+            float v1 = 0.0;
+            for (j = 0; j < 256 / decifactor; j++) {
+                float s = samples_in[-j];
+                v0 += s * *cf0++;
+                v1 += s * *--cf1;
+            }
+            *samples_out++ = (v0 * scale) + bias;
+            *samples_out2++ = (v1 * scale) + bias;
         }
+
+        samples_in++;
+        samples_out += decifactor;
     }
 }
 
```