aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/dca.c
diff options
context:
space:
mode:
author: Måns Rullgård <mans@mansr.com> 2010-04-12 11:14:55 +0000
committer: Måns Rullgård <mans@mansr.com> 2010-04-12 11:14:55 +0000
commit: 766fefe8e275c693c5acd7b2d7555b2df6e7751a (patch)
tree: 5e48c2dd2cb9c67c8f377094c14d867a67ef8ed3 /libavcodec/dca.c
parent: b92d483bac3c833430a48eefbd0bfbe636772c27 (diff)
download: ffmpeg-766fefe8e275c693c5acd7b2d7555b2df6e7751a.tar.gz
DCA: simplify lfe_interpolation_fir()
This reorders the lfe_fir tables, and drops the mirrored half, such that the loops in lfe_interpolation_fir() can be simplified. The new loop structure should be easier to implement with SIMD.

Static data size is reduced by 2kB.

3% faster on Cortex-A8.

Originally committed as revision 22849 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dca.c')
-rw-r--r-- libavcodec/dca.c 29
1 files changed, 19 insertions, 10 deletions
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 7a6841d49c..79ea37316c 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -802,28 +802,37 @@ static void lfe_interpolation_fir(int decimation_select,
int decifactor, k, j;
const float *prCoeff;
-
- int interp_index = 0; /* Index to the interpolated samples */
int deciindex;
/* Select decimation filter */
if (decimation_select == 1) {
- decifactor = 128;
+ decifactor = 64;
prCoeff = lfe_fir_128;
} else {
- decifactor = 64;
+ decifactor = 32;
prCoeff = lfe_fir_64;
}
/* Interpolation */
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
- /* One decimated sample generates decifactor interpolated ones */
+ float *samples_out2 = samples_out + decifactor;
+ const float *cf0 = prCoeff;
+ const float *cf1 = prCoeff + 256;
+
+ /* One decimated sample generates 2*decifactor interpolated ones */
for (k = 0; k < decifactor; k++) {
- float rTmp = 0.0;
- //FIXME the coeffs are symetric, fix that
- for (j = 0; j < 512 / decifactor; j++)
- rTmp += samples_in[deciindex - j] * prCoeff[k + j * decifactor];
- samples_out[interp_index++] = (rTmp * scale) + bias;
+ float v0 = 0.0;
+ float v1 = 0.0;
+ for (j = 0; j < 256 / decifactor; j++) {
+ float s = samples_in[-j];
+ v0 += s * *cf0++;
+ v1 += s * *--cf1;
+ }
+ *samples_out++ = (v0 * scale) + bias;
+ *samples_out2++ = (v1 * scale) + bias;
}
+
+ samples_in++;
+ samples_out += decifactor;
}
}