DCA: simplify lfe_interpolation_fir()

This reorders the lfe_fir tables and drops the mirrored half, so that the loops in lfe_interpolation_fir() can be simplified. The new loop structure should be easier to implement with SIMD. Static data size is reduced by 2 kB, and the change yields a 3% speedup on Cortex-A8. Originally committed as revision 22849 to svn://svn.ffmpeg.org/ffmpeg/trunk.
author: Måns Rullgård <mans@mansr.com> 2010-04-12 11:14:55 +0000
committer: Måns Rullgård <mans@mansr.com> 2010-04-12 11:14:55 +0000
commit: 766fefe8e275c693c5acd7b2d7555b2df6e7751a (patch)
tree: 5e48c2dd2cb9c67c8f377094c14d867a67ef8ed3 /libavcodec/dca.c
parent: b92d483bac3c833430a48eefbd0bfbe636772c27 (diff)
download: ffmpeg-766fefe8e275c693c5acd7b2d7555b2df6e7751a.tar.gz
1 files changed, 19 insertions, 10 deletions
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 7a6841d49c..79ea37316c 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -802,28 +802,37 @@ static void lfe_interpolation_fir(int decimation_select,
 
     int decifactor, k, j;
     const float *prCoeff;
-
-    int interp_index = 0;       /* Index to the interpolated samples */
     int deciindex;
 
     /* Select decimation filter */
     if (decimation_select == 1) {
-        decifactor = 128;
+        decifactor = 64;
         prCoeff = lfe_fir_128;
     } else {
-        decifactor = 64;
+        decifactor = 32;
         prCoeff = lfe_fir_64;
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        /* One decimated sample generates decifactor interpolated ones */
+        float *samples_out2 = samples_out + decifactor;
+        const float *cf0 = prCoeff;
+        const float *cf1 = prCoeff + 256;
+
+        /* One decimated sample generates 2*decifactor interpolated ones */
         for (k = 0; k < decifactor; k++) {
-            float rTmp = 0.0;
-            //FIXME the coeffs are symetric, fix that
-            for (j = 0; j < 512 / decifactor; j++)
-                rTmp += samples_in[deciindex - j] * prCoeff[k + j * decifactor];
-            samples_out[interp_index++] = (rTmp * scale) + bias;
+            float v0 = 0.0;
+            float v1 = 0.0;
+            for (j = 0; j < 256 / decifactor; j++) {
+                float s = samples_in[-j];
+                v0 += s * *cf0++;
+                v1 += s * *--cf1;
+            }
+            *samples_out++  = (v0 * scale) + bias;
+            *samples_out2++ = (v1 * scale) + bias;
         }
+
+        samples_in++;
+        samples_out += decifactor;
     }
 }
author	Måns Rullgård <mans@mansr.com>	2010-04-12 11:14:55 +0000
committer	Måns Rullgård <mans@mansr.com>	2010-04-12 11:14:55 +0000
commit	766fefe8e275c693c5acd7b2d7555b2df6e7751a (patch)
tree	5e48c2dd2cb9c67c8f377094c14d867a67ef8ed3 /libavcodec/dca.c
parent	b92d483bac3c833430a48eefbd0bfbe636772c27 (diff)
download	ffmpeg-766fefe8e275c693c5acd7b2d7555b2df6e7751a.tar.gz