aboutsummaryrefslogtreecommitdiffstats
path: root/libswresample
diff options
context:
space:
mode:
author: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 2015-11-08 21:39:32 -0500
committer: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 2015-11-09 18:41:03 -0500
commit: b87ca4bf25a3b5a0cac76fb8b3a7ab3bc0fd9882 (patch)
tree: 603890d7017f2806d104a53698400722d6ec11dd /libswresample
parent: b02201efb531348406e48f9252bd39a6acebd2b7 (diff)
download: ffmpeg-b87ca4bf25a3b5a0cac76fb8b3a7ab3bc0fd9882.tar.gz
swresample/resample: speed up upsampling by precomputing sines
When upsampling, factor is set to 1 and sines need to be evaluated only once for each phase, and the complexity should not depend on the number of filter taps. This does the desired precomputation, yielding significant speedups. Hard guarantees on the gain are not possible, but gains themselves are obvious and are illustrated below.

Sample benchmark (x86-64, Haswell, GNU/Linux)
test: fate-swr-resample-dblp-2626-44100
old:
29161085 decicycles in build_filter (loop 1000), 256 runs, 0 skips
28821467 decicycles in build_filter (loop 1000), 512 runs, 0 skips
28668201 decicycles in build_filter (loop 1000), 1000 runs, 24 skips
new:
14351936 decicycles in build_filter (loop 1000), 256 runs, 0 skips
14306652 decicycles in build_filter (loop 1000), 512 runs, 0 skips
14299923 decicycles in build_filter (loop 1000), 1000 runs, 24 skips

Note that this does not statically allocate the sin lookup table. This may be done for the default 1024 phases, yielding a 512*8 = 4kB array which should be small enough. This should yield a small improvement. Nevertheless, this is separate from this patch, is more ambiguous due to the binary size increase, and requires a LUT to be generated offline.

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
Diffstat (limited to 'libswresample')
-rw-r--r-- libswresample/resample.c 21
1 files changed, 17 insertions, 4 deletions
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 072e0d6a78..d0a406e5e4 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -144,24 +144,34 @@ static double bessel(double x) {
static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale,
int filter_type, double kaiser_beta){
int ph, i;
- double x, y, w, t;
+ double x, y, w, t, s;
double *tab = av_malloc_array(tap_count+1, sizeof(*tab));
+ double *sin_lut = av_malloc_array(phase_count / 2 + 1, sizeof(*sin_lut));
const int center= (tap_count-1)/2;
- if (!tab)
- return AVERROR(ENOMEM);
+ if (!tab || !sin_lut)
+ goto fail;
/* if upsampling, only need to interpolate, no filter */
if (factor > 1.0)
factor = 1.0;
av_assert0(phase_count == 1 || phase_count % 2 == 0);
+
+ if (factor == 1.0) {
+ for (ph = 0; ph <= phase_count / 2; ph++)
+ sin_lut[ph] = sin(M_PI * ph / phase_count);
+ }
for(ph = 0; ph <= phase_count / 2; ph++) {
double norm = 0;
+ s = sin_lut[ph];
for(i=0;i<=tap_count;i++) {
x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
if (x == 0) y = 1.0;
- else y = sin(x) / x;
+ else if (factor == 1.0)
+ y = s / x;
+ else
+ y = sin(x) / x;
switch(filter_type){
case SWR_FILTER_TYPE_CUBIC:{
const float d= -0.5; //first order derivative = -0.5
@@ -183,6 +193,7 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
}
tab[i] = y;
+ s = -s;
if (i < tap_count)
norm += y;
}
@@ -278,7 +289,9 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
}
#endif
+fail:
av_free(tab);
+ av_free(sin_lut);
return 0;
}