swresample/resample: improve bessel function accuracy and speed

This improves accuracy for the bessel function at large arguments, and this in turn should improve the quality of the Kaiser window. It also improves the performance of the bessel function and hence build_filter by ~ 20%. Details are given below. Algorithm: taken from the Boost project, who have done a detailed investigation of the accuracy of their method, as compared with e.g the GNU Scientific Library (GSL): http://www.boost.org/doc/libs/1_52_0/libs/math/doc/sf_and_dist/html/math_toolkit/special/bessel/mbessel.html. Boost source code (also cited and licensed in the code): https://searchcode.com/codesearch/view/14918379/. Accuracy: sample values may be obtained as follows. i0 denotes the old bessel code, i0_boost the approach here, and i0_real an arbitrary precision result (truncated) from Wolfram Alpha: type "bessel i0(6.0)" to reproduce. These are evaluation points that occur for the default kaiser_beta = 9. Some illustrations: bessel(8.0) i0 (8.000000) = 427.564115721804739678191254 i0_boost(8.000000) = 427.564115721804796521610115 i0_real (8.000000) = 427.564115721804785177396791 bessel(6.0) i0 (6.000000) = 67.234406976477956163762428 i0_boost(6.000000) = 67.234406976477970374617144 i0_real (6.000000) = 67.234406976477975326188025 Reason for accuracy: Main accuracy benefits come at larger bessel arguments, where the Taylor-Maclaurin method is not that good: 23+ iterations (at large arguments, since the series is about 0) can cause significant floating point error accumulation. Benchmarks: Obtained on x86-64, Haswell, GNU/Linux via a loop calling build_filter 1000 times: test: fate-swr-resample-dblp-44100-2626 new: 995894468 decicycles in build_filter(loop 1000), 256 runs, 0 skips 1029719302 decicycles in build_filter(loop 1000), 512 runs, 0 skips 984101131 decicycles in build_filter(loop 1000), 1024 runs, 0 skips old: 1250020763 decicycles in build_filter(loop 1000), 256 runs, 0 skips 1246353282 decicycles in build_filter(loop 1000), 512 runs, 0 skips 1220017565 decicycles in build_filter(loop 1000), 1024 runs, 0 skips A further ~ 5% may be squeezed by enabling -ftree-vectorize. However, this is a separate issue from this patch. Reviewed-by: Michael Niedermayer <michael@niedermayer.cc> Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
author: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 2015-11-02 14:28:39 -0500
committer: Ganesh Ajjanagadde <gajjanagadde@gmail.com> 2015-11-08 21:18:16 -0500
commit: a5202bc968cf3ca5b64c623b9271f76e8fa02211 (patch)
tree: 4e62cfe12f8db42ca41eb77942c53bc543647be9
parent: 1bed09a30ef95cc85513688f3b3c18c3fd332fd8 (diff)
download: ffmpeg-a5202bc968cf3ca5b64c623b9271f76e8fa02211.tar.gz
1 files changed, 98 insertions, 27 deletions
diff --git a/libswresample/resample.c b/libswresample/resample.c
index 82f004c802..072e0d6a78 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -1,6 +1,7 @@
 /*
  * audio resampling
  * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ * bessel function: Copyright (c) 2006 Xiaogang Zhang
  *
  * This file is part of FFmpeg.
  *
@@ -28,38 +29,108 @@
 #include "libavutil/avassert.h"
 #include "resample.h"
 
+static inline double eval_poly(const double *coeff, int size, double x) {
+    double sum = coeff[size-1];
+    int i;
+    for (i = size-2; i >= 0; --i) {
+        sum *= x;
+        sum += coeff[i];
+    }
+    return sum;
+}
+
 /**
  * 0th order modified bessel function of the first kind.
+ * Algorithm taken from the Boost project, source:
+ * https://searchcode.com/codesearch/view/14918379/
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0 (see notice below).
+ * Boost Software License - Version 1.0 - August 17th, 2003
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
  */
-static double bessel(double x){
-    double lastv=0;
-    double t, v;
-    int i;
-    static const double inv[100]={
- 1.0/( 1* 1), 1.0/( 2* 2), 1.0/( 3* 3), 1.0/( 4* 4), 1.0/( 5* 5), 1.0/( 6* 6), 1.0/( 7* 7), 1.0/( 8* 8), 1.0/( 9* 9), 1.0/(10*10),
- 1.0/(11*11), 1.0/(12*12), 1.0/(13*13), 1.0/(14*14), 1.0/(15*15), 1.0/(16*16), 1.0/(17*17), 1.0/(18*18), 1.0/(19*19), 1.0/(20*20),
- 1.0/(21*21), 1.0/(22*22), 1.0/(23*23), 1.0/(24*24), 1.0/(25*25), 1.0/(26*26), 1.0/(27*27), 1.0/(28*28), 1.0/(29*29), 1.0/(30*30),
- 1.0/(31*31), 1.0/(32*32), 1.0/(33*33), 1.0/(34*34), 1.0/(35*35), 1.0/(36*36), 1.0/(37*37), 1.0/(38*38), 1.0/(39*39), 1.0/(40*40),
- 1.0/(41*41), 1.0/(42*42), 1.0/(43*43), 1.0/(44*44), 1.0/(45*45), 1.0/(46*46), 1.0/(47*47), 1.0/(48*48), 1.0/(49*49), 1.0/(50*50),
- 1.0/(51*51), 1.0/(52*52), 1.0/(53*53), 1.0/(54*54), 1.0/(55*55), 1.0/(56*56), 1.0/(57*57), 1.0/(58*58), 1.0/(59*59), 1.0/(60*60),
- 1.0/(61*61), 1.0/(62*62), 1.0/(63*63), 1.0/(64*64), 1.0/(65*65), 1.0/(66*66), 1.0/(67*67), 1.0/(68*68), 1.0/(69*69), 1.0/(70*70),
- 1.0/(71*71), 1.0/(72*72), 1.0/(73*73), 1.0/(74*74), 1.0/(75*75), 1.0/(76*76), 1.0/(77*77), 1.0/(78*78), 1.0/(79*79), 1.0/(80*80),
- 1.0/(81*81), 1.0/(82*82), 1.0/(83*83), 1.0/(84*84), 1.0/(85*85), 1.0/(86*86), 1.0/(87*87), 1.0/(88*88), 1.0/(89*89), 1.0/(90*90),
- 1.0/(91*91), 1.0/(92*92), 1.0/(93*93), 1.0/(94*94), 1.0/(95*95), 1.0/(96*96), 1.0/(97*97), 1.0/(98*98), 1.0/(99*99), 1.0/(10000)
-    };
 
-    x= x*x/4;
-    t = x;
-    v = 1 + x;
-    for(i=1; v != lastv; i+=2){
-        t *= x*inv[i];
-        v += t;
-        lastv=v;
-        t *= x*inv[i + 1];
-        v += t;
-        av_assert2(i<98);
+static double bessel(double x) {
+// Modified Bessel function of the first kind of order zero
+// minimax rational approximations on intervals, see
+// Blair and Edwards, Chalk River Report AECL-4928, 1974
+    static const double p1[] = {
+        -2.2335582639474375249e+15,
+        -5.5050369673018427753e+14,
+        -3.2940087627407749166e+13,
+        -8.4925101247114157499e+11,
+        -1.1912746104985237192e+10,
+        -1.0313066708737980747e+08,
+        -5.9545626019847898221e+05,
+        -2.4125195876041896775e+03,
+        -7.0935347449210549190e+00,
+        -1.5453977791786851041e-02,
+        -2.5172644670688975051e-05,
+        -3.0517226450451067446e-08,
+        -2.6843448573468483278e-11,
+        -1.5982226675653184646e-14,
+        -5.2487866627945699800e-18,
+    };
+    static const double q1[] = {
+        -2.2335582639474375245e+15,
+         7.8858692566751002988e+12,
+        -1.2207067397808979846e+10,
+         1.0377081058062166144e+07,
+        -4.8527560179962773045e+03,
+         1.0L,
+    };
+    static const double p2[] = {
+        -2.2210262233306573296e-04,
+         1.3067392038106924055e-02,
+        -4.4700805721174453923e-01,
+         5.5674518371240761397e+00,
+        -2.3517945679239481621e+01,
+         3.1611322818701131207e+01,
+        -9.6090021968656180000e+00,
+    };
+    static const double q2[] = {
+        -5.5194330231005480228e-04,
+         3.2547697594819615062e-02,
+        -1.1151759188741312645e+00,
+         1.3982595353892851542e+01,
+        -6.0228002066743340583e+01,
+         8.5539563258012929600e+01,
+        -3.1446690275135491500e+01,
+        1.0L,
+    };
+    double y, r, factor;
+    if (x == 0)
+        return 1.0;
+    x = fabs(x);
+    if (x <= 15) {
+        y = x * x;
+        return eval_poly(p1, FF_ARRAY_ELEMS(p1), y) / eval_poly(q1, FF_ARRAY_ELEMS(q1), y);
+    }
+    else {
+        y = 1 / x - 1.0 / 15;
+        r = eval_poly(p2, FF_ARRAY_ELEMS(p2), y) / eval_poly(q2, FF_ARRAY_ELEMS(q2), y);
+        factor = exp(x) / sqrt(x);
+        return factor * r;
     }
-    return v;
 }
 
 /**
author	Ganesh Ajjanagadde <gajjanagadde@gmail.com>	2015-11-02 14:28:39 -0500
committer	Ganesh Ajjanagadde <gajjanagadde@gmail.com>	2015-11-08 21:18:16 -0500
commit	a5202bc968cf3ca5b64c623b9271f76e8fa02211 (patch)
tree	4e62cfe12f8db42ca41eb77942c53bc543647be9
parent	1bed09a30ef95cc85513688f3b3c18c3fd332fd8 (diff)
download	ffmpeg-a5202bc968cf3ca5b64c623b9271f76e8fa02211.tar.gz