| field | value | date |
|---|---|---|
| author | shadchin <shadchin@yandex-team.com> | 2024-02-12 07:53:52 +0300 |
| committer | Daniil Cherednik <dcherednik@ydb.tech> | 2024-02-14 14:26:16 +0000 |
| commit | 31f2a419764a8ba77c2a970cfc80056c6cd06756 (patch) | |
| tree | c1995d239eba8571cefc640f6648e1d5dd4ce9e2 /contrib/tools/python3/src/Modules/mathmodule.c | |
| parent | fe2ef02b38d9c85d80060963b265a1df9f38c3bb (diff) | |
| download | ydb-31f2a419764a8ba77c2a970cfc80056c6cd06756.tar.gz | |
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Modules/mathmodule.c')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/tools/python3/src/Modules/mathmodule.c | 1022 |

1 file changed, 626 insertions, 396 deletions
diff --git a/contrib/tools/python3/src/Modules/mathmodule.c b/contrib/tools/python3/src/Modules/mathmodule.c index 5f5b71c4c0..23fa2b1816 100644 --- a/contrib/tools/python3/src/Modules/mathmodule.c +++ b/contrib/tools/python3/src/Modules/mathmodule.c @@ -55,18 +55,19 @@ raised for division by zero and mod by zero. #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 #endif -#define NEEDS_PY_IDENTIFIER #include "Python.h" #include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_call.h" // _PyObject_CallNoArgs() -#include "pycore_dtoa.h" // _Py_dg_infinity() #include "pycore_long.h" // _PyLong_GetZero() +#include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_object.h" // _PyObject_LookupSpecial() #include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR /* For DBL_EPSILON in _math.h */ #include <float.h> /* For _Py_log1p with workarounds for buggy handling of zeros. */ #include "_math.h" +#include <stdbool.h> #include "clinic/mathmodule.c.h" @@ -76,6 +77,127 @@ module math /*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/ +typedef struct { + PyObject *str___ceil__; + PyObject *str___floor__; + PyObject *str___trunc__; +} math_module_state; + +static inline math_module_state* +get_math_module_state(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (math_module_state *)state; +} + +/* +Double and triple length extended precision algorithms from: + + Accurate Sum and Dot Product + by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi + https://doi.org/10.1137/030601818 + https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf + +*/ + +typedef struct{ double hi; double lo; } DoubleLength; + +static DoubleLength +dl_fast_sum(double a, double b) +{ + /* Algorithm 1.1. Compensated summation of two floating point numbers. */ + assert(fabs(a) >= fabs(b)); + double x = a + b; + double y = (a - x) + b; + return (DoubleLength) {x, y}; +} + +static DoubleLength +dl_sum(double a, double b) +{ + /* Algorithm 3.1 Error-free transformation of the sum */ + double x = a + b; + double z = x - a; + double y = (a - (x - z)) + (b - z); + return (DoubleLength) {x, y}; +} + +#ifndef UNRELIABLE_FMA + +static DoubleLength +dl_mul(double x, double y) +{ + /* Algorithm 3.5. Error-free transformation of a product */ + double z = x * y; + double zz = fma(x, y, -z); + return (DoubleLength) {z, zz}; +} + +#else + +/* + The default implementation of dl_mul() depends on the C math library + having an accurate fma() function as required by § 7.12.13.1 of the + C99 standard. + + The UNRELIABLE_FMA option is provided as a slower but accurate + alternative for builds where the fma() function is found wanting. + The speed penalty may be modest (17% slower on an Apple M1 Max), + so don't hesitate to enable this build option. + + The algorithms are from the T. J. Dekker paper: + A Floating-Point Technique for Extending the Available Precision + https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf +*/ + +static DoubleLength +dl_split(double x) { + // Dekker (5.5) and (5.6). 
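Editorial aside (not part of the patch): a minimal standalone sketch of the two error-free transformations that the new `DoubleLength` helpers in this hunk rely on, the two-sum of Algorithm 3.1 and the fma-based product of Algorithm 3.5. The demo `main()` and its sample values are hypothetical; compile with `-lm`.

```c
#include <math.h>
#include <stdio.h>

typedef struct { double hi; double lo; } DoubleLength;

/* Two-sum (Algorithm 3.1): hi + lo equals a + b exactly. */
static DoubleLength dl_sum(double a, double b)
{
    double x = a + b;
    double z = x - a;
    double y = (a - (x - z)) + (b - z);
    return (DoubleLength) {x, y};
}

/* Error-free product: hi + lo equals x * y exactly, using one fma. */
static DoubleLength dl_mul(double x, double y)
{
    double z = x * y;
    double zz = fma(x, y, -z);
    return (DoubleLength) {z, zz};
}

int main(void)
{
    DoubleLength s = dl_sum(1e16, 1.0);              /* lo keeps the part rounded away from hi */
    DoubleLength p = dl_mul(1.0 + 0x1p-30, 1.0 + 0x1p-30);
    printf("sum: hi=%.17g lo=%.17g\n", s.hi, s.lo);
    printf("mul: hi=%.17g lo=%.17g\n", p.hi, p.lo);
    return 0;
}
```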
+ double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1 + double hi = t - (t - x); + double lo = x - hi; + return (DoubleLength) {hi, lo}; +} + +static DoubleLength +dl_mul(double x, double y) +{ + // Dekker (5.12) and mul12() + DoubleLength xx = dl_split(x); + DoubleLength yy = dl_split(y); + double p = xx.hi * yy.hi; + double q = xx.hi * yy.lo + xx.lo * yy.hi; + double z = p + q; + double zz = p - z + q + xx.lo * yy.lo; + return (DoubleLength) {z, zz}; +} + +#endif + +typedef struct { double hi; double lo; double tiny; } TripleLength; + +static const TripleLength tl_zero = {0.0, 0.0, 0.0}; + +static TripleLength +tl_fma(double x, double y, TripleLength total) +{ + /* Algorithm 5.10 with SumKVert for K=3 */ + DoubleLength pr = dl_mul(x, y); + DoubleLength sm = dl_sum(total.hi, pr.hi); + DoubleLength r1 = dl_sum(total.lo, pr.lo); + DoubleLength r2 = dl_sum(r1.hi, sm.lo); + return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo}; +} + +static double +tl_to_d(TripleLength total) +{ + DoubleLength last = dl_sum(total.lo, total.hi); + return total.tiny + last.lo + last.hi; +} + + /* sin(pi*x), giving accurate results for all finite x (especially x integral or close to an integer). This is here for use in the @@ -85,10 +207,6 @@ module math static const double pi = 3.141592653589793238462643383279502884197; static const double logpi = 1.144729885849400174143427351353058711647; -#if !defined(HAVE_ERF) || !defined(HAVE_ERFC) -static const double sqrtpi = 1.772453850905516027298167483341145182798; -#endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */ - /* Version of PyFloat_AsDouble() with in-line fast paths for exact floats and integers. Gives a substantial @@ -146,7 +264,9 @@ m_sinpi(double x) return copysign(1.0, x)*r; } -/* Implementation of the real gamma function. In extensive but non-exhaustive +/* Implementation of the real gamma function. Kept here to work around + issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations + on various platforms (Windows, MacOS). In extensive but non-exhaustive random tests, this function proved accurate to within <= 10 ulps across the entire float domain. Note that accuracy may depend on the quality of the system math functions, the pow function in particular. Special cases @@ -268,34 +388,6 @@ lanczos_sum(double x) return num/den; } -/* Constant for +infinity, generated in the same way as float('inf'). */ - -static double -m_inf(void) -{ -#if _PY_SHORT_FLOAT_REPR == 1 - return _Py_dg_infinity(0); -#else - return Py_HUGE_VAL; -#endif -} - -/* Constant nan value, generated in the same way as float('nan'). */ -/* We don't currently assume that Py_NAN is defined everywhere. */ - -#if _PY_SHORT_FLOAT_REPR == 1 - -static double -m_nan(void) -{ -#if _PY_SHORT_FLOAT_REPR == 1 - return _Py_dg_stdnan(0); -#else - return Py_NAN; -#endif -} - -#endif static double m_tgamma(double x) @@ -314,7 +406,7 @@ m_tgamma(double x) if (x == 0.0) { errno = EDOM; /* tgamma(+-0.0) = +-inf, divide-by-zero */ - return copysign(Py_HUGE_VAL, x); + return copysign(Py_INFINITY, x); } /* integer arguments */ @@ -442,163 +534,6 @@ m_lgamma(double x) return r; } -#if !defined(HAVE_ERF) || !defined(HAVE_ERFC) - -/* - Implementations of the error function erf(x) and the complementary error - function erfc(x). 
- - Method: we use a series approximation for erf for small x, and a continued - fraction approximation for erfc(x) for larger x; - combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x), - this gives us erf(x) and erfc(x) for all x. - - The series expansion used is: - - erf(x) = x*exp(-x*x)/sqrt(pi) * [ - 2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...] - - The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k). - This series converges well for smallish x, but slowly for larger x. - - The continued fraction expansion used is: - - erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - ) - 3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...] - - after the first term, the general term has the form: - - k*(k-0.5)/(2*k+0.5 + x**2 - ...). - - This expansion converges fast for larger x, but convergence becomes - infinitely slow as x approaches 0.0. The (somewhat naive) continued - fraction evaluation algorithm used below also risks overflow for large x; - but for large x, erfc(x) == 0.0 to within machine precision. (For - example, erfc(30.0) is approximately 2.56e-393). - - Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and - continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) < - ERFC_CONTFRAC_CUTOFF. ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the - numbers of terms to use for the relevant expansions. */ - -#define ERF_SERIES_CUTOFF 1.5 -#define ERF_SERIES_TERMS 25 -#define ERFC_CONTFRAC_CUTOFF 30.0 -#define ERFC_CONTFRAC_TERMS 50 - -/* - Error function, via power series. - - Given a finite float x, return an approximation to erf(x). - Converges reasonably fast for small x. -*/ - -static double -m_erf_series(double x) -{ - double x2, acc, fk, result; - int i, saved_errno; - - x2 = x * x; - acc = 0.0; - fk = (double)ERF_SERIES_TERMS + 0.5; - for (i = 0; i < ERF_SERIES_TERMS; i++) { - acc = 2.0 + x2 * acc / fk; - fk -= 1.0; - } - /* Make sure the exp call doesn't affect errno; - see m_erfc_contfrac for more. */ - saved_errno = errno; - result = acc * x * exp(-x2) / sqrtpi; - errno = saved_errno; - return result; -} - -/* - Complementary error function, via continued fraction expansion. - - Given a positive float x, return an approximation to erfc(x). Converges - reasonably fast for x large (say, x > 2.0), and should be safe from - overflow if x and nterms are not too large. On an IEEE 754 machine, with x - <= 30.0, we're safe up to nterms = 100. For x >= 30.0, erfc(x) is smaller - than the smallest representable nonzero float. */ - -static double -m_erfc_contfrac(double x) -{ - double x2, a, da, p, p_last, q, q_last, b, result; - int i, saved_errno; - - if (x >= ERFC_CONTFRAC_CUTOFF) - return 0.0; - - x2 = x*x; - a = 0.0; - da = 0.5; - p = 1.0; p_last = 0.0; - q = da + x2; q_last = 1.0; - for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) { - double temp; - a += da; - da += 2.0; - b = da + x2; - temp = p; p = b*p - a*p_last; p_last = temp; - temp = q; q = b*q - a*q_last; q_last = temp; - } - /* Issue #8986: On some platforms, exp sets errno on underflow to zero; - save the current errno value so that we can restore it later. 
*/ - saved_errno = errno; - result = p / q * x * exp(-x2) / sqrtpi; - errno = saved_errno; - return result; -} - -#endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */ - -/* Error function erf(x), for general x */ - -static double -m_erf(double x) -{ -#ifdef HAVE_ERF - return erf(x); -#else - double absx, cf; - - if (Py_IS_NAN(x)) - return x; - absx = fabs(x); - if (absx < ERF_SERIES_CUTOFF) - return m_erf_series(x); - else { - cf = m_erfc_contfrac(absx); - return x > 0.0 ? 1.0 - cf : cf - 1.0; - } -#endif -} - -/* Complementary error function erfc(x), for general x. */ - -static double -m_erfc(double x) -{ -#ifdef HAVE_ERFC - return erfc(x); -#else - double absx, cf; - - if (Py_IS_NAN(x)) - return x; - absx = fabs(x); - if (absx < ERF_SERIES_CUTOFF) - return 1.0 - m_erf_series(x); - else { - cf = m_erfc_contfrac(absx); - return x > 0.0 ? cf : 2.0 - cf; - } -#endif -} - /* wrapper for atan2 that deals directly with special cases before delegating to the platform libm for the remaining cases. This @@ -785,25 +720,7 @@ m_log2(double x) } if (x > 0.0) { -#ifdef HAVE_LOG2 return log2(x); -#else - double m; - int e; - m = frexp(x, &e); - /* We want log2(m * 2**e) == log(m) / log(2) + e. Care is needed when - * x is just greater than 1.0: in that case e is 1, log(m) is negative, - * and we get significant cancellation error from the addition of - * log(m) / log(2) to e. The slight rewrite of the expression below - * avoids this problem. - */ - if (x >= 1.0) { - return log(2.0 * m) / log(2.0) + (e - 1); - } - else { - return log(m) / log(2.0) + e; - } -#endif } else if (x == 0.0) { errno = EDOM; @@ -890,7 +807,7 @@ long_lcm(PyObject *a, PyObject *b) { PyObject *g, *m, *f, *ab; - if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) { + if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) { return PyLong_FromLong(0); } g = _PyLong_GCD(a, b); @@ -1036,9 +953,7 @@ is_error(double x) */ static PyObject * -math_1_to_whatever(PyObject *arg, double (*func) (double), - PyObject *(*from_double_func) (double), - int can_overflow) +math_1(PyObject *arg, double (*func) (double), int can_overflow) { double x, r; x = PyFloat_AsDouble(arg); @@ -1064,7 +979,7 @@ math_1_to_whatever(PyObject *arg, double (*func) (double), /* this branch unnecessary on most platforms */ return NULL; - return (*from_double_func)(r); + return PyFloat_FromDouble(r); } /* variant of math_1, to be used when the function being wrapped is known to @@ -1113,12 +1028,6 @@ math_1a(PyObject *arg, double (*func) (double)) */ static PyObject * -math_1(PyObject *arg, double (*func) (double), int can_overflow) -{ - return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow); -} - -static PyObject * math_2(PyObject *const *args, Py_ssize_t nargs, double (*func) (double, double), const char *funcname) { @@ -1215,10 +1124,10 @@ static PyObject * math_ceil(PyObject *module, PyObject *number) /*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/ { - _Py_IDENTIFIER(__ceil__); if (!PyFloat_CheckExact(number)) { - PyObject *method = _PyObject_LookupSpecialId(number, &PyId___ceil__); + math_module_state *state = get_math_module_state(module); + PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__); if (method != NULL) { PyObject *result = _PyObject_CallNoArgs(method); Py_DECREF(method); @@ -1245,10 +1154,10 @@ FUNC1(cos, cos, 0, FUNC1(cosh, cosh, 1, "cosh($module, x, /)\n--\n\n" "Return the hyperbolic cosine of x.") -FUNC1A(erf, m_erf, +FUNC1A(erf, erf, "erf($module, x, /)\n--\n\n" "Error 
function at x.") -FUNC1A(erfc, m_erfc, +FUNC1A(erfc, erfc, "erfc($module, x, /)\n--\n\n" "Complementary error function at x.") FUNC1(exp, exp, 1, @@ -1283,14 +1192,13 @@ math_floor(PyObject *module, PyObject *number) { double x; - _Py_IDENTIFIER(__floor__); - if (PyFloat_CheckExact(number)) { x = PyFloat_AS_DOUBLE(number); } else { - PyObject *method = _PyObject_LookupSpecialId(number, &PyId___floor__); + math_module_state *state = get_math_module_state(module); + PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__); if (method != NULL) { PyObject *result = _PyObject_CallNoArgs(method); Py_DECREF(method); @@ -1343,30 +1251,30 @@ FUNC1(tanh, tanh, 0, Dickinson's post at <http://bugs.python.org/file10357/msum4.py>. See those links for more details, proofs and other references. - Note 1: IEEE 754R floating point semantics are assumed, - but the current implementation does not re-establish special - value semantics across iterations (i.e. handling -Inf + Inf). + Note 1: IEEE 754 floating-point semantics with a rounding mode of + roundTiesToEven are assumed. - Note 2: No provision is made for intermediate overflow handling; - therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while - sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the + Note 2: No provision is made for intermediate overflow handling; + therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while + fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the overflow of the first partial sum. - Note 3: The intermediate values lo, yr, and hi are declared volatile so - aggressive compilers won't algebraically reduce lo to always be exactly 0.0. - Also, the volatile declaration forces the values to be stored in memory as - regular doubles instead of extended long precision (80-bit) values. This - prevents double rounding because any addition or subtraction of two doubles - can be resolved exactly into double-sized hi and lo values. As long as the - hi value gets forced into a double before yr and lo are computed, the extra - bits in downstream extended precision operations (x87 for example) will be - exactly zero and therefore can be losslessly stored back into a double, - thereby preventing double rounding. - - Note 4: A similar implementation is in Modules/cmathmodule.c. - Be sure to update both when making changes. - - Note 5: The signature of math.fsum() differs from builtins.sum() + Note 3: The algorithm has two potential sources of fragility. First, C + permits arithmetic operations on `double`s to be performed in an + intermediate format whose range and precision may be greater than those of + `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for + example on machines using the now largely historical x87 FPUs. In this case, + `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or + `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we + should be safe from this source of errors. Second, an aggressively + optimizing compiler can re-associate operations so that (for example) the + statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then + re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That + re-association would be in violation of the C standard, and should not occur + except possibly in the presence of unsafe optimizations (e.g., -ffast-math, + -fassociative-math). Such optimizations should be avoided for this module. 
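Editorial aside (not part of the patch): Note 3 above is guarding the exact two-sum step at the heart of fsum's inner loop. A small hypothetical illustration of the `hi`/`yr`/`lo` computation that unsafe re-association would collapse to `lo == 0.0`:

```c
#include <stdio.h>

int main(void)
{
    double x = 1e100, y = 1.0;   /* |x| >= |y| */
    double hi = x + y;           /* rounded sum */
    double yr = hi - x;          /* the portion of y actually absorbed into hi */
    double lo = y - yr;          /* rounding error: hi + lo == x + y exactly */
    /* Re-associated as y + (x - x), lo would always be 0.0 and the
       low-order information carried in the partials would be lost. */
    printf("hi=%.17g lo=%.17g\n", hi, lo);
    return 0;
}
```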
+ + Note 4: The signature of math.fsum() differs from builtins.sum() because the start argument doesn't make sense in the context of accurate summation. Since the partials table is collapsed before returning a result, sum(seq2, start=sum(seq1)) may not equal the @@ -1452,7 +1360,7 @@ math_fsum(PyObject *module, PyObject *seq) Py_ssize_t i, j, n = 0, m = NUM_PARTIALS; double x, y, t, ps[NUM_PARTIALS], *p = ps; double xsave, special_sum = 0.0, inf_sum = 0.0; - volatile double hi, yr, lo; + double hi, yr, lo = 0.0; iter = PyObject_GetIter(seq); if (iter == NULL) @@ -1789,13 +1697,13 @@ math_isqrt(PyObject *module, PyObject *n) return NULL; } - if (_PyLong_Sign(n) < 0) { + if (_PyLong_IsNegative((PyLongObject *)n)) { PyErr_SetString( PyExc_ValueError, "isqrt() argument must be nonnegative"); goto error; } - if (_PyLong_Sign(n) == 0) { + if (_PyLong_IsZero((PyLongObject *)n)) { Py_DECREF(n); return PyLong_FromLong(0); } @@ -2034,8 +1942,7 @@ factorial_odd_part(unsigned long n) inner = PyLong_FromLong(1); if (inner == NULL) return NULL; - outer = inner; - Py_INCREF(outer); + outer = Py_NewRef(inner); upper = 3; for (i = _Py_bit_length(n) - 2; i >= 0; i--) { @@ -2056,8 +1963,7 @@ factorial_odd_part(unsigned long n) Py_DECREF(partial); if (tmp == NULL) goto error; - Py_DECREF(inner); - inner = tmp; + Py_SETREF(inner, tmp); /* Now inner is the product of all odd integers j in the range (0, n/2**i], giving the inner product in the formula above. */ @@ -2065,8 +1971,7 @@ factorial_odd_part(unsigned long n) tmp = PyNumber_Multiply(outer, inner); if (tmp == NULL) goto error; - Py_DECREF(outer); - outer = tmp; + Py_SETREF(outer, tmp); } Py_DECREF(inner); return outer; @@ -2156,19 +2061,19 @@ static PyObject * math_trunc(PyObject *module, PyObject *x) /*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/ { - _Py_IDENTIFIER(__trunc__); PyObject *trunc, *result; if (PyFloat_CheckExact(x)) { return PyFloat_Type.tp_as_number->nb_int(x); } - if (Py_TYPE(x)->tp_dict == NULL) { + if (!_PyType_IsReady(Py_TYPE(x))) { if (PyType_Ready(Py_TYPE(x)) < 0) return NULL; } - trunc = _PyObject_LookupSpecialId(x, &PyId___trunc__); + math_module_state *state = get_math_module_state(module); + trunc = _PyObject_LookupSpecial(x, state->str___trunc__); if (trunc == NULL) { if (!PyErr_Occurred()) PyErr_Format(PyExc_TypeError, @@ -2312,7 +2217,7 @@ math_modf_impl(PyObject *module, double x) in that int is larger than PY_SSIZE_T_MAX. */ static PyObject* -loghelper(PyObject* arg, double (*func)(double), const char *funcname) +loghelper(PyObject* arg, double (*func)(double)) { /* If it is int, do it ourselves. */ if (PyLong_Check(arg)) { @@ -2320,7 +2225,7 @@ loghelper(PyObject* arg, double (*func)(double), const char *funcname) Py_ssize_t e; /* Negative or zero inputs give a ValueError. */ - if (Py_SIZE(arg) <= 0) { + if (!_PyLong_IsPositive((PyLongObject *)arg)) { PyErr_SetString(PyExc_ValueError, "math domain error"); return NULL; @@ -2350,33 +2255,22 @@ loghelper(PyObject* arg, double (*func)(double), const char *funcname) } -/*[clinic input] -math.log - - x: object - [ - base: object(c_default="NULL") = math.e - ] - / - -Return the logarithm of x to the given base. - -If the base not specified, returns the natural logarithm (base e) of x. 
-[clinic start generated code]*/ - +/* AC: cannot convert yet, see gh-102839 and gh-89381, waiting + for support of multiple signatures */ static PyObject * -math_log_impl(PyObject *module, PyObject *x, int group_right_1, - PyObject *base) -/*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/ +math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs) { PyObject *num, *den; PyObject *ans; - num = loghelper(x, m_log, "log"); - if (num == NULL || base == NULL) + if (!_PyArg_CheckPositional("log", nargs, 1, 2)) + return NULL; + + num = loghelper(args[0], m_log); + if (num == NULL || nargs == 1) return num; - den = loghelper(base, m_log, "log"); + den = loghelper(args[1], m_log); if (den == NULL) { Py_DECREF(num); return NULL; @@ -2388,6 +2282,10 @@ math_log_impl(PyObject *module, PyObject *x, int group_right_1, return ans; } +PyDoc_STRVAR(math_log_doc, +"log(x, [base=math.e])\n\ +Return the logarithm of x to the given base.\n\n\ +If the base is not specified, returns the natural logarithm (base e) of x."); /*[clinic input] math.log2 @@ -2402,7 +2300,7 @@ static PyObject * math_log2(PyObject *module, PyObject *x) /*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/ { - return loghelper(x, m_log2, "log2"); + return loghelper(x, m_log2); } @@ -2419,7 +2317,7 @@ static PyObject * math_log10(PyObject *module, PyObject *x) /*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/ { - return loghelper(x, m_log10, "log10"); + return loghelper(x, m_log10); } @@ -2474,6 +2372,7 @@ that are almost always correctly rounded, four techniques are used: * lossless scaling using a power-of-two scaling factor * accurate squaring using Veltkamp-Dekker splitting [1] + or an equivalent with an fma() call * compensated summation using a variant of the Neumaier algorithm [2] * differential correction of the square root [3] @@ -2512,9 +2411,8 @@ Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum. To minimize loss of information during the accumulation of fractional values, each term has a separate accumulator. This also breaks up sequential dependencies in the inner loop so the CPU can maximize -floating point throughput. [4] On a 2.6 GHz Haswell, adding one -dimension has an incremental cost of only 5ns -- for example when -moving from hypot(x,y) to hypot(x,y,z). +floating point throughput. [4] On an Apple M1 Max, hypot(*vec) +takes only 3.33 µsec when len(vec) == 1000. The square root differential correction is needed because a correctly rounded square root of a correctly rounded sum of @@ -2532,14 +2430,21 @@ algorithm, effectively doubling the number of accurate bits. This technique is used in Dekker's SQRT2 algorithm and again in Borges' ALGORITHM 4 and 5. -Without proof for all cases, hypot() cannot claim to be always -correctly rounded. However for n <= 1000, prior to the final addition -that rounds the overall result, the internal accuracy of "h" together -with its correction of "x / (2.0 * h)" is at least 100 bits. [6] -Also, hypot() was tested against a Decimal implementation with -prec=300. After 100 million trials, no incorrectly rounded examples -were found. In addition, perfect commutativity (all permutations are -exactly equal) was verified for 1 billion random inputs with n=5. [7] +The hypot() function is faithfully rounded (less than 1 ulp error) +and usually correctly rounded (within 1/2 ulp). The squaring +step is exact. 
The Neumaier summation computes as if in doubled +precision (106 bits) and has the advantage that its input squares +are non-negative so that the condition number of the sum is one. +The square root with a differential correction is likewise computed +as if in doubled precision. + +For n <= 1000, prior to the final addition that rounds the overall +result, the internal accuracy of "h" together with its correction of +"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested +against a Decimal implementation with prec=300. After 100 million +trials, no incorrectly rounded examples were found. In addition, +perfect commutativity (all permutations are exactly equal) was +verified for 1 billion random inputs with n=5. [7] References: @@ -2556,9 +2461,8 @@ References: static inline double vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) { - const double T27 = 134217729.0; /* ldexp(1.0, 27) + 1.0) */ - double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0; - double t, hi, lo, h; + double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0; + DoubleLength pr, sm; int max_e; Py_ssize_t i; @@ -2572,82 +2476,37 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan) return max; } frexp(max, &max_e); - if (max_e >= -1023) { - scale = ldexp(1.0, -max_e); - assert(max * scale >= 0.5); - assert(max * scale < 1.0); + if (max_e < -1023) { + /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */ for (i=0 ; i < n ; i++) { - x = vec[i]; - assert(Py_IS_FINITE(x) && fabs(x) <= max); - - x *= scale; - assert(fabs(x) < 1.0); - - t = x * T27; - hi = t - (t - x); - lo = x - hi; - assert(hi + lo == x); - - x = hi * hi; - assert(x <= 1.0); - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac1 += (oldcsum - csum) + x; - - x = 2.0 * hi * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac2 += (oldcsum - csum) + x; - - assert(csum + lo * lo == csum); - frac3 += lo * lo; - } - h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3)); - - x = h; - t = x * T27; - hi = t - (t - x); - lo = x - hi; - assert (hi + lo == x); - - x = -hi * hi; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac1 += (oldcsum - csum) + x; - - x = -2.0 * hi * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac2 += (oldcsum - csum) + x; - - x = -lo * lo; - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac3 += (oldcsum - csum) + x; - - x = csum - 1.0 + (frac1 + frac2 + frac3); - return (h + x / (2.0 * h)) / scale; - } - /* When max_e < -1023, ldexp(1.0, -max_e) overflows. - So instead of multiplying by a scale, we just divide by *max*. 
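Editorial aside: the "differential correction of the square root" described in this docstring is a single first-order Newton step. Writing s for the accurately accumulated, scaled sum of squares and h = fl(sqrt(s)) for its rounded square root, the new vector_norm() subtracts h*h from the accumulators exactly (via dl_mul and dl_fast_sum) and then applies

$$ h \mathrel{+}= \frac{s - h^{2}}{2h}, $$

which is exactly the `h += x / (2.0 * h);` line in the code below.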
- */ + vec[i] /= DBL_MIN; // convert subnormals to normals + } + return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan); + } + scale = ldexp(1.0, -max_e); + assert(max * scale >= 0.5); + assert(max * scale < 1.0); for (i=0 ; i < n ; i++) { x = vec[i]; assert(Py_IS_FINITE(x) && fabs(x) <= max); - x /= max; - x = x*x; - assert(x <= 1.0); - assert(fabs(csum) >= fabs(x)); - oldcsum = csum; - csum += x; - frac1 += (oldcsum - csum) + x; - } - return max * sqrt(csum - 1.0 + frac1); + x *= scale; // lossless scaling + assert(fabs(x) < 1.0); + pr = dl_mul(x, x); // lossless squaring + assert(pr.hi <= 1.0); + sm = dl_fast_sum(csum, pr.hi); // lossless addition + csum = sm.hi; + frac1 += pr.lo; // lossy addition + frac2 += sm.lo; // lossy addition + } + h = sqrt(csum - 1.0 + (frac1 + frac2)); + pr = dl_mul(-h, h); + sm = dl_fast_sum(csum, pr.hi); + csum = sm.hi; + frac1 += pr.lo; + frac2 += sm.lo; + x = csum - 1.0 + (frac1 + frac2); + h += x / (2.0 * h); // differential correction + return h / scale; } #define NUM_STACK_ELEMS 16 @@ -2808,6 +2667,247 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\ 5.0\n\ "); +/** sumprod() ***************************************************************/ + +/* Forward declaration */ +static inline int _check_long_mult_overflow(long a, long b); + +static inline bool +long_add_would_overflow(long a, long b) +{ + return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a); +} + +/*[clinic input] +math.sumprod + + p: object + q: object + / + +Return the sum of products of values from two iterables p and q. + +Roughly equivalent to: + + sum(itertools.starmap(operator.mul, zip(p, q, strict=True))) + +For float and mixed int/float inputs, the intermediate products +and sums are computed with extended precision. +[clinic start generated code]*/ + +static PyObject * +math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) +/*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/ +{ + PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; + PyObject *p_it, *q_it, *total; + iternextfunc p_next, q_next; + bool p_stopped = false, q_stopped = false; + bool int_path_enabled = true, int_total_in_use = false; + bool flt_path_enabled = true, flt_total_in_use = false; + long int_total = 0; + TripleLength flt_total = tl_zero; + + p_it = PyObject_GetIter(p); + if (p_it == NULL) { + return NULL; + } + q_it = PyObject_GetIter(q); + if (q_it == NULL) { + Py_DECREF(p_it); + return NULL; + } + total = PyLong_FromLong(0); + if (total == NULL) { + Py_DECREF(p_it); + Py_DECREF(q_it); + return NULL; + } + p_next = *Py_TYPE(p_it)->tp_iternext; + q_next = *Py_TYPE(q_it)->tp_iternext; + while (1) { + bool finished; + + assert (p_i == NULL); + assert (q_i == NULL); + assert (term_i == NULL); + assert (new_total == NULL); + + assert (p_it != NULL); + assert (q_it != NULL); + assert (total != NULL); + + p_i = p_next(p_it); + if (p_i == NULL) { + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { + goto err_exit; + } + PyErr_Clear(); + } + p_stopped = true; + } + q_i = q_next(q_it); + if (q_i == NULL) { + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { + goto err_exit; + } + PyErr_Clear(); + } + q_stopped = true; + } + if (p_stopped != q_stopped) { + PyErr_Format(PyExc_ValueError, "Inputs are not the same length"); + goto err_exit; + } + finished = p_stopped & q_stopped; + + if (int_path_enabled) { + + if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) { + int overflow; 
+ long int_p, int_q, int_prod; + + int_p = PyLong_AsLongAndOverflow(p_i, &overflow); + if (overflow) { + goto finalize_int_path; + } + int_q = PyLong_AsLongAndOverflow(q_i, &overflow); + if (overflow) { + goto finalize_int_path; + } + if (_check_long_mult_overflow(int_p, int_q)) { + goto finalize_int_path; + } + int_prod = int_p * int_q; + if (long_add_would_overflow(int_total, int_prod)) { + goto finalize_int_path; + } + int_total += int_prod; + int_total_in_use = true; + Py_CLEAR(p_i); + Py_CLEAR(q_i); + continue; + } + + finalize_int_path: + // We're finished, overflowed, or have a non-int + int_path_enabled = false; + if (int_total_in_use) { + term_i = PyLong_FromLong(int_total); + if (term_i == NULL) { + goto err_exit; + } + new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } + Py_SETREF(total, new_total); + new_total = NULL; + Py_CLEAR(term_i); + int_total = 0; // An ounce of prevention, ... + int_total_in_use = false; + } + } + + if (flt_path_enabled) { + + if (!finished) { + double flt_p, flt_q; + bool p_type_float = PyFloat_CheckExact(p_i); + bool q_type_float = PyFloat_CheckExact(q_i); + if (p_type_float && q_type_float) { + flt_p = PyFloat_AS_DOUBLE(p_i); + flt_q = PyFloat_AS_DOUBLE(q_i); + } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) { + /* We care about float/int pairs and int/float pairs because + they arise naturally in several use cases such as price + times quantity, measurements with integer weights, or + data selected by a vector of bools. */ + flt_p = PyFloat_AS_DOUBLE(p_i); + flt_q = PyLong_AsDouble(q_i); + if (flt_q == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } + } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) { + flt_q = PyFloat_AS_DOUBLE(q_i); + flt_p = PyLong_AsDouble(p_i); + if (flt_p == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + goto finalize_flt_path; + } + } else { + goto finalize_flt_path; + } + TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total); + if (isfinite(new_flt_total.hi)) { + flt_total = new_flt_total; + flt_total_in_use = true; + Py_CLEAR(p_i); + Py_CLEAR(q_i); + continue; + } + } + + finalize_flt_path: + // We're finished, overflowed, have a non-float, or got a non-finite value + flt_path_enabled = false; + if (flt_total_in_use) { + term_i = PyFloat_FromDouble(tl_to_d(flt_total)); + if (term_i == NULL) { + goto err_exit; + } + new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } + Py_SETREF(total, new_total); + new_total = NULL; + Py_CLEAR(term_i); + flt_total = tl_zero; + flt_total_in_use = false; + } + } + + assert(!int_total_in_use); + assert(!flt_total_in_use); + if (finished) { + goto normal_exit; + } + term_i = PyNumber_Multiply(p_i, q_i); + if (term_i == NULL) { + goto err_exit; + } + new_total = PyNumber_Add(total, term_i); + if (new_total == NULL) { + goto err_exit; + } + Py_SETREF(total, new_total); + new_total = NULL; + Py_CLEAR(p_i); + Py_CLEAR(q_i); + Py_CLEAR(term_i); + } + + normal_exit: + Py_DECREF(p_it); + Py_DECREF(q_it); + return total; + + err_exit: + Py_DECREF(p_it); + Py_DECREF(q_it); + Py_DECREF(total); + Py_XDECREF(p_i); + Py_XDECREF(q_i); + Py_XDECREF(term_i); + Py_XDECREF(new_total); + return NULL; +} + + /* pow can't use math_2, but needs its own wrapper: the problem is that an infinite result can arise either as a result of overflow (in which case OverflowError should be raised) or as a result of @@ -3141,8 +3241,7 @@ math_prod_impl(PyObject *module, 
PyObject *iterable, PyObject *start) long i_result = PyLong_AsLongAndOverflow(result, &overflow); /* If this already overflowed, don't even enter the loop. */ if (overflow == 0) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } /* Loop over all the items in the iterable until we finish, we overflow * or we found a non integer element */ @@ -3189,8 +3288,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start) */ if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); if (item == NULL) { @@ -3239,8 +3337,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start) if (item == NULL) { /* error, or end-of-sequence */ if (PyErr_Occurred()) { - Py_DECREF(result); - result = NULL; + Py_SETREF(result, NULL); } break; } @@ -3507,8 +3604,7 @@ perm_comb(PyObject *n, unsigned long long k, int iscomb) return PyLong_FromLong(1); } if (k == 1) { - Py_INCREF(n); - return n; + return Py_NewRef(n); } /* P(n, k) = P(n, j) * P(n-j, k-j) */ @@ -3591,12 +3687,12 @@ math_perm_impl(PyObject *module, PyObject *n, PyObject *k) } assert(PyLong_CheckExact(n) && PyLong_CheckExact(k)); - if (Py_SIZE(n) < 0) { + if (_PyLong_IsNegative((PyLongObject *)n)) { PyErr_SetString(PyExc_ValueError, "n must be a non-negative integer"); goto error; } - if (Py_SIZE(k) < 0) { + if (_PyLong_IsNegative((PyLongObject *)k)) { PyErr_SetString(PyExc_ValueError, "k must be a non-negative integer"); goto error; @@ -3683,12 +3779,12 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k) } assert(PyLong_CheckExact(n) && PyLong_CheckExact(k)); - if (Py_SIZE(n) < 0) { + if (_PyLong_IsNegative((PyLongObject *)n)) { PyErr_SetString(PyExc_ValueError, "n must be a non-negative integer"); goto error; } - if (Py_SIZE(k) < 0) { + if (_PyLong_IsNegative((PyLongObject *)k)) { PyErr_SetString(PyExc_ValueError, "k must be a non-negative integer"); goto error; @@ -3720,7 +3816,8 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k) if (temp == NULL) { goto error; } - if (Py_SIZE(temp) < 0) { + assert(PyLong_Check(temp)); + if (_PyLong_IsNegative((PyLongObject *)temp)) { Py_DECREF(temp); result = PyLong_FromLong(0); goto done; @@ -3767,13 +3864,20 @@ math.nextafter x: double y: double / + * + steps: object = None -Return the next floating-point value after x towards y. +Return the floating-point value the given number of steps after x towards y. + +If steps is not specified or is None, it defaults to 1. + +Raises a TypeError, if x or y is not a double, or if steps is not an integer. +Raises ValueError if steps is negative. [clinic start generated code]*/ static PyObject * -math_nextafter_impl(PyObject *module, double x, double y) -/*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/ +math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps) +/*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/ { #if defined(_AIX) if (x == y) { @@ -3788,7 +3892,101 @@ math_nextafter_impl(PyObject *module, double x, double y) return PyFloat_FromDouble(y); } #endif - return PyFloat_FromDouble(nextafter(x, y)); + if (steps == Py_None) { + // fast path: we default to one step. 
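Editorial aside (not part of the patch): the steps handling that follows works because, for finite non-negative IEEE-754 doubles, the bit pattern read as an unsigned 64-bit integer increases monotonically with the float value, so moving n ulps is plain integer arithmetic on the representation. A hypothetical standalone sketch (positive finite input, small n, no sign or overflow handling):

```c
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Move a non-negative finite double up by n ulps via its bit pattern. */
static double steps_up(double x, uint64_t n)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);   /* type-pun through memcpy to avoid aliasing issues */
    bits += n;                        /* adjacent doubles have adjacent bit patterns */
    memcpy(&x, &bits, sizeof x);
    return x;
}

int main(void)
{
    printf("%d\n", steps_up(1.0, 1) == nextafter(1.0, INFINITY));   /* prints 1 */
    printf("%.17g\n", steps_up(1.0, 3));
    return 0;
}
```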
+ return PyFloat_FromDouble(nextafter(x, y)); + } + steps = PyNumber_Index(steps); + if (steps == NULL) { + return NULL; + } + assert(PyLong_CheckExact(steps)); + if (_PyLong_IsNegative((PyLongObject *)steps)) { + PyErr_SetString(PyExc_ValueError, + "steps must be a non-negative integer"); + Py_DECREF(steps); + return NULL; + } + + unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps); + // Conveniently, uint64_t and double have the same number of bits + // on all the platforms we care about. + // So if an overflow occurs, we can just use UINT64_MAX. + Py_DECREF(steps); + if (usteps_ull >= UINT64_MAX) { + // This branch includes the case where an error occurred, since + // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that + // usteps_ull can be strictly larger than UINT64_MAX on a machine + // where unsigned long long has width > 64 bits. + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_OverflowError)) { + PyErr_Clear(); + } + else { + return NULL; + } + } + usteps_ull = UINT64_MAX; + } + assert(usteps_ull <= UINT64_MAX); + uint64_t usteps = (uint64_t)usteps_ull; + + if (usteps == 0) { + return PyFloat_FromDouble(x); + } + if (Py_IS_NAN(x)) { + return PyFloat_FromDouble(x); + } + if (Py_IS_NAN(y)) { + return PyFloat_FromDouble(y); + } + + // We assume that double and uint64_t have the same endianness. + // This is not guaranteed by the C-standard, but it is true for + // all platforms we care about. (The most likely form of violation + // would be a "mixed-endian" double.) + union pun {double f; uint64_t i;}; + union pun ux = {x}, uy = {y}; + if (ux.i == uy.i) { + return PyFloat_FromDouble(x); + } + + const uint64_t sign_bit = 1ULL<<63; + + uint64_t ax = ux.i & ~sign_bit; + uint64_t ay = uy.i & ~sign_bit; + + // opposite signs + if (((ux.i ^ uy.i) & sign_bit)) { + // NOTE: ax + ay can never overflow, because their most significant bit + // ain't set. + if (ax + ay <= usteps) { + return PyFloat_FromDouble(uy.f); + // This comparison has to use <, because <= would get +0.0 vs -0.0 + // wrong. 
+ } else if (ax < usteps) { + union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)}; + return PyFloat_FromDouble(result.f); + } else { + ux.i -= usteps; + return PyFloat_FromDouble(ux.f); + } + // same sign + } else if (ax > ay) { + if (ax - ay >= usteps) { + ux.i -= usteps; + return PyFloat_FromDouble(ux.f); + } else { + return PyFloat_FromDouble(uy.f); + } + } else { + if (ay - ax >= usteps) { + ux.i += usteps; + return PyFloat_FromDouble(ux.f); + } else { + return PyFloat_FromDouble(uy.f); + } + } } @@ -3812,7 +4010,7 @@ math_ulp_impl(PyObject *module, double x) if (Py_IS_INFINITY(x)) { return x; } - double inf = m_inf(); + double inf = Py_INFINITY; double x2 = nextafter(x, inf); if (Py_IS_INFINITY(x2)) { /* special case: x is the largest positive representable float */ @@ -3825,6 +4023,20 @@ math_ulp_impl(PyObject *module, double x) static int math_exec(PyObject *module) { + + math_module_state *state = get_math_module_state(module); + state->str___ceil__ = PyUnicode_InternFromString("__ceil__"); + if (state->str___ceil__ == NULL) { + return -1; + } + state->str___floor__ = PyUnicode_InternFromString("__floor__"); + if (state->str___floor__ == NULL) { + return -1; + } + state->str___trunc__ = PyUnicode_InternFromString("__trunc__"); + if (state->str___trunc__ == NULL) { + return -1; + } if (_PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) { return -1; } @@ -3835,17 +4047,31 @@ math_exec(PyObject *module) if (_PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) { return -1; } - if (_PyModule_Add(module, "inf", PyFloat_FromDouble(m_inf())) < 0) { + if (_PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) { return -1; } -#if _PY_SHORT_FLOAT_REPR == 1 - if (_PyModule_Add(module, "nan", PyFloat_FromDouble(m_nan())) < 0) { + if (_PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) { return -1; } -#endif return 0; } +static int +math_clear(PyObject *module) +{ + math_module_state *state = get_math_module_state(module); + Py_CLEAR(state->str___ceil__); + Py_CLEAR(state->str___floor__); + Py_CLEAR(state->str___trunc__); + return 0; +} + +static void +math_free(void *module) +{ + math_clear((PyObject *)module); +} + static PyMethodDef math_methods[] = { {"acos", math_acos, METH_O, math_acos_doc}, {"acosh", math_acosh, METH_O, math_acosh_doc}, @@ -3883,7 +4109,7 @@ static PyMethodDef math_methods[] = { {"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc}, MATH_LDEXP_METHODDEF {"lgamma", math_lgamma, METH_O, math_lgamma_doc}, - MATH_LOG_METHODDEF + {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc}, {"log1p", math_log1p, METH_O, math_log1p_doc}, MATH_LOG10_METHODDEF MATH_LOG2_METHODDEF @@ -3896,6 +4122,7 @@ static PyMethodDef math_methods[] = { {"sqrt", math_sqrt, METH_O, math_sqrt_doc}, {"tan", math_tan, METH_O, math_tan_doc}, {"tanh", math_tanh, METH_O, math_tanh_doc}, + MATH_SUMPROD_METHODDEF MATH_TRUNC_METHODDEF MATH_PROD_METHODDEF MATH_PERM_METHODDEF @@ -3907,6 +4134,7 @@ static PyMethodDef math_methods[] = { static PyModuleDef_Slot math_slots[] = { {Py_mod_exec, math_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {0, NULL} }; @@ -3918,9 +4146,11 @@ static struct PyModuleDef mathmodule = { PyModuleDef_HEAD_INIT, .m_name = "math", .m_doc = module_doc, - .m_size = 0, + .m_size = sizeof(math_module_state), .m_methods = math_methods, .m_slots = math_slots, + .m_clear = math_clear, + .m_free = math_free, }; PyMODINIT_FUNC |
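Editorial aside on the recurring theme of this commit: the `_Py_IDENTIFIER`/`NEEDS_PY_IDENTIFIER` caches are replaced by per-module state (interned strings created in `math_exec`, released in `math_clear`/`math_free`), which is what lets the module declare `Py_MOD_PER_INTERPRETER_GIL_SUPPORTED`. A minimal, hypothetical extension-module sketch of the same pattern, using only the public C API (`PyModule_GetState` rather than the internal `_PyModule_GetState`):

```c
#include <Python.h>

typedef struct {
    PyObject *str___ceil__;   /* interned per module instance, not a process-wide static */
} demo_state;

static int
demo_exec(PyObject *module)
{
    demo_state *state = PyModule_GetState(module);
    state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
    return (state->str___ceil__ == NULL) ? -1 : 0;
}

static int
demo_clear(PyObject *module)
{
    demo_state *state = PyModule_GetState(module);
    Py_CLEAR(state->str___ceil__);
    return 0;
}

static void
demo_free(void *module)
{
    (void)demo_clear((PyObject *)module);
}

static PyModuleDef_Slot demo_slots[] = {
    {Py_mod_exec, demo_exec},
    {0, NULL}
};

static struct PyModuleDef demomodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "demo",
    .m_size = sizeof(demo_state),
    .m_slots = demo_slots,
    .m_clear = demo_clear,
    .m_free = demo_free,
};

PyMODINIT_FUNC
PyInit_demo(void)
{
    return PyModuleDef_Init(&demomodule);
}
```

Unlike a static `_Py_IDENTIFIER` cache, the interned string here lives and dies with each module instance, so separate interpreters never share it.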