aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Modules/mathmodule.c
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.com>2024-02-12 07:53:52 +0300
committerDaniil Cherednik <dcherednik@ydb.tech>2024-02-14 14:26:16 +0000
commit31f2a419764a8ba77c2a970cfc80056c6cd06756 (patch)
treec1995d239eba8571cefc640f6648e1d5dd4ce9e2 /contrib/tools/python3/src/Modules/mathmodule.c
parentfe2ef02b38d9c85d80060963b265a1df9f38c3bb (diff)
downloadydb-31f2a419764a8ba77c2a970cfc80056c6cd06756.tar.gz
Update Python from 3.11.8 to 3.12.2
Diffstat (limited to 'contrib/tools/python3/src/Modules/mathmodule.c')
-rw-r--r--contrib/tools/python3/src/Modules/mathmodule.c1022
1 files changed, 626 insertions, 396 deletions
diff --git a/contrib/tools/python3/src/Modules/mathmodule.c b/contrib/tools/python3/src/Modules/mathmodule.c
index 5f5b71c4c0..23fa2b1816 100644
--- a/contrib/tools/python3/src/Modules/mathmodule.c
+++ b/contrib/tools/python3/src/Modules/mathmodule.c
@@ -55,18 +55,19 @@ raised for division by zero and mod by zero.
#ifndef Py_BUILD_CORE_BUILTIN
# define Py_BUILD_CORE_MODULE 1
#endif
-#define NEEDS_PY_IDENTIFIER
#include "Python.h"
#include "pycore_bitutils.h" // _Py_bit_length()
#include "pycore_call.h" // _PyObject_CallNoArgs()
-#include "pycore_dtoa.h" // _Py_dg_infinity()
#include "pycore_long.h" // _PyLong_GetZero()
+#include "pycore_moduleobject.h" // _PyModule_GetState()
+#include "pycore_object.h" // _PyObject_LookupSpecial()
#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR
/* For DBL_EPSILON in _math.h */
#include <float.h>
/* For _Py_log1p with workarounds for buggy handling of zeros. */
#include "_math.h"
+#include <stdbool.h>
#include "clinic/mathmodule.c.h"
@@ -76,6 +77,127 @@ module math
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=76bc7002685dd942]*/
+typedef struct {
+ PyObject *str___ceil__;
+ PyObject *str___floor__;
+ PyObject *str___trunc__;
+} math_module_state;
+
+static inline math_module_state*
+get_math_module_state(PyObject *module)
+{
+ void *state = _PyModule_GetState(module);
+ assert(state != NULL);
+ return (math_module_state *)state;
+}
+
+/*
+Double and triple length extended precision algorithms from:
+
+ Accurate Sum and Dot Product
+ by Takeshi Ogita, Siegfried M. Rump, and Shin’Ichi Oishi
+ https://doi.org/10.1137/030601818
+ https://www.tuhh.de/ti3/paper/rump/OgRuOi05.pdf
+
+*/
+
+typedef struct{ double hi; double lo; } DoubleLength;
+
+static DoubleLength
+dl_fast_sum(double a, double b)
+{
+ /* Algorithm 1.1. Compensated summation of two floating point numbers. */
+ assert(fabs(a) >= fabs(b));
+ double x = a + b;
+ double y = (a - x) + b;
+ return (DoubleLength) {x, y};
+}
+
+static DoubleLength
+dl_sum(double a, double b)
+{
+ /* Algorithm 3.1 Error-free transformation of the sum */
+ double x = a + b;
+ double z = x - a;
+ double y = (a - (x - z)) + (b - z);
+ return (DoubleLength) {x, y};
+}
+
+#ifndef UNRELIABLE_FMA
+
+static DoubleLength
+dl_mul(double x, double y)
+{
+ /* Algorithm 3.5. Error-free transformation of a product */
+ double z = x * y;
+ double zz = fma(x, y, -z);
+ return (DoubleLength) {z, zz};
+}
+
+#else
+
+/*
+ The default implementation of dl_mul() depends on the C math library
+ having an accurate fma() function as required by § 7.12.13.1 of the
+ C99 standard.
+
+ The UNRELIABLE_FMA option is provided as a slower but accurate
+ alternative for builds where the fma() function is found wanting.
+ The speed penalty may be modest (17% slower on an Apple M1 Max),
+ so don't hesitate to enable this build option.
+
+ The algorithms are from the T. J. Dekker paper:
+ A Floating-Point Technique for Extending the Available Precision
+ https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
+*/
+
+static DoubleLength
+dl_split(double x) {
+ // Dekker (5.5) and (5.6).
+ double t = x * 134217729.0; // Veltkamp constant = 2.0 ** 27 + 1
+ double hi = t - (t - x);
+ double lo = x - hi;
+ return (DoubleLength) {hi, lo};
+}
+
+static DoubleLength
+dl_mul(double x, double y)
+{
+ // Dekker (5.12) and mul12()
+ DoubleLength xx = dl_split(x);
+ DoubleLength yy = dl_split(y);
+ double p = xx.hi * yy.hi;
+ double q = xx.hi * yy.lo + xx.lo * yy.hi;
+ double z = p + q;
+ double zz = p - z + q + xx.lo * yy.lo;
+ return (DoubleLength) {z, zz};
+}
+
+#endif
+
+typedef struct { double hi; double lo; double tiny; } TripleLength;
+
+static const TripleLength tl_zero = {0.0, 0.0, 0.0};
+
+static TripleLength
+tl_fma(double x, double y, TripleLength total)
+{
+ /* Algorithm 5.10 with SumKVert for K=3 */
+ DoubleLength pr = dl_mul(x, y);
+ DoubleLength sm = dl_sum(total.hi, pr.hi);
+ DoubleLength r1 = dl_sum(total.lo, pr.lo);
+ DoubleLength r2 = dl_sum(r1.hi, sm.lo);
+ return (TripleLength) {sm.hi, r2.hi, total.tiny + r1.lo + r2.lo};
+}
+
+static double
+tl_to_d(TripleLength total)
+{
+ DoubleLength last = dl_sum(total.lo, total.hi);
+ return total.tiny + last.lo + last.hi;
+}
+
+
/*
sin(pi*x), giving accurate results for all finite x (especially x
integral or close to an integer). This is here for use in the
@@ -85,10 +207,6 @@ module math
static const double pi = 3.141592653589793238462643383279502884197;
static const double logpi = 1.144729885849400174143427351353058711647;
-#if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
-static const double sqrtpi = 1.772453850905516027298167483341145182798;
-#endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
-
/* Version of PyFloat_AsDouble() with in-line fast paths
for exact floats and integers. Gives a substantial
@@ -146,7 +264,9 @@ m_sinpi(double x)
return copysign(1.0, x)*r;
}
-/* Implementation of the real gamma function. In extensive but non-exhaustive
+/* Implementation of the real gamma function. Kept here to work around
+ issues (see e.g. gh-70309) with quality of libm's tgamma/lgamma implementations
+ on various platforms (Windows, MacOS). In extensive but non-exhaustive
random tests, this function proved accurate to within <= 10 ulps across the
entire float domain. Note that accuracy may depend on the quality of the
system math functions, the pow function in particular. Special cases
@@ -268,34 +388,6 @@ lanczos_sum(double x)
return num/den;
}
-/* Constant for +infinity, generated in the same way as float('inf'). */
-
-static double
-m_inf(void)
-{
-#if _PY_SHORT_FLOAT_REPR == 1
- return _Py_dg_infinity(0);
-#else
- return Py_HUGE_VAL;
-#endif
-}
-
-/* Constant nan value, generated in the same way as float('nan'). */
-/* We don't currently assume that Py_NAN is defined everywhere. */
-
-#if _PY_SHORT_FLOAT_REPR == 1
-
-static double
-m_nan(void)
-{
-#if _PY_SHORT_FLOAT_REPR == 1
- return _Py_dg_stdnan(0);
-#else
- return Py_NAN;
-#endif
-}
-
-#endif
static double
m_tgamma(double x)
@@ -314,7 +406,7 @@ m_tgamma(double x)
if (x == 0.0) {
errno = EDOM;
/* tgamma(+-0.0) = +-inf, divide-by-zero */
- return copysign(Py_HUGE_VAL, x);
+ return copysign(Py_INFINITY, x);
}
/* integer arguments */
@@ -442,163 +534,6 @@ m_lgamma(double x)
return r;
}
-#if !defined(HAVE_ERF) || !defined(HAVE_ERFC)
-
-/*
- Implementations of the error function erf(x) and the complementary error
- function erfc(x).
-
- Method: we use a series approximation for erf for small x, and a continued
- fraction approximation for erfc(x) for larger x;
- combined with the relations erf(-x) = -erf(x) and erfc(x) = 1.0 - erf(x),
- this gives us erf(x) and erfc(x) for all x.
-
- The series expansion used is:
-
- erf(x) = x*exp(-x*x)/sqrt(pi) * [
- 2/1 + 4/3 x**2 + 8/15 x**4 + 16/105 x**6 + ...]
-
- The coefficient of x**(2k-2) here is 4**k*factorial(k)/factorial(2*k).
- This series converges well for smallish x, but slowly for larger x.
-
- The continued fraction expansion used is:
-
- erfc(x) = x*exp(-x*x)/sqrt(pi) * [1/(0.5 + x**2 -) 0.5/(2.5 + x**2 - )
- 3.0/(4.5 + x**2 - ) 7.5/(6.5 + x**2 - ) ...]
-
- after the first term, the general term has the form:
-
- k*(k-0.5)/(2*k+0.5 + x**2 - ...).
-
- This expansion converges fast for larger x, but convergence becomes
- infinitely slow as x approaches 0.0. The (somewhat naive) continued
- fraction evaluation algorithm used below also risks overflow for large x;
- but for large x, erfc(x) == 0.0 to within machine precision. (For
- example, erfc(30.0) is approximately 2.56e-393).
-
- Parameters: use series expansion for abs(x) < ERF_SERIES_CUTOFF and
- continued fraction expansion for ERF_SERIES_CUTOFF <= abs(x) <
- ERFC_CONTFRAC_CUTOFF. ERFC_SERIES_TERMS and ERFC_CONTFRAC_TERMS are the
- numbers of terms to use for the relevant expansions. */
-
-#define ERF_SERIES_CUTOFF 1.5
-#define ERF_SERIES_TERMS 25
-#define ERFC_CONTFRAC_CUTOFF 30.0
-#define ERFC_CONTFRAC_TERMS 50
-
-/*
- Error function, via power series.
-
- Given a finite float x, return an approximation to erf(x).
- Converges reasonably fast for small x.
-*/
-
-static double
-m_erf_series(double x)
-{
- double x2, acc, fk, result;
- int i, saved_errno;
-
- x2 = x * x;
- acc = 0.0;
- fk = (double)ERF_SERIES_TERMS + 0.5;
- for (i = 0; i < ERF_SERIES_TERMS; i++) {
- acc = 2.0 + x2 * acc / fk;
- fk -= 1.0;
- }
- /* Make sure the exp call doesn't affect errno;
- see m_erfc_contfrac for more. */
- saved_errno = errno;
- result = acc * x * exp(-x2) / sqrtpi;
- errno = saved_errno;
- return result;
-}
-
-/*
- Complementary error function, via continued fraction expansion.
-
- Given a positive float x, return an approximation to erfc(x). Converges
- reasonably fast for x large (say, x > 2.0), and should be safe from
- overflow if x and nterms are not too large. On an IEEE 754 machine, with x
- <= 30.0, we're safe up to nterms = 100. For x >= 30.0, erfc(x) is smaller
- than the smallest representable nonzero float. */
-
-static double
-m_erfc_contfrac(double x)
-{
- double x2, a, da, p, p_last, q, q_last, b, result;
- int i, saved_errno;
-
- if (x >= ERFC_CONTFRAC_CUTOFF)
- return 0.0;
-
- x2 = x*x;
- a = 0.0;
- da = 0.5;
- p = 1.0; p_last = 0.0;
- q = da + x2; q_last = 1.0;
- for (i = 0; i < ERFC_CONTFRAC_TERMS; i++) {
- double temp;
- a += da;
- da += 2.0;
- b = da + x2;
- temp = p; p = b*p - a*p_last; p_last = temp;
- temp = q; q = b*q - a*q_last; q_last = temp;
- }
- /* Issue #8986: On some platforms, exp sets errno on underflow to zero;
- save the current errno value so that we can restore it later. */
- saved_errno = errno;
- result = p / q * x * exp(-x2) / sqrtpi;
- errno = saved_errno;
- return result;
-}
-
-#endif /* !defined(HAVE_ERF) || !defined(HAVE_ERFC) */
-
-/* Error function erf(x), for general x */
-
-static double
-m_erf(double x)
-{
-#ifdef HAVE_ERF
- return erf(x);
-#else
- double absx, cf;
-
- if (Py_IS_NAN(x))
- return x;
- absx = fabs(x);
- if (absx < ERF_SERIES_CUTOFF)
- return m_erf_series(x);
- else {
- cf = m_erfc_contfrac(absx);
- return x > 0.0 ? 1.0 - cf : cf - 1.0;
- }
-#endif
-}
-
-/* Complementary error function erfc(x), for general x. */
-
-static double
-m_erfc(double x)
-{
-#ifdef HAVE_ERFC
- return erfc(x);
-#else
- double absx, cf;
-
- if (Py_IS_NAN(x))
- return x;
- absx = fabs(x);
- if (absx < ERF_SERIES_CUTOFF)
- return 1.0 - m_erf_series(x);
- else {
- cf = m_erfc_contfrac(absx);
- return x > 0.0 ? cf : 2.0 - cf;
- }
-#endif
-}
-
/*
wrapper for atan2 that deals directly with special cases before
delegating to the platform libm for the remaining cases. This
@@ -785,25 +720,7 @@ m_log2(double x)
}
if (x > 0.0) {
-#ifdef HAVE_LOG2
return log2(x);
-#else
- double m;
- int e;
- m = frexp(x, &e);
- /* We want log2(m * 2**e) == log(m) / log(2) + e. Care is needed when
- * x is just greater than 1.0: in that case e is 1, log(m) is negative,
- * and we get significant cancellation error from the addition of
- * log(m) / log(2) to e. The slight rewrite of the expression below
- * avoids this problem.
- */
- if (x >= 1.0) {
- return log(2.0 * m) / log(2.0) + (e - 1);
- }
- else {
- return log(m) / log(2.0) + e;
- }
-#endif
}
else if (x == 0.0) {
errno = EDOM;
@@ -890,7 +807,7 @@ long_lcm(PyObject *a, PyObject *b)
{
PyObject *g, *m, *f, *ab;
- if (Py_SIZE(a) == 0 || Py_SIZE(b) == 0) {
+ if (_PyLong_IsZero((PyLongObject *)a) || _PyLong_IsZero((PyLongObject *)b)) {
return PyLong_FromLong(0);
}
g = _PyLong_GCD(a, b);
@@ -1036,9 +953,7 @@ is_error(double x)
*/
static PyObject *
-math_1_to_whatever(PyObject *arg, double (*func) (double),
- PyObject *(*from_double_func) (double),
- int can_overflow)
+math_1(PyObject *arg, double (*func) (double), int can_overflow)
{
double x, r;
x = PyFloat_AsDouble(arg);
@@ -1064,7 +979,7 @@ math_1_to_whatever(PyObject *arg, double (*func) (double),
/* this branch unnecessary on most platforms */
return NULL;
- return (*from_double_func)(r);
+ return PyFloat_FromDouble(r);
}
/* variant of math_1, to be used when the function being wrapped is known to
@@ -1113,12 +1028,6 @@ math_1a(PyObject *arg, double (*func) (double))
*/
static PyObject *
-math_1(PyObject *arg, double (*func) (double), int can_overflow)
-{
- return math_1_to_whatever(arg, func, PyFloat_FromDouble, can_overflow);
-}
-
-static PyObject *
math_2(PyObject *const *args, Py_ssize_t nargs,
double (*func) (double, double), const char *funcname)
{
@@ -1215,10 +1124,10 @@ static PyObject *
math_ceil(PyObject *module, PyObject *number)
/*[clinic end generated code: output=6c3b8a78bc201c67 input=2725352806399cab]*/
{
- _Py_IDENTIFIER(__ceil__);
if (!PyFloat_CheckExact(number)) {
- PyObject *method = _PyObject_LookupSpecialId(number, &PyId___ceil__);
+ math_module_state *state = get_math_module_state(module);
+ PyObject *method = _PyObject_LookupSpecial(number, state->str___ceil__);
if (method != NULL) {
PyObject *result = _PyObject_CallNoArgs(method);
Py_DECREF(method);
@@ -1245,10 +1154,10 @@ FUNC1(cos, cos, 0,
FUNC1(cosh, cosh, 1,
"cosh($module, x, /)\n--\n\n"
"Return the hyperbolic cosine of x.")
-FUNC1A(erf, m_erf,
+FUNC1A(erf, erf,
"erf($module, x, /)\n--\n\n"
"Error function at x.")
-FUNC1A(erfc, m_erfc,
+FUNC1A(erfc, erfc,
"erfc($module, x, /)\n--\n\n"
"Complementary error function at x.")
FUNC1(exp, exp, 1,
@@ -1283,14 +1192,13 @@ math_floor(PyObject *module, PyObject *number)
{
double x;
- _Py_IDENTIFIER(__floor__);
-
if (PyFloat_CheckExact(number)) {
x = PyFloat_AS_DOUBLE(number);
}
else
{
- PyObject *method = _PyObject_LookupSpecialId(number, &PyId___floor__);
+ math_module_state *state = get_math_module_state(module);
+ PyObject *method = _PyObject_LookupSpecial(number, state->str___floor__);
if (method != NULL) {
PyObject *result = _PyObject_CallNoArgs(method);
Py_DECREF(method);
@@ -1343,30 +1251,30 @@ FUNC1(tanh, tanh, 0,
Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
See those links for more details, proofs and other references.
- Note 1: IEEE 754R floating point semantics are assumed,
- but the current implementation does not re-establish special
- value semantics across iterations (i.e. handling -Inf + Inf).
+ Note 1: IEEE 754 floating-point semantics with a rounding mode of
+ roundTiesToEven are assumed.
- Note 2: No provision is made for intermediate overflow handling;
- therefore, sum([1e+308, 1e-308, 1e+308]) returns 1e+308 while
- sum([1e+308, 1e+308, 1e-308]) raises an OverflowError due to the
+ Note 2: No provision is made for intermediate overflow handling;
+ therefore, fsum([1e+308, -1e+308, 1e+308]) returns 1e+308 while
+ fsum([1e+308, 1e+308, -1e+308]) raises an OverflowError due to the
overflow of the first partial sum.
- Note 3: The intermediate values lo, yr, and hi are declared volatile so
- aggressive compilers won't algebraically reduce lo to always be exactly 0.0.
- Also, the volatile declaration forces the values to be stored in memory as
- regular doubles instead of extended long precision (80-bit) values. This
- prevents double rounding because any addition or subtraction of two doubles
- can be resolved exactly into double-sized hi and lo values. As long as the
- hi value gets forced into a double before yr and lo are computed, the extra
- bits in downstream extended precision operations (x87 for example) will be
- exactly zero and therefore can be losslessly stored back into a double,
- thereby preventing double rounding.
-
- Note 4: A similar implementation is in Modules/cmathmodule.c.
- Be sure to update both when making changes.
-
- Note 5: The signature of math.fsum() differs from builtins.sum()
+ Note 3: The algorithm has two potential sources of fragility. First, C
+ permits arithmetic operations on `double`s to be performed in an
+ intermediate format whose range and precision may be greater than those of
+ `double` (see for example C99 §5.2.4.2.2, paragraph 8). This can happen for
+ example on machines using the now largely historical x87 FPUs. In this case,
+ `fsum` can produce incorrect results. If `FLT_EVAL_METHOD` is `0` or `1`, or
+ `FLT_EVAL_METHOD` is `2` and `long double` is identical to `double`, then we
+ should be safe from this source of errors. Second, an aggressively
+ optimizing compiler can re-associate operations so that (for example) the
+ statement `yr = hi - x;` is treated as `yr = (x + y) - x` and then
+ re-associated as `yr = y + (x - x)`, giving `y = yr` and `lo = 0.0`. That
+ re-association would be in violation of the C standard, and should not occur
+ except possibly in the presence of unsafe optimizations (e.g., -ffast-math,
+ -fassociative-math). Such optimizations should be avoided for this module.
+
+ Note 4: The signature of math.fsum() differs from builtins.sum()
because the start argument doesn't make sense in the context of
accurate summation. Since the partials table is collapsed before
returning a result, sum(seq2, start=sum(seq1)) may not equal the
@@ -1452,7 +1360,7 @@ math_fsum(PyObject *module, PyObject *seq)
Py_ssize_t i, j, n = 0, m = NUM_PARTIALS;
double x, y, t, ps[NUM_PARTIALS], *p = ps;
double xsave, special_sum = 0.0, inf_sum = 0.0;
- volatile double hi, yr, lo;
+ double hi, yr, lo = 0.0;
iter = PyObject_GetIter(seq);
if (iter == NULL)
@@ -1789,13 +1697,13 @@ math_isqrt(PyObject *module, PyObject *n)
return NULL;
}
- if (_PyLong_Sign(n) < 0) {
+ if (_PyLong_IsNegative((PyLongObject *)n)) {
PyErr_SetString(
PyExc_ValueError,
"isqrt() argument must be nonnegative");
goto error;
}
- if (_PyLong_Sign(n) == 0) {
+ if (_PyLong_IsZero((PyLongObject *)n)) {
Py_DECREF(n);
return PyLong_FromLong(0);
}
@@ -2034,8 +1942,7 @@ factorial_odd_part(unsigned long n)
inner = PyLong_FromLong(1);
if (inner == NULL)
return NULL;
- outer = inner;
- Py_INCREF(outer);
+ outer = Py_NewRef(inner);
upper = 3;
for (i = _Py_bit_length(n) - 2; i >= 0; i--) {
@@ -2056,8 +1963,7 @@ factorial_odd_part(unsigned long n)
Py_DECREF(partial);
if (tmp == NULL)
goto error;
- Py_DECREF(inner);
- inner = tmp;
+ Py_SETREF(inner, tmp);
/* Now inner is the product of all odd integers j in the range (0,
n/2**i], giving the inner product in the formula above. */
@@ -2065,8 +1971,7 @@ factorial_odd_part(unsigned long n)
tmp = PyNumber_Multiply(outer, inner);
if (tmp == NULL)
goto error;
- Py_DECREF(outer);
- outer = tmp;
+ Py_SETREF(outer, tmp);
}
Py_DECREF(inner);
return outer;
@@ -2156,19 +2061,19 @@ static PyObject *
math_trunc(PyObject *module, PyObject *x)
/*[clinic end generated code: output=34b9697b707e1031 input=2168b34e0a09134d]*/
{
- _Py_IDENTIFIER(__trunc__);
PyObject *trunc, *result;
if (PyFloat_CheckExact(x)) {
return PyFloat_Type.tp_as_number->nb_int(x);
}
- if (Py_TYPE(x)->tp_dict == NULL) {
+ if (!_PyType_IsReady(Py_TYPE(x))) {
if (PyType_Ready(Py_TYPE(x)) < 0)
return NULL;
}
- trunc = _PyObject_LookupSpecialId(x, &PyId___trunc__);
+ math_module_state *state = get_math_module_state(module);
+ trunc = _PyObject_LookupSpecial(x, state->str___trunc__);
if (trunc == NULL) {
if (!PyErr_Occurred())
PyErr_Format(PyExc_TypeError,
@@ -2312,7 +2217,7 @@ math_modf_impl(PyObject *module, double x)
in that int is larger than PY_SSIZE_T_MAX. */
static PyObject*
-loghelper(PyObject* arg, double (*func)(double), const char *funcname)
+loghelper(PyObject* arg, double (*func)(double))
{
/* If it is int, do it ourselves. */
if (PyLong_Check(arg)) {
@@ -2320,7 +2225,7 @@ loghelper(PyObject* arg, double (*func)(double), const char *funcname)
Py_ssize_t e;
/* Negative or zero inputs give a ValueError. */
- if (Py_SIZE(arg) <= 0) {
+ if (!_PyLong_IsPositive((PyLongObject *)arg)) {
PyErr_SetString(PyExc_ValueError,
"math domain error");
return NULL;
@@ -2350,33 +2255,22 @@ loghelper(PyObject* arg, double (*func)(double), const char *funcname)
}
-/*[clinic input]
-math.log
-
- x: object
- [
- base: object(c_default="NULL") = math.e
- ]
- /
-
-Return the logarithm of x to the given base.
-
-If the base not specified, returns the natural logarithm (base e) of x.
-[clinic start generated code]*/
-
+/* AC: cannot convert yet, see gh-102839 and gh-89381, waiting
+ for support of multiple signatures */
static PyObject *
-math_log_impl(PyObject *module, PyObject *x, int group_right_1,
- PyObject *base)
-/*[clinic end generated code: output=7b5a39e526b73fc9 input=0f62d5726cbfebbd]*/
+math_log(PyObject *module, PyObject * const *args, Py_ssize_t nargs)
{
PyObject *num, *den;
PyObject *ans;
- num = loghelper(x, m_log, "log");
- if (num == NULL || base == NULL)
+ if (!_PyArg_CheckPositional("log", nargs, 1, 2))
+ return NULL;
+
+ num = loghelper(args[0], m_log);
+ if (num == NULL || nargs == 1)
return num;
- den = loghelper(base, m_log, "log");
+ den = loghelper(args[1], m_log);
if (den == NULL) {
Py_DECREF(num);
return NULL;
@@ -2388,6 +2282,10 @@ math_log_impl(PyObject *module, PyObject *x, int group_right_1,
return ans;
}
+PyDoc_STRVAR(math_log_doc,
+"log(x, [base=math.e])\n\
+Return the logarithm of x to the given base.\n\n\
+If the base is not specified, returns the natural logarithm (base e) of x.");
/*[clinic input]
math.log2
@@ -2402,7 +2300,7 @@ static PyObject *
math_log2(PyObject *module, PyObject *x)
/*[clinic end generated code: output=5425899a4d5d6acb input=08321262bae4f39b]*/
{
- return loghelper(x, m_log2, "log2");
+ return loghelper(x, m_log2);
}
@@ -2419,7 +2317,7 @@ static PyObject *
math_log10(PyObject *module, PyObject *x)
/*[clinic end generated code: output=be72a64617df9c6f input=b2469d02c6469e53]*/
{
- return loghelper(x, m_log10, "log10");
+ return loghelper(x, m_log10);
}
@@ -2474,6 +2372,7 @@ that are almost always correctly rounded, four techniques are used:
* lossless scaling using a power-of-two scaling factor
* accurate squaring using Veltkamp-Dekker splitting [1]
+ or an equivalent with an fma() call
* compensated summation using a variant of the Neumaier algorithm [2]
* differential correction of the square root [3]
@@ -2512,9 +2411,8 @@ Since lo**2 is less than 1/2 ulp(csum), we have csum+lo*lo == csum.
To minimize loss of information during the accumulation of fractional
values, each term has a separate accumulator. This also breaks up
sequential dependencies in the inner loop so the CPU can maximize
-floating point throughput. [4] On a 2.6 GHz Haswell, adding one
-dimension has an incremental cost of only 5ns -- for example when
-moving from hypot(x,y) to hypot(x,y,z).
+floating point throughput. [4] On an Apple M1 Max, hypot(*vec)
+takes only 3.33 µsec when len(vec) == 1000.
The square root differential correction is needed because a
correctly rounded square root of a correctly rounded sum of
@@ -2532,14 +2430,21 @@ algorithm, effectively doubling the number of accurate bits.
This technique is used in Dekker's SQRT2 algorithm and again in
Borges' ALGORITHM 4 and 5.
-Without proof for all cases, hypot() cannot claim to be always
-correctly rounded. However for n <= 1000, prior to the final addition
-that rounds the overall result, the internal accuracy of "h" together
-with its correction of "x / (2.0 * h)" is at least 100 bits. [6]
-Also, hypot() was tested against a Decimal implementation with
-prec=300. After 100 million trials, no incorrectly rounded examples
-were found. In addition, perfect commutativity (all permutations are
-exactly equal) was verified for 1 billion random inputs with n=5. [7]
+The hypot() function is faithfully rounded (less than 1 ulp error)
+and usually correctly rounded (within 1/2 ulp). The squaring
+step is exact. The Neumaier summation computes as if in doubled
+precision (106 bits) and has the advantage that its input squares
+are non-negative so that the condition number of the sum is one.
+The square root with a differential correction is likewise computed
+as if in doubled precision.
+
+For n <= 1000, prior to the final addition that rounds the overall
+result, the internal accuracy of "h" together with its correction of
+"x / (2.0 * h)" is at least 100 bits. [6] Also, hypot() was tested
+against a Decimal implementation with prec=300. After 100 million
+trials, no incorrectly rounded examples were found. In addition,
+perfect commutativity (all permutations are exactly equal) was
+verified for 1 billion random inputs with n=5. [7]
References:
@@ -2556,9 +2461,8 @@ References:
static inline double
vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
{
- const double T27 = 134217729.0; /* ldexp(1.0, 27) + 1.0) */
- double x, scale, oldcsum, csum = 1.0, frac1 = 0.0, frac2 = 0.0, frac3 = 0.0;
- double t, hi, lo, h;
+ double x, h, scale, csum = 1.0, frac1 = 0.0, frac2 = 0.0;
+ DoubleLength pr, sm;
int max_e;
Py_ssize_t i;
@@ -2572,82 +2476,37 @@ vector_norm(Py_ssize_t n, double *vec, double max, int found_nan)
return max;
}
frexp(max, &max_e);
- if (max_e >= -1023) {
- scale = ldexp(1.0, -max_e);
- assert(max * scale >= 0.5);
- assert(max * scale < 1.0);
+ if (max_e < -1023) {
+ /* When max_e < -1023, ldexp(1.0, -max_e) would overflow. */
for (i=0 ; i < n ; i++) {
- x = vec[i];
- assert(Py_IS_FINITE(x) && fabs(x) <= max);
-
- x *= scale;
- assert(fabs(x) < 1.0);
-
- t = x * T27;
- hi = t - (t - x);
- lo = x - hi;
- assert(hi + lo == x);
-
- x = hi * hi;
- assert(x <= 1.0);
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac1 += (oldcsum - csum) + x;
-
- x = 2.0 * hi * lo;
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac2 += (oldcsum - csum) + x;
-
- assert(csum + lo * lo == csum);
- frac3 += lo * lo;
- }
- h = sqrt(csum - 1.0 + (frac1 + frac2 + frac3));
-
- x = h;
- t = x * T27;
- hi = t - (t - x);
- lo = x - hi;
- assert (hi + lo == x);
-
- x = -hi * hi;
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac1 += (oldcsum - csum) + x;
-
- x = -2.0 * hi * lo;
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac2 += (oldcsum - csum) + x;
-
- x = -lo * lo;
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac3 += (oldcsum - csum) + x;
-
- x = csum - 1.0 + (frac1 + frac2 + frac3);
- return (h + x / (2.0 * h)) / scale;
- }
- /* When max_e < -1023, ldexp(1.0, -max_e) overflows.
- So instead of multiplying by a scale, we just divide by *max*.
- */
+ vec[i] /= DBL_MIN; // convert subnormals to normals
+ }
+ return DBL_MIN * vector_norm(n, vec, max / DBL_MIN, found_nan);
+ }
+ scale = ldexp(1.0, -max_e);
+ assert(max * scale >= 0.5);
+ assert(max * scale < 1.0);
for (i=0 ; i < n ; i++) {
x = vec[i];
assert(Py_IS_FINITE(x) && fabs(x) <= max);
- x /= max;
- x = x*x;
- assert(x <= 1.0);
- assert(fabs(csum) >= fabs(x));
- oldcsum = csum;
- csum += x;
- frac1 += (oldcsum - csum) + x;
- }
- return max * sqrt(csum - 1.0 + frac1);
+ x *= scale; // lossless scaling
+ assert(fabs(x) < 1.0);
+ pr = dl_mul(x, x); // lossless squaring
+ assert(pr.hi <= 1.0);
+ sm = dl_fast_sum(csum, pr.hi); // lossless addition
+ csum = sm.hi;
+ frac1 += pr.lo; // lossy addition
+ frac2 += sm.lo; // lossy addition
+ }
+ h = sqrt(csum - 1.0 + (frac1 + frac2));
+ pr = dl_mul(-h, h);
+ sm = dl_fast_sum(csum, pr.hi);
+ csum = sm.hi;
+ frac1 += pr.lo;
+ frac2 += sm.lo;
+ x = csum - 1.0 + (frac1 + frac2);
+ h += x / (2.0 * h); // differential correction
+ return h / scale;
}
#define NUM_STACK_ELEMS 16
@@ -2808,6 +2667,247 @@ For example, the hypotenuse of a 3/4/5 right triangle is:\n\
5.0\n\
");
+/** sumprod() ***************************************************************/
+
+/* Forward declaration */
+static inline int _check_long_mult_overflow(long a, long b);
+
+static inline bool
+long_add_would_overflow(long a, long b)
+{
+ return (a > 0) ? (b > LONG_MAX - a) : (b < LONG_MIN - a);
+}
+
+/*[clinic input]
+math.sumprod
+
+ p: object
+ q: object
+ /
+
+Return the sum of products of values from two iterables p and q.
+
+Roughly equivalent to:
+
+ sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))
+
+For float and mixed int/float inputs, the intermediate products
+and sums are computed with extended precision.
+[clinic start generated code]*/
+
+static PyObject *
+math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q)
+/*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/
+{
+ PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL;
+ PyObject *p_it, *q_it, *total;
+ iternextfunc p_next, q_next;
+ bool p_stopped = false, q_stopped = false;
+ bool int_path_enabled = true, int_total_in_use = false;
+ bool flt_path_enabled = true, flt_total_in_use = false;
+ long int_total = 0;
+ TripleLength flt_total = tl_zero;
+
+ p_it = PyObject_GetIter(p);
+ if (p_it == NULL) {
+ return NULL;
+ }
+ q_it = PyObject_GetIter(q);
+ if (q_it == NULL) {
+ Py_DECREF(p_it);
+ return NULL;
+ }
+ total = PyLong_FromLong(0);
+ if (total == NULL) {
+ Py_DECREF(p_it);
+ Py_DECREF(q_it);
+ return NULL;
+ }
+ p_next = *Py_TYPE(p_it)->tp_iternext;
+ q_next = *Py_TYPE(q_it)->tp_iternext;
+ while (1) {
+ bool finished;
+
+ assert (p_i == NULL);
+ assert (q_i == NULL);
+ assert (term_i == NULL);
+ assert (new_total == NULL);
+
+ assert (p_it != NULL);
+ assert (q_it != NULL);
+ assert (total != NULL);
+
+ p_i = p_next(p_it);
+ if (p_i == NULL) {
+ if (PyErr_Occurred()) {
+ if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
+ goto err_exit;
+ }
+ PyErr_Clear();
+ }
+ p_stopped = true;
+ }
+ q_i = q_next(q_it);
+ if (q_i == NULL) {
+ if (PyErr_Occurred()) {
+ if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
+ goto err_exit;
+ }
+ PyErr_Clear();
+ }
+ q_stopped = true;
+ }
+ if (p_stopped != q_stopped) {
+ PyErr_Format(PyExc_ValueError, "Inputs are not the same length");
+ goto err_exit;
+ }
+ finished = p_stopped & q_stopped;
+
+ if (int_path_enabled) {
+
+ if (!finished && PyLong_CheckExact(p_i) & PyLong_CheckExact(q_i)) {
+ int overflow;
+ long int_p, int_q, int_prod;
+
+ int_p = PyLong_AsLongAndOverflow(p_i, &overflow);
+ if (overflow) {
+ goto finalize_int_path;
+ }
+ int_q = PyLong_AsLongAndOverflow(q_i, &overflow);
+ if (overflow) {
+ goto finalize_int_path;
+ }
+ if (_check_long_mult_overflow(int_p, int_q)) {
+ goto finalize_int_path;
+ }
+ int_prod = int_p * int_q;
+ if (long_add_would_overflow(int_total, int_prod)) {
+ goto finalize_int_path;
+ }
+ int_total += int_prod;
+ int_total_in_use = true;
+ Py_CLEAR(p_i);
+ Py_CLEAR(q_i);
+ continue;
+ }
+
+ finalize_int_path:
+ // We're finished, overflowed, or have a non-int
+ int_path_enabled = false;
+ if (int_total_in_use) {
+ term_i = PyLong_FromLong(int_total);
+ if (term_i == NULL) {
+ goto err_exit;
+ }
+ new_total = PyNumber_Add(total, term_i);
+ if (new_total == NULL) {
+ goto err_exit;
+ }
+ Py_SETREF(total, new_total);
+ new_total = NULL;
+ Py_CLEAR(term_i);
+ int_total = 0; // An ounce of prevention, ...
+ int_total_in_use = false;
+ }
+ }
+
+ if (flt_path_enabled) {
+
+ if (!finished) {
+ double flt_p, flt_q;
+ bool p_type_float = PyFloat_CheckExact(p_i);
+ bool q_type_float = PyFloat_CheckExact(q_i);
+ if (p_type_float && q_type_float) {
+ flt_p = PyFloat_AS_DOUBLE(p_i);
+ flt_q = PyFloat_AS_DOUBLE(q_i);
+ } else if (p_type_float && (PyLong_CheckExact(q_i) || PyBool_Check(q_i))) {
+ /* We care about float/int pairs and int/float pairs because
+ they arise naturally in several use cases such as price
+ times quantity, measurements with integer weights, or
+ data selected by a vector of bools. */
+ flt_p = PyFloat_AS_DOUBLE(p_i);
+ flt_q = PyLong_AsDouble(q_i);
+ if (flt_q == -1.0 && PyErr_Occurred()) {
+ PyErr_Clear();
+ goto finalize_flt_path;
+ }
+ } else if (q_type_float && (PyLong_CheckExact(p_i) || PyBool_Check(p_i))) {
+ flt_q = PyFloat_AS_DOUBLE(q_i);
+ flt_p = PyLong_AsDouble(p_i);
+ if (flt_p == -1.0 && PyErr_Occurred()) {
+ PyErr_Clear();
+ goto finalize_flt_path;
+ }
+ } else {
+ goto finalize_flt_path;
+ }
+ TripleLength new_flt_total = tl_fma(flt_p, flt_q, flt_total);
+ if (isfinite(new_flt_total.hi)) {
+ flt_total = new_flt_total;
+ flt_total_in_use = true;
+ Py_CLEAR(p_i);
+ Py_CLEAR(q_i);
+ continue;
+ }
+ }
+
+ finalize_flt_path:
+ // We're finished, overflowed, have a non-float, or got a non-finite value
+ flt_path_enabled = false;
+ if (flt_total_in_use) {
+ term_i = PyFloat_FromDouble(tl_to_d(flt_total));
+ if (term_i == NULL) {
+ goto err_exit;
+ }
+ new_total = PyNumber_Add(total, term_i);
+ if (new_total == NULL) {
+ goto err_exit;
+ }
+ Py_SETREF(total, new_total);
+ new_total = NULL;
+ Py_CLEAR(term_i);
+ flt_total = tl_zero;
+ flt_total_in_use = false;
+ }
+ }
+
+ assert(!int_total_in_use);
+ assert(!flt_total_in_use);
+ if (finished) {
+ goto normal_exit;
+ }
+ term_i = PyNumber_Multiply(p_i, q_i);
+ if (term_i == NULL) {
+ goto err_exit;
+ }
+ new_total = PyNumber_Add(total, term_i);
+ if (new_total == NULL) {
+ goto err_exit;
+ }
+ Py_SETREF(total, new_total);
+ new_total = NULL;
+ Py_CLEAR(p_i);
+ Py_CLEAR(q_i);
+ Py_CLEAR(term_i);
+ }
+
+ normal_exit:
+ Py_DECREF(p_it);
+ Py_DECREF(q_it);
+ return total;
+
+ err_exit:
+ Py_DECREF(p_it);
+ Py_DECREF(q_it);
+ Py_DECREF(total);
+ Py_XDECREF(p_i);
+ Py_XDECREF(q_i);
+ Py_XDECREF(term_i);
+ Py_XDECREF(new_total);
+ return NULL;
+}
+
+
/* pow can't use math_2, but needs its own wrapper: the problem is
that an infinite result can arise either as a result of overflow
(in which case OverflowError should be raised) or as a result of
@@ -3141,8 +3241,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
long i_result = PyLong_AsLongAndOverflow(result, &overflow);
/* If this already overflowed, don't even enter the loop. */
if (overflow == 0) {
- Py_DECREF(result);
- result = NULL;
+ Py_SETREF(result, NULL);
}
/* Loop over all the items in the iterable until we finish, we overflow
* or we found a non integer element */
@@ -3189,8 +3288,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
*/
if (PyFloat_CheckExact(result)) {
double f_result = PyFloat_AS_DOUBLE(result);
- Py_DECREF(result);
- result = NULL;
+ Py_SETREF(result, NULL);
while(result == NULL) {
item = PyIter_Next(iter);
if (item == NULL) {
@@ -3239,8 +3337,7 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
if (item == NULL) {
/* error, or end-of-sequence */
if (PyErr_Occurred()) {
- Py_DECREF(result);
- result = NULL;
+ Py_SETREF(result, NULL);
}
break;
}
@@ -3507,8 +3604,7 @@ perm_comb(PyObject *n, unsigned long long k, int iscomb)
return PyLong_FromLong(1);
}
if (k == 1) {
- Py_INCREF(n);
- return n;
+ return Py_NewRef(n);
}
/* P(n, k) = P(n, j) * P(n-j, k-j) */
@@ -3591,12 +3687,12 @@ math_perm_impl(PyObject *module, PyObject *n, PyObject *k)
}
assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
- if (Py_SIZE(n) < 0) {
+ if (_PyLong_IsNegative((PyLongObject *)n)) {
PyErr_SetString(PyExc_ValueError,
"n must be a non-negative integer");
goto error;
}
- if (Py_SIZE(k) < 0) {
+ if (_PyLong_IsNegative((PyLongObject *)k)) {
PyErr_SetString(PyExc_ValueError,
"k must be a non-negative integer");
goto error;
@@ -3683,12 +3779,12 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
}
assert(PyLong_CheckExact(n) && PyLong_CheckExact(k));
- if (Py_SIZE(n) < 0) {
+ if (_PyLong_IsNegative((PyLongObject *)n)) {
PyErr_SetString(PyExc_ValueError,
"n must be a non-negative integer");
goto error;
}
- if (Py_SIZE(k) < 0) {
+ if (_PyLong_IsNegative((PyLongObject *)k)) {
PyErr_SetString(PyExc_ValueError,
"k must be a non-negative integer");
goto error;
@@ -3720,7 +3816,8 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
if (temp == NULL) {
goto error;
}
- if (Py_SIZE(temp) < 0) {
+ assert(PyLong_Check(temp));
+ if (_PyLong_IsNegative((PyLongObject *)temp)) {
Py_DECREF(temp);
result = PyLong_FromLong(0);
goto done;
@@ -3767,13 +3864,20 @@ math.nextafter
x: double
y: double
/
+ *
+ steps: object = None
-Return the next floating-point value after x towards y.
+Return the floating-point value the given number of steps after x towards y.
+
+If steps is not specified or is None, it defaults to 1.
+
+Raises a TypeError, if x or y is not a double, or if steps is not an integer.
+Raises ValueError if steps is negative.
[clinic start generated code]*/
static PyObject *
-math_nextafter_impl(PyObject *module, double x, double y)
-/*[clinic end generated code: output=750c8266c1c540ce input=02b2d50cd1d9f9b6]*/
+math_nextafter_impl(PyObject *module, double x, double y, PyObject *steps)
+/*[clinic end generated code: output=cc6511f02afc099e input=7f2a5842112af2b4]*/
{
#if defined(_AIX)
if (x == y) {
@@ -3788,7 +3892,101 @@ math_nextafter_impl(PyObject *module, double x, double y)
return PyFloat_FromDouble(y);
}
#endif
- return PyFloat_FromDouble(nextafter(x, y));
+ if (steps == Py_None) {
+ // fast path: we default to one step.
+ return PyFloat_FromDouble(nextafter(x, y));
+ }
+ steps = PyNumber_Index(steps);
+ if (steps == NULL) {
+ return NULL;
+ }
+ assert(PyLong_CheckExact(steps));
+ if (_PyLong_IsNegative((PyLongObject *)steps)) {
+ PyErr_SetString(PyExc_ValueError,
+ "steps must be a non-negative integer");
+ Py_DECREF(steps);
+ return NULL;
+ }
+
+ unsigned long long usteps_ull = PyLong_AsUnsignedLongLong(steps);
+ // Conveniently, uint64_t and double have the same number of bits
+ // on all the platforms we care about.
+ // So if an overflow occurs, we can just use UINT64_MAX.
+ Py_DECREF(steps);
+ if (usteps_ull >= UINT64_MAX) {
+ // This branch includes the case where an error occurred, since
+ // (unsigned long long)(-1) = ULLONG_MAX >= UINT64_MAX. Note that
+ // usteps_ull can be strictly larger than UINT64_MAX on a machine
+ // where unsigned long long has width > 64 bits.
+ if (PyErr_Occurred()) {
+ if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+ PyErr_Clear();
+ }
+ else {
+ return NULL;
+ }
+ }
+ usteps_ull = UINT64_MAX;
+ }
+ assert(usteps_ull <= UINT64_MAX);
+ uint64_t usteps = (uint64_t)usteps_ull;
+
+ if (usteps == 0) {
+ return PyFloat_FromDouble(x);
+ }
+ if (Py_IS_NAN(x)) {
+ return PyFloat_FromDouble(x);
+ }
+ if (Py_IS_NAN(y)) {
+ return PyFloat_FromDouble(y);
+ }
+
+ // We assume that double and uint64_t have the same endianness.
+ // This is not guaranteed by the C-standard, but it is true for
+ // all platforms we care about. (The most likely form of violation
+ // would be a "mixed-endian" double.)
+ union pun {double f; uint64_t i;};
+ union pun ux = {x}, uy = {y};
+ if (ux.i == uy.i) {
+ return PyFloat_FromDouble(x);
+ }
+
+ const uint64_t sign_bit = 1ULL<<63;
+
+ uint64_t ax = ux.i & ~sign_bit;
+ uint64_t ay = uy.i & ~sign_bit;
+
+ // opposite signs
+ if (((ux.i ^ uy.i) & sign_bit)) {
+ // NOTE: ax + ay can never overflow, because their most significant bit
+ // ain't set.
+ if (ax + ay <= usteps) {
+ return PyFloat_FromDouble(uy.f);
+ // This comparison has to use <, because <= would get +0.0 vs -0.0
+ // wrong.
+ } else if (ax < usteps) {
+ union pun result = {.i = (uy.i & sign_bit) | (usteps - ax)};
+ return PyFloat_FromDouble(result.f);
+ } else {
+ ux.i -= usteps;
+ return PyFloat_FromDouble(ux.f);
+ }
+ // same sign
+ } else if (ax > ay) {
+ if (ax - ay >= usteps) {
+ ux.i -= usteps;
+ return PyFloat_FromDouble(ux.f);
+ } else {
+ return PyFloat_FromDouble(uy.f);
+ }
+ } else {
+ if (ay - ax >= usteps) {
+ ux.i += usteps;
+ return PyFloat_FromDouble(ux.f);
+ } else {
+ return PyFloat_FromDouble(uy.f);
+ }
+ }
}
@@ -3812,7 +4010,7 @@ math_ulp_impl(PyObject *module, double x)
if (Py_IS_INFINITY(x)) {
return x;
}
- double inf = m_inf();
+ double inf = Py_INFINITY;
double x2 = nextafter(x, inf);
if (Py_IS_INFINITY(x2)) {
/* special case: x is the largest positive representable float */
@@ -3825,6 +4023,20 @@ math_ulp_impl(PyObject *module, double x)
static int
math_exec(PyObject *module)
{
+
+ math_module_state *state = get_math_module_state(module);
+ state->str___ceil__ = PyUnicode_InternFromString("__ceil__");
+ if (state->str___ceil__ == NULL) {
+ return -1;
+ }
+ state->str___floor__ = PyUnicode_InternFromString("__floor__");
+ if (state->str___floor__ == NULL) {
+ return -1;
+ }
+ state->str___trunc__ = PyUnicode_InternFromString("__trunc__");
+ if (state->str___trunc__ == NULL) {
+ return -1;
+ }
if (_PyModule_Add(module, "pi", PyFloat_FromDouble(Py_MATH_PI)) < 0) {
return -1;
}
@@ -3835,17 +4047,31 @@ math_exec(PyObject *module)
if (_PyModule_Add(module, "tau", PyFloat_FromDouble(Py_MATH_TAU)) < 0) {
return -1;
}
- if (_PyModule_Add(module, "inf", PyFloat_FromDouble(m_inf())) < 0) {
+ if (_PyModule_Add(module, "inf", PyFloat_FromDouble(Py_INFINITY)) < 0) {
return -1;
}
-#if _PY_SHORT_FLOAT_REPR == 1
- if (_PyModule_Add(module, "nan", PyFloat_FromDouble(m_nan())) < 0) {
+ if (_PyModule_Add(module, "nan", PyFloat_FromDouble(fabs(Py_NAN))) < 0) {
return -1;
}
-#endif
return 0;
}
+static int
+math_clear(PyObject *module)
+{
+ math_module_state *state = get_math_module_state(module);
+ Py_CLEAR(state->str___ceil__);
+ Py_CLEAR(state->str___floor__);
+ Py_CLEAR(state->str___trunc__);
+ return 0;
+}
+
+static void
+math_free(void *module)
+{
+ math_clear((PyObject *)module);
+}
+
static PyMethodDef math_methods[] = {
{"acos", math_acos, METH_O, math_acos_doc},
{"acosh", math_acosh, METH_O, math_acosh_doc},
@@ -3883,7 +4109,7 @@ static PyMethodDef math_methods[] = {
{"lcm", _PyCFunction_CAST(math_lcm), METH_FASTCALL, math_lcm_doc},
MATH_LDEXP_METHODDEF
{"lgamma", math_lgamma, METH_O, math_lgamma_doc},
- MATH_LOG_METHODDEF
+ {"log", _PyCFunction_CAST(math_log), METH_FASTCALL, math_log_doc},
{"log1p", math_log1p, METH_O, math_log1p_doc},
MATH_LOG10_METHODDEF
MATH_LOG2_METHODDEF
@@ -3896,6 +4122,7 @@ static PyMethodDef math_methods[] = {
{"sqrt", math_sqrt, METH_O, math_sqrt_doc},
{"tan", math_tan, METH_O, math_tan_doc},
{"tanh", math_tanh, METH_O, math_tanh_doc},
+ MATH_SUMPROD_METHODDEF
MATH_TRUNC_METHODDEF
MATH_PROD_METHODDEF
MATH_PERM_METHODDEF
@@ -3907,6 +4134,7 @@ static PyMethodDef math_methods[] = {
static PyModuleDef_Slot math_slots[] = {
{Py_mod_exec, math_exec},
+ {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
{0, NULL}
};
@@ -3918,9 +4146,11 @@ static struct PyModuleDef mathmodule = {
PyModuleDef_HEAD_INIT,
.m_name = "math",
.m_doc = module_doc,
- .m_size = 0,
+ .m_size = sizeof(math_module_state),
.m_methods = math_methods,
.m_slots = math_slots,
+ .m_clear = math_clear,
+ .m_free = math_free,
};
PyMODINIT_FUNC