Library import 16 (#2433)

Co-authored-by: robot-piglet <robot-piglet@yandex-team.com> Co-authored-by: deshevoy <deshevoy@yandex-team.com> Co-authored-by: robot-contrib <robot-contrib@yandex-team.com> Co-authored-by: thegeorg <thegeorg@yandex-team.com> Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com> Co-authored-by: svidyuk <svidyuk@yandex-team.com> Co-authored-by: shadchin <shadchin@yandex-team.com> Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com> Co-authored-by: innokentii <innokentii@yandex-team.com> Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com> Co-authored-by: snermolaev <snermolaev@yandex-team.com> Co-authored-by: dimdim11 <dimdim11@yandex-team.com> Co-authored-by: kickbutt <kickbutt@yandex-team.com> Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com> Co-authored-by: korsunandrei <korsunandrei@yandex-team.com> Co-authored-by: petrk <petrk@yandex-team.com> Co-authored-by: miroslav2 <miroslav2@yandex-team.com> Co-authored-by: serjflint <serjflint@yandex-team.com> Co-authored-by: akhropov <akhropov@yandex-team.com> Co-authored-by: prettyboy <prettyboy@yandex-team.com> Co-authored-by: ilikepugs <ilikepugs@yandex-team.com> Co-authored-by: hiddenpath <hiddenpath@yandex-team.com> Co-authored-by: mikhnenko <mikhnenko@yandex-team.com> Co-authored-by: spreis <spreis@yandex-team.com> Co-authored-by: andreyshspb <andreyshspb@yandex-team.com> Co-authored-by: dimaandreev <dimaandreev@yandex-team.com> Co-authored-by: rashid <rashid@yandex-team.com> Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com> Co-authored-by: r-vetrov <r-vetrov@yandex-team.com> Co-authored-by: ypodlesov <ypodlesov@yandex-team.com> Co-authored-by: zaverden <zaverden@yandex-team.com> Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com> Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com> Co-authored-by: v-korovin <v-korovin@yandex-team.com> Co-authored-by: arikon <arikon@yandex-team.com> Co-authored-by: khoden <khoden@yandex-team.com> Co-authored-by: psydmm <psydmm@yandex-team.com> Co-authored-by: robot-javacom <robot-javacom@yandex-team.com> Co-authored-by: dtorilov <dtorilov@yandex-team.com> Co-authored-by: sennikovmv <sennikovmv@yandex-team.com> Co-authored-by: hcpp <hcpp@ydb.tech>
author: AlexSm <alex@ydb.tech> 2024-03-05 10:40:59 +0100
committer: GitHub <noreply@github.com> 2024-03-05 12:40:59 +0300
commit: 1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree: 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/_pylong.py
parent: ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download: ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz
1 files changed, 285 insertions, 0 deletions
diff --git a/contrib/tools/python3/Lib/_pylong.py b/contrib/tools/python3/Lib/_pylong.py
new file mode 100644
index 00000000000..936346e187f
--- /dev/null
+++ b/contrib/tools/python3/Lib/_pylong.py
@@ -0,0 +1,285 @@
+"""Python implementations of some algorithms for use by longobject.c.
+The goal is to provide asymptotically faster algorithms that can be
+used for operations on integers with many digits.  In those cases, the
+performance overhead of the Python implementation is not significant
+since the asymptotic behavior is what dominates runtime. Functions
+provided by this module should be considered private and not part of any
+public API.
+
+Note: for ease of maintainability, please prefer clear code and avoid
+"micro-optimizations".  This module will only be imported and used for
+integers with a huge number of digits.  Saving a few microseconds with
+tricky or non-obvious code is not worth it.  For people looking for
+maximum performance, they should use something like gmpy2."""
+
+import re
+import decimal
+
+
+def int_to_decimal(n):
+    """Asymptotically fast conversion of an 'int' to Decimal."""
+
+    # Function due to Tim Peters.  See GH issue #90716 for details.
+    # https://github.com/python/cpython/issues/90716
+    #
+    # The implementation in longobject.c of base conversion algorithms
+    # between power-of-2 and non-power-of-2 bases are quadratic time.
+    # This function implements a divide-and-conquer algorithm that is
+    # faster for large numbers.  Builds an equal decimal.Decimal in a
+    # "clever" recursive way.  If we want a string representation, we
+    # apply str to _that_.
+
+    D = decimal.Decimal
+    D2 = D(2)
+
+    BITLIM = 128
+
+    mem = {}
+
+    def w2pow(w):
+        """Return D(2)**w and store the result. Also possibly save some
+        intermediate results. In context, these are likely to be reused
+        across various levels of the conversion to Decimal."""
+        if (result := mem.get(w)) is None:
+            if w <= BITLIM:
+                result = D2**w
+            elif w - 1 in mem:
+                result = (t := mem[w - 1]) + t
+            else:
+                w2 = w >> 1
+                # If w happens to be odd, w-w2 is one larger then w2
+                # now. Recurse on the smaller first (w2), so that it's
+                # in the cache and the larger (w-w2) can be handled by
+                # the cheaper `w-1 in mem` branch instead.
+                result = w2pow(w2) * w2pow(w - w2)
+            mem[w] = result
+        return result
+
+    def inner(n, w):
+        if w <= BITLIM:
+            return D(n)
+        w2 = w >> 1
+        hi = n >> w2
+        lo = n - (hi << w2)
+        return inner(lo, w2) + inner(hi, w - w2) * w2pow(w2)
+
+    with decimal.localcontext() as ctx:
+        ctx.prec = decimal.MAX_PREC
+        ctx.Emax = decimal.MAX_EMAX
+        ctx.Emin = decimal.MIN_EMIN
+        ctx.traps[decimal.Inexact] = 1
+
+        if n < 0:
+            negate = True
+            n = -n
+        else:
+            negate = False
+        result = inner(n, n.bit_length())
+        if negate:
+            result = -result
+    return result
+
+
+def int_to_decimal_string(n):
+    """Asymptotically fast conversion of an 'int' to a decimal string."""
+    return str(int_to_decimal(n))
+
+
+def _str_to_int_inner(s):
+    """Asymptotically fast conversion of a 'str' to an 'int'."""
+
+    # Function due to Bjorn Martinsson.  See GH issue #90716 for details.
+    # https://github.com/python/cpython/issues/90716
+    #
+    # The implementation in longobject.c of base conversion algorithms
+    # between power-of-2 and non-power-of-2 bases are quadratic time.
+    # This function implements a divide-and-conquer algorithm making use
+    # of Python's built in big int multiplication. Since Python uses the
+    # Karatsuba algorithm for multiplication, the time complexity
+    # of this function is O(len(s)**1.58).
+
+    DIGLIM = 2048
+
+    mem = {}
+
+    def w5pow(w):
+        """Return 5**w and store the result.
+        Also possibly save some intermediate results. In context, these
+        are likely to be reused across various levels of the conversion
+        to 'int'.
+        """
+        if (result := mem.get(w)) is None:
+            if w <= DIGLIM:
+                result = 5**w
+            elif w - 1 in mem:
+                result = mem[w - 1] * 5
+            else:
+                w2 = w >> 1
+                # If w happens to be odd, w-w2 is one larger then w2
+                # now. Recurse on the smaller first (w2), so that it's
+                # in the cache and the larger (w-w2) can be handled by
+                # the cheaper `w-1 in mem` branch instead.
+                result = w5pow(w2) * w5pow(w - w2)
+            mem[w] = result
+        return result
+
+    def inner(a, b):
+        if b - a <= DIGLIM:
+            return int(s[a:b])
+        mid = (a + b + 1) >> 1
+        return inner(mid, b) + ((inner(a, mid) * w5pow(b - mid)) << (b - mid))
+
+    return inner(0, len(s))
+
+
+def int_from_string(s):
+    """Asymptotically fast version of PyLong_FromString(), conversion
+    of a string of decimal digits into an 'int'."""
+    # PyLong_FromString() has already removed leading +/-, checked for invalid
+    # use of underscore characters, checked that string consists of only digits
+    # and underscores, and stripped leading whitespace.  The input can still
+    # contain underscores and have trailing whitespace.
+    s = s.rstrip().replace('_', '')
+    return _str_to_int_inner(s)
+
+
+def str_to_int(s):
+    """Asymptotically fast version of decimal string to 'int' conversion."""
+    # FIXME: this doesn't support the full syntax that int() supports.
+    m = re.match(r'\s*([+-]?)([0-9_]+)\s*', s)
+    if not m:
+        raise ValueError('invalid literal for int() with base 10')
+    v = int_from_string(m.group(2))
+    if m.group(1) == '-':
+        v = -v
+    return v
+
+
+# Fast integer division, based on code from Mark Dickinson, fast_div.py
+# GH-47701. Additional refinements and optimizations by Bjorn Martinsson.  The
+# algorithm is due to Burnikel and Ziegler, in their paper "Fast Recursive
+# Division".
+
+_DIV_LIMIT = 4000
+
+
+def _div2n1n(a, b, n):
+    """Divide a 2n-bit nonnegative integer a by an n-bit positive integer
+    b, using a recursive divide-and-conquer algorithm.
+
+    Inputs:
+      n is a positive integer
+      b is a positive integer with exactly n bits
+      a is a nonnegative integer such that a < 2**n * b
+
+    Output:
+      (q, r) such that a = b*q+r and 0 <= r < b.
+
+    """
+    if a.bit_length() - n <= _DIV_LIMIT:
+        return divmod(a, b)
+    pad = n & 1
+    if pad:
+        a <<= 1
+        b <<= 1
+        n += 1
+    half_n = n >> 1
+    mask = (1 << half_n) - 1
+    b1, b2 = b >> half_n, b & mask
+    q1, r = _div3n2n(a >> n, (a >> half_n) & mask, b, b1, b2, half_n)
+    q2, r = _div3n2n(r, a & mask, b, b1, b2, half_n)
+    if pad:
+        r >>= 1
+    return q1 << half_n | q2, r
+
+
+def _div3n2n(a12, a3, b, b1, b2, n):
+    """Helper function for _div2n1n; not intended to be called directly."""
+    if a12 >> n == b1:
+        q, r = (1 << n) - 1, a12 - (b1 << n) + b1
+    else:
+        q, r = _div2n1n(a12, b1, n)
+    r = (r << n | a3) - q * b2
+    while r < 0:
+        q -= 1
+        r += b
+    return q, r
+
+
+def _int2digits(a, n):
+    """Decompose non-negative int a into base 2**n
+
+    Input:
+      a is a non-negative integer
+
+    Output:
+      List of the digits of a in base 2**n in little-endian order,
+      meaning the most significant digit is last. The most
+      significant digit is guaranteed to be non-zero.
+      If a is 0 then the output is an empty list.
+
+    """
+    a_digits = [0] * ((a.bit_length() + n - 1) // n)
+
+    def inner(x, L, R):
+        if L + 1 == R:
+            a_digits[L] = x
+            return
+        mid = (L + R) >> 1
+        shift = (mid - L) * n
+        upper = x >> shift
+        lower = x ^ (upper << shift)
+        inner(lower, L, mid)
+        inner(upper, mid, R)
+
+    if a:
+        inner(a, 0, len(a_digits))
+    return a_digits
+
+
+def _digits2int(digits, n):
+    """Combine base-2**n digits into an int. This function is the
+    inverse of `_int2digits`. For more details, see _int2digits.
+    """
+
+    def inner(L, R):
+        if L + 1 == R:
+            return digits[L]
+        mid = (L + R) >> 1
+        shift = (mid - L) * n
+        return (inner(mid, R) << shift) + inner(L, mid)
+
+    return inner(0, len(digits)) if digits else 0
+
+
+def _divmod_pos(a, b):
+    """Divide a non-negative integer a by a positive integer b, giving
+    quotient and remainder."""
+    # Use grade-school algorithm in base 2**n, n = nbits(b)
+    n = b.bit_length()
+    a_digits = _int2digits(a, n)
+
+    r = 0
+    q_digits = []
+    for a_digit in reversed(a_digits):
+        q_digit, r = _div2n1n((r << n) + a_digit, b, n)
+        q_digits.append(q_digit)
+    q_digits.reverse()
+    q = _digits2int(q_digits, n)
+    return q, r
+
+
+def int_divmod(a, b):
+    """Asymptotically fast replacement for divmod, for 'int'.
+    Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b).
+    """
+    if b == 0:
+        raise ZeroDivisionError
+    elif b < 0:
+        q, r = int_divmod(-a, -b)
+        return q, -r
+    elif a < 0:
+        q, r = int_divmod(~a, b)
+        return ~q, b + ~r
+    else:
+        return _divmod_pos(a, b)
author	AlexSm <alex@ydb.tech>	2024-03-05 10:40:59 +0100
committer	GitHub <noreply@github.com>	2024-03-05 12:40:59 +0300
commit	1ac13c847b5358faba44dbb638a828e24369467b (patch)
tree	07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Lib/_pylong.py
parent	ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff)
download	ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz