Sync contrib/tools/python3 layout with upstream

* Move src/ subdir contents to the top of the layout * Rename self-written lib -> lib2 to avoid CaseFolding warning from the VCS * Regenerate contrib/libs/python proxy-headers accordingly 4ccc62ac1511abcf0fed14ccade38e984e088f1e
author: thegeorg <[email protected]> 2024-02-19 02:38:52 +0300
committer: thegeorg <[email protected]> 2024-02-19 02:50:43 +0300
commit: d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree: 31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Objects/stringlib
parent: 452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)
21 files changed, 5327 insertions, 0 deletions
diff --git a/contrib/tools/python3/Objects/stringlib/asciilib.h b/contrib/tools/python3/Objects/stringlib/asciilib.h
new file mode 100644
index 00000000000..b3016bfbbb0
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/asciilib.h
@@ -0,0 +1,27 @@
+/* stringlib configuration for ASCII-only unicode strings (asciilib_* names).
+   STRINGLIB_IS_UNICODE is sort of a hack: at least one place (formatting
+   floats) takes a different stringlib code path when compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define FASTSEARCH               asciilib_fastsearch
+#define STRINGLIB(F)             asciilib_##F  /* prefixes helper names, e.g. asciilib_utf8_decode */
+#define STRINGLIB_OBJECT         PyUnicodeObject
+#define STRINGLIB_SIZEOF_CHAR    1  /* one byte per character (Py_UCS1) */
+#define STRINGLIB_MAX_CHAR       0x7Fu  /* largest code point this variant stores */
+#define STRINGLIB_CHAR           Py_UCS1
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_PARSE_CODE     "U"
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_STR            PyUnicode_1BYTE_DATA
+#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
+#define STRINGLIB_NEW(STR,LEN)   _PyUnicode_FromASCII((const char*)(STR),(LEN))
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
+#define STRINGLIB_MUTABLE 0
+#define STRINGLIB_FAST_MEMCHR    memchr
+
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_ASCII
diff --git a/contrib/tools/python3/Objects/stringlib/clinic/transmogrify.h.h b/contrib/tools/python3/Objects/stringlib/clinic/transmogrify.h.h
new file mode 100644
index 00000000000..49388cf043c
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/clinic/transmogrify.h.h
@@ -0,0 +1,281 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+#  include "pycore_gc.h"            // PyGC_Head
+#  include "pycore_runtime.h"       // _Py_ID()
+#endif
+
+
+PyDoc_STRVAR(stringlib_expandtabs__doc__,
+"expandtabs($self, /, tabsize=8)\n"
+"--\n"
+"\n"
+"Return a copy where all tab characters are expanded using spaces.\n"
+"\n"
+"If tabsize is not given, a tab size of 8 characters is assumed.");
+
+#define STRINGLIB_EXPANDTABS_METHODDEF    \
+    {"expandtabs", _PyCFunction_CAST(stringlib_expandtabs), METH_FASTCALL|METH_KEYWORDS, stringlib_expandtabs__doc__},
+
+static PyObject *
+stringlib_expandtabs_impl(PyObject *self, int tabsize);
+
+static PyObject *
+stringlib_expandtabs(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+    #define NUM_KEYWORDS 1
+    static struct {
+        PyGC_Head _this_is_not_used;
+        PyObject_VAR_HEAD
+        PyObject *ob_item[NUM_KEYWORDS];
+    } _kwtuple = {
+        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+        .ob_item = { &_Py_ID(tabsize), },
+    };
+    #undef NUM_KEYWORDS
+    #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+    #else  // !Py_BUILD_CORE
+    #  define KWTUPLE NULL
+    #endif  // !Py_BUILD_CORE
+
+    static const char * const _keywords[] = {"tabsize", NULL};
+    static _PyArg_Parser _parser = {
+        .keywords = _keywords,
+        .fname = "expandtabs",
+        .kwtuple = KWTUPLE,
+    };
+    #undef KWTUPLE
+    PyObject *argsbuf[1];
+    Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
+    int tabsize = 8;
+
+    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 1, 0, argsbuf);
+    if (!args) {
+        goto exit;
+    }
+    if (!noptargs) {
+        goto skip_optional_pos;
+    }
+    tabsize = _PyLong_AsInt(args[0]);
+    if (tabsize == -1 && PyErr_Occurred()) {
+        goto exit;
+    }
+skip_optional_pos:
+    return_value = stringlib_expandtabs_impl(self, tabsize);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(stringlib_ljust__doc__,
+"ljust($self, width, fillchar=b\' \', /)\n"
+"--\n"
+"\n"
+"Return a left-justified string of length width.\n"
+"\n"
+"Padding is done using the specified fill character.");
+
+#define STRINGLIB_LJUST_METHODDEF    \
+    {"ljust", _PyCFunction_CAST(stringlib_ljust), METH_FASTCALL, stringlib_ljust__doc__},
+
+static PyObject *
+stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar);
+
+static PyObject *
+stringlib_ljust(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!_PyArg_CheckPositional("ljust", nargs, 1, 2)) {
+        goto exit;
+    }
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        width = ival;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
+        fillchar = PyBytes_AS_STRING(args[1])[0];
+    }
+    else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
+        fillchar = PyByteArray_AS_STRING(args[1])[0];
+    }
+    else {
+        _PyArg_BadArgument("ljust", "argument 2", "a byte string of length 1", args[1]);
+        goto exit;
+    }
+skip_optional:
+    return_value = stringlib_ljust_impl(self, width, fillchar);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(stringlib_rjust__doc__,
+"rjust($self, width, fillchar=b\' \', /)\n"
+"--\n"
+"\n"
+"Return a right-justified string of length width.\n"
+"\n"
+"Padding is done using the specified fill character.");
+
+#define STRINGLIB_RJUST_METHODDEF    \
+    {"rjust", _PyCFunction_CAST(stringlib_rjust), METH_FASTCALL, stringlib_rjust__doc__},
+
+static PyObject *
+stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar);
+
+static PyObject *
+stringlib_rjust(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!_PyArg_CheckPositional("rjust", nargs, 1, 2)) {
+        goto exit;
+    }
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        width = ival;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
+        fillchar = PyBytes_AS_STRING(args[1])[0];
+    }
+    else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
+        fillchar = PyByteArray_AS_STRING(args[1])[0];
+    }
+    else {
+        _PyArg_BadArgument("rjust", "argument 2", "a byte string of length 1", args[1]);
+        goto exit;
+    }
+skip_optional:
+    return_value = stringlib_rjust_impl(self, width, fillchar);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(stringlib_center__doc__,
+"center($self, width, fillchar=b\' \', /)\n"
+"--\n"
+"\n"
+"Return a centered string of length width.\n"
+"\n"
+"Padding is done using the specified fill character.");
+
+#define STRINGLIB_CENTER_METHODDEF    \
+    {"center", _PyCFunction_CAST(stringlib_center), METH_FASTCALL, stringlib_center__doc__},
+
+static PyObject *
+stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar);
+
+static PyObject *
+stringlib_center(PyObject *self, PyObject *const *args, Py_ssize_t nargs)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t width;
+    char fillchar = ' ';
+
+    if (!_PyArg_CheckPositional("center", nargs, 1, 2)) {
+        goto exit;
+    }
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(args[0]);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        width = ival;
+    }
+    if (nargs < 2) {
+        goto skip_optional;
+    }
+    if (PyBytes_Check(args[1]) && PyBytes_GET_SIZE(args[1]) == 1) {
+        fillchar = PyBytes_AS_STRING(args[1])[0];
+    }
+    else if (PyByteArray_Check(args[1]) && PyByteArray_GET_SIZE(args[1]) == 1) {
+        fillchar = PyByteArray_AS_STRING(args[1])[0];
+    }
+    else {
+        _PyArg_BadArgument("center", "argument 2", "a byte string of length 1", args[1]);
+        goto exit;
+    }
+skip_optional:
+    return_value = stringlib_center_impl(self, width, fillchar);
+
+exit:
+    return return_value;
+}
+
+PyDoc_STRVAR(stringlib_zfill__doc__,
+"zfill($self, width, /)\n"
+"--\n"
+"\n"
+"Pad a numeric string with zeros on the left, to fill a field of the given width.\n"
+"\n"
+"The original string is never truncated.");
+
+#define STRINGLIB_ZFILL_METHODDEF    \
+    {"zfill", (PyCFunction)stringlib_zfill, METH_O, stringlib_zfill__doc__},
+
+static PyObject *
+stringlib_zfill_impl(PyObject *self, Py_ssize_t width);
+
+static PyObject *
+stringlib_zfill(PyObject *self, PyObject *arg)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t width;
+
+    {
+        Py_ssize_t ival = -1;
+        PyObject *iobj = _PyNumber_Index(arg);
+        if (iobj != NULL) {
+            ival = PyLong_AsSsize_t(iobj);
+            Py_DECREF(iobj);
+        }
+        if (ival == -1 && PyErr_Occurred()) {
+            goto exit;
+        }
+        width = ival;
+    }
+    return_value = stringlib_zfill_impl(self, width);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=d44a269805f6739e input=a9049054013a1b77]*/
diff --git a/contrib/tools/python3/Objects/stringlib/codecs.h b/contrib/tools/python3/Objects/stringlib/codecs.h
new file mode 100644
index 00000000000..958cc861478
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/codecs.h
@@ -0,0 +1,833 @@
+/* stringlib: codec implementations */
+
+#if !STRINGLIB_IS_UNICODE
+# error "codecs.h is specific to Unicode"
+#endif
+
+#include "pycore_bitutils.h"      // _Py_bswap32()
+
+/* Mask to quickly check whether a C 'size_t' contains a
+   non-ASCII, UTF8-encoded char. */
+#if (SIZEOF_SIZE_T == 8)
+# define ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define ASCII_CHAR_MASK 0x80808080U
+#else
+# error C 'size_t' size should be either 4 or 8!
+#endif
+
+/* True when ch is a UTF-8 continuation byte (bit pattern 10xxxxxx). */
+#define IS_CONTINUATION_BYTE(ch) ((ch) >= 0x80 && (ch) < 0xC0)
+/* Decode UTF-8 from *inptr (up to end) into dest at *outpos; both are updated on return. */
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf8_decode)(const char **inptr, const char *end,
+                       STRINGLIB_CHAR *dest,
+                       Py_ssize_t *outpos)
+{
+    Py_UCS4 ch;
+    const char *s = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+
+    while (s < end) {
+        ch = (unsigned char)*s;
+
+        if (ch < 0x80) {
+            /* Fast path for runs of ASCII characters. Given that common UTF-8
+               input will consist of an overwhelming majority of ASCII
+               characters, we try to optimize for this case by checking
+               as many characters as a C 'size_t' can contain.
+               First, check if we can do an aligned read, as most CPUs have
+               a penalty for unaligned reads.
+            */
+            if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
+                /* Help register allocation */
+                const char *_s = s;
+                STRINGLIB_CHAR *_p = p;
+                while (_s + SIZEOF_SIZE_T <= end) {
+                    /* Read a whole size_t at a time (either 4 or 8 bytes),
+                       and do a fast unrolled copy if it only contains ASCII
+                       characters. */
+                    size_t value = *(const size_t *) _s;
+                    if (value & ASCII_CHAR_MASK)
+                        break;
+#if PY_LITTLE_ENDIAN
+                    _p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
+                    _p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
+                    _p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
+                    _p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
+# if SIZEOF_SIZE_T == 8
+                    _p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
+                    _p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
+                    _p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
+                    _p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
+# endif
+#else
+# if SIZEOF_SIZE_T == 8
+                    _p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
+                    _p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
+                    _p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
+                    _p[3] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
+                    _p[4] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
+                    _p[5] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
+                    _p[6] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
+                    _p[7] = (STRINGLIB_CHAR)(value & 0xFFu);
+# else
+                    _p[0] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
+                    _p[1] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
+                    _p[2] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
+                    _p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
+# endif
+#endif
+                    _s += SIZEOF_SIZE_T;
+                    _p += SIZEOF_SIZE_T;
+                }
+                s = _s;
+                p = _p;
+                if (s == end)
+                    break;
+                ch = (unsigned char)*s;
+            }
+            if (ch < 0x80) {
+                s++;
+                *p++ = ch;
+                continue;
+            }
+        }
+
+        if (ch < 0xE0) {
+            /* \xC2\x80-\xDF\xBF -- 0080-07FF */
+            Py_UCS4 ch2;
+            if (ch < 0xC2) {
+                /* invalid sequence
+                \x80-\xBF -- continuation byte
+                \xC0-\xC1 -- fake 0000-007F */
+                goto InvalidStart;
+            }
+            if (end - s < 2) {
+                /* unexpected end of data: the caller will decide whether
+                   it's an error or not */
+                break;
+            }
+            ch2 = (unsigned char)s[1];
+            if (!IS_CONTINUATION_BYTE(ch2))
+                /* invalid continuation byte */
+                goto InvalidContinuation1;
+            ch = (ch << 6) + ch2 -
+                 ((0xC0 << 6) + 0x80);
+            assert ((ch > 0x007F) && (ch <= 0x07FF));
+            s += 2;
+            if (STRINGLIB_MAX_CHAR <= 0x007F ||
+                (STRINGLIB_MAX_CHAR < 0x07FF && ch > STRINGLIB_MAX_CHAR))
+                /* Out-of-range */
+                goto Return;
+            *p++ = ch;
+            continue;
+        }
+
+        if (ch < 0xF0) {
+            /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */
+            Py_UCS4 ch2, ch3;
+            if (end - s < 3) {
+                /* unexpected end of data: the caller will decide whether
+                   it's an error or not */
+                if (end - s < 2)
+                    break;
+                ch2 = (unsigned char)s[1];
+                if (!IS_CONTINUATION_BYTE(ch2) ||
+                    (ch2 < 0xA0 ? ch == 0xE0 : ch == 0xED))
+                    /* for clarification see comments below */
+                    goto InvalidContinuation1;
+                break;
+            }
+            ch2 = (unsigned char)s[1];
+            ch3 = (unsigned char)s[2];
+            if (!IS_CONTINUATION_BYTE(ch2)) {
+                /* invalid continuation byte */
+                goto InvalidContinuation1;
+            }
+            if (ch == 0xE0) {
+                if (ch2 < 0xA0)
+                    /* invalid sequence
+                       \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-07FF */
+                    goto InvalidContinuation1;
+            } else if (ch == 0xED && ch2 >= 0xA0) {
+                /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
+                   will result in surrogates in range D800-DFFF. Surrogates are
+                   not valid UTF-8 so they are rejected.
+                   See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
+                   (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
+                goto InvalidContinuation1;
+            }
+            if (!IS_CONTINUATION_BYTE(ch3)) {
+                /* invalid continuation byte */
+                goto InvalidContinuation2;
+            }
+            ch = (ch << 12) + (ch2 << 6) + ch3 -
+                 ((0xE0 << 12) + (0x80 << 6) + 0x80);
+            assert ((ch > 0x07FF) && (ch <= 0xFFFF));
+            s += 3;
+            if (STRINGLIB_MAX_CHAR <= 0x07FF ||
+                (STRINGLIB_MAX_CHAR < 0xFFFF && ch > STRINGLIB_MAX_CHAR))
+                /* Out-of-range */
+                goto Return;
+            *p++ = ch;
+            continue;
+        }
+
+        if (ch < 0xF5) {
+            /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */
+            Py_UCS4 ch2, ch3, ch4;
+            if (end - s < 4) {
+                /* unexpected end of data: the caller will decide whether
+                   it's an error or not */
+                if (end - s < 2)
+                    break;
+                ch2 = (unsigned char)s[1];
+                if (!IS_CONTINUATION_BYTE(ch2) ||
+                    (ch2 < 0x90 ? ch == 0xF0 : ch == 0xF4))
+                    /* for clarification see comments below */
+                    goto InvalidContinuation1;
+                if (end - s < 3)
+                    break;
+                ch3 = (unsigned char)s[2];
+                if (!IS_CONTINUATION_BYTE(ch3))
+                    goto InvalidContinuation2;
+                break;
+            }
+            ch2 = (unsigned char)s[1];
+            ch3 = (unsigned char)s[2];
+            ch4 = (unsigned char)s[3];
+            if (!IS_CONTINUATION_BYTE(ch2)) {
+                /* invalid continuation byte */
+                goto InvalidContinuation1;
+            }
+            if (ch == 0xF0) {
+                if (ch2 < 0x90)
+                    /* invalid sequence
+                       \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF */
+                    goto InvalidContinuation1;
+            } else if (ch == 0xF4 && ch2 >= 0x90) {
+                /* invalid sequence
+                   \xF4\x90\x80\x80- -- 110000- overflow */
+                goto InvalidContinuation1;
+            }
+            if (!IS_CONTINUATION_BYTE(ch3)) {
+                /* invalid continuation byte */
+                goto InvalidContinuation2;
+            }
+            if (!IS_CONTINUATION_BYTE(ch4)) {
+                /* invalid continuation byte */
+                goto InvalidContinuation3;
+            }
+            ch = (ch << 18) + (ch2 << 12) + (ch3 << 6) + ch4 -
+                 ((0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80);
+            assert ((ch > 0xFFFF) && (ch <= 0x10FFFF));
+            s += 4;
+            if (STRINGLIB_MAX_CHAR <= 0xFFFF ||
+                (STRINGLIB_MAX_CHAR < 0x10FFFF && ch > STRINGLIB_MAX_CHAR))
+                /* Out-of-range */
+                goto Return;
+            *p++ = ch;
+            continue;
+        }
+        goto InvalidStart;
+    }
+    ch = 0;  /* 0: all input consumed, or it ends inside a sequence */
+Return:  /* also entered with ch = a decoded char that exceeds STRINGLIB_MAX_CHAR (s is already past it) */
+    *inptr = s;
+    *outpos = p - dest;
+    return ch;
+InvalidStart:
+    ch = 1;  /* 1: invalid start byte */
+    goto Return;
+InvalidContinuation1:
+    ch = 2;  /* 2: byte 2 invalid (also overlong, surrogate, overflow) */
+    goto Return;
+InvalidContinuation2:
+    ch = 3;  /* 3: byte 3 is not a continuation byte */
+    goto Return;
+InvalidContinuation3:
+    ch = 4;  /* 4: byte 4 is not a continuation byte */
+    goto Return;
+}
+
+#undef ASCII_CHAR_MASK
+
+
+/* UTF-8 encoder specialized per Unicode kind to avoid the slow PyUnicode_READ()
+   macro; parts are compiled out by kind (UCS-1 needs no surrogate handling).
+   Initializes writer; returns the end of the written bytes, or NULL on error. */
+Py_LOCAL_INLINE(char *)
+STRINGLIB(utf8_encoder)(_PyBytesWriter *writer,
+                        PyObject *unicode,
+                        const STRINGLIB_CHAR *data,
+                        Py_ssize_t size,
+                        _Py_error_handler error_handler,
+                        const char *errors)
+{
+    Py_ssize_t i;                /* index into data of next input character */
+    char *p;                     /* next free byte in output buffer */
+#if STRINGLIB_SIZEOF_CHAR > 1
+    PyObject *error_handler_obj = NULL;
+    PyObject *exc = NULL;
+    PyObject *rep = NULL;
+#endif
+#if STRINGLIB_SIZEOF_CHAR == 1
+    const Py_ssize_t max_char_size = 2;  /* worst-case output bytes per input character */
+#elif STRINGLIB_SIZEOF_CHAR == 2
+    const Py_ssize_t max_char_size = 3;
+#else /*  STRINGLIB_SIZEOF_CHAR == 4 */
+    const Py_ssize_t max_char_size = 4;
+#endif
+
+    assert(size >= 0);
+    if (size > PY_SSIZE_T_MAX / max_char_size) {
+        /* integer overflow */
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    _PyBytesWriter_Init(writer);
+    p = _PyBytesWriter_Alloc(writer, size * max_char_size);
+    if (p == NULL)
+        return NULL;
+
+    for (i = 0; i < size;) {
+        Py_UCS4 ch = data[i++];
+
+        if (ch < 0x80) {
+            /* Encode ASCII */
+            *p++ = (char) ch;
+
+        }
+        else
+#if STRINGLIB_SIZEOF_CHAR > 1
+        if (ch < 0x0800)
+#endif
+        {
+            /* Encode as two bytes (Latin-1 and everything else below 0x0800) */
+            *p++ = (char)(0xc0 | (ch >> 6));
+            *p++ = (char)(0x80 | (ch & 0x3f));
+        }
+#if STRINGLIB_SIZEOF_CHAR > 1
+        else if (Py_UNICODE_IS_SURROGATE(ch)) {
+            Py_ssize_t startpos, endpos, newpos;
+            Py_ssize_t k;
+            if (error_handler == _Py_ERROR_UNKNOWN) {
+                error_handler = _Py_GetErrorHandler(errors);
+            }
+
+            startpos = i-1;  /* index of the surrogate just read */
+            endpos = startpos+1;  /* extended below to cover the whole run of surrogates */
+
+            while ((endpos < size) && Py_UNICODE_IS_SURROGATE(data[endpos]))
+                endpos++;
+
+            /* Only overallocate the buffer if it's not the last write */
+            writer->overallocate = (endpos < size);
+
+            switch (error_handler)
+            {
+            case _Py_ERROR_REPLACE:
+                memset(p, '?', endpos - startpos);
+                p += (endpos - startpos);
+                /* fall through */
+            case _Py_ERROR_IGNORE:
+                i += (endpos - startpos - 1);
+                break;
+
+            case _Py_ERROR_SURROGATEPASS:
+                for (k=startpos; k<endpos; k++) {
+                    ch = data[k];
+                    *p++ = (char)(0xe0 | (ch >> 12));
+                    *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+                    *p++ = (char)(0x80 | (ch & 0x3f));
+                }
+                i += (endpos - startpos - 1);
+                break;
+
+            case _Py_ERROR_BACKSLASHREPLACE:
+                /* subtract preallocated bytes */
+                writer->min_size -= max_char_size * (endpos - startpos);
+                p = backslashreplace(writer, p,
+                                     unicode, startpos, endpos);
+                if (p == NULL)
+                    goto error;
+                i += (endpos - startpos - 1);
+                break;
+
+            case _Py_ERROR_XMLCHARREFREPLACE:
+                /* subtract preallocated bytes */
+                writer->min_size -= max_char_size * (endpos - startpos);
+                p = xmlcharrefreplace(writer, p,
+                                      unicode, startpos, endpos);
+                if (p == NULL)
+                    goto error;
+                i += (endpos - startpos - 1);
+                break;
+
+            case _Py_ERROR_SURROGATEESCAPE:
+                for (k=startpos; k<endpos; k++) {
+                    ch = data[k];
+                    if (!(0xDC80 <= ch && ch <= 0xDCFF))
+                        break;
+                    *p++ = (char)(ch & 0xff);
+                }
+                if (k >= endpos) {
+                    i += (endpos - startpos - 1);
+                    break;
+                }
+                startpos = k;
+                assert(startpos < endpos);
+                /* fall through */
+            default:
+                rep = unicode_encode_call_errorhandler(
+                      errors, &error_handler_obj, "utf-8", "surrogates not allowed",
+                      unicode, &exc, startpos, endpos, &newpos);
+                if (!rep)
+                    goto error;
+
+                if (newpos < startpos) {
+                    writer->overallocate = 1;
+                    p = _PyBytesWriter_Prepare(writer, p,
+                                               max_char_size * (startpos - newpos));
+                    if (p == NULL)
+                        goto error;
+                }
+                else {
+                    /* subtract preallocated bytes */
+                    writer->min_size -= max_char_size * (newpos - startpos);
+                    /* Only overallocate the buffer if it's not the last write */
+                    writer->overallocate = (newpos < size);
+                }
+
+                if (PyBytes_Check(rep)) {
+                    p = _PyBytesWriter_WriteBytes(writer, p,
+                                                  PyBytes_AS_STRING(rep),
+                                                  PyBytes_GET_SIZE(rep));
+                }
+                else {
+                    /* rep is unicode */
+                    if (PyUnicode_READY(rep) < 0)
+                        goto error;
+
+                    if (!PyUnicode_IS_ASCII(rep)) {
+                        raise_encode_exception(&exc, "utf-8", unicode,
+                                               startpos, endpos,
+                                               "surrogates not allowed");
+                        goto error;
+                    }
+
+                    p = _PyBytesWriter_WriteBytes(writer, p,
+                                                  PyUnicode_DATA(rep),
+                                                  PyUnicode_GET_LENGTH(rep));
+                }
+
+                if (p == NULL)
+                    goto error;
+                Py_CLEAR(rep);
+
+                i = newpos;
+            }
+
+            /* If overallocation was disabled, ensure that it was the last
+               write. Otherwise, we missed an optimization */
+            assert(writer->overallocate || i == size);
+        }
+        else
+#if STRINGLIB_SIZEOF_CHAR > 2
+        if (ch < 0x10000)
+#endif
+        {
+            *p++ = (char)(0xe0 | (ch >> 12));
+            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+            *p++ = (char)(0x80 | (ch & 0x3f));
+        }
+#if STRINGLIB_SIZEOF_CHAR > 2
+        else /* ch >= 0x10000 */
+        {
+            assert(ch <= MAX_UNICODE);
+            /* Encode UCS4 Unicode ordinals */
+            *p++ = (char)(0xf0 | (ch >> 18));
+            *p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
+            *p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
+            *p++ = (char)(0x80 | (ch & 0x3f));
+        }
+#endif /* STRINGLIB_SIZEOF_CHAR > 2 */
+#endif /* STRINGLIB_SIZEOF_CHAR > 1 */
+    }
+
+#if STRINGLIB_SIZEOF_CHAR > 1
+    Py_XDECREF(error_handler_obj);
+    Py_XDECREF(exc);
+#endif
+    return p;  /* next free byte in the output buffer */
+
+#if STRINGLIB_SIZEOF_CHAR > 1
+ error:
+    Py_XDECREF(rep);
+    Py_XDECREF(error_handler_obj);
+    Py_XDECREF(exc);
+    return NULL;
+#endif
+}
+
+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains
+   non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+   UTF16-encoded surrogate characters. This is an efficient heuristic,
+   assuming that non-surrogate characters with a code point >= 0x8000 are
+   rare in most input.
+*/
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \
+                                 (((value) & STRIPPED_MASK) << 8))
+/* Decode UTF-16 code units from *inptr (up to e) into dest at *outpos; both are updated on return. */
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+                        int native_ordering)
+{
+    Py_UCS4 ch;
+    const unsigned char *q = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+    /* Offsets from q for retrieving byte pairs in the right order. */
+#if PY_LITTLE_ENDIAN
+    int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+    int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+    --e;  /* q < e now implies two readable bytes at q */
+
+    while (q < e) {
+        Py_UCS4 ch2;
+        /* First check for possible aligned read of a C 'long'. Unaligned
+           reads are more expensive, better to defer to another iteration. */
+        if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
+            /* Fast path for runs of in-range non-surrogate chars. */
+            const unsigned char *_q = q;
+            while (_q + SIZEOF_LONG <= e) {
+                unsigned long block = * (const unsigned long *) _q;
+                if (native_ordering) {
+                    /* Can use buffer directly */
+                    if (block & FAST_CHAR_MASK)
+                        break;
+                }
+                else {
+                    /* Need to byte-swap */
+                    if (block & SWAB(FAST_CHAR_MASK))
+                        break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    block >>= 8;  /* checked units have a zero low byte; move the value byte down */
+#else
+                    block = SWAB(block);
+#endif
+                }
+#if PY_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block >> 16);
+                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block >> 48);
+                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+                _q += SIZEOF_LONG;
+                p += SIZEOF_LONG / 2;
+            }
+            q = _q;
+            if (q >= e)
+                break;
+        }
+
+        ch = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+            if (ch > STRINGLIB_MAX_CHAR)
+                /* Out-of-range */
+                goto Return;
+#endif
+            *p++ = (STRINGLIB_CHAR)ch;
+            continue;
+        }
+
+        /* UTF-16 code pair: */
+        if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+            goto IllegalEncoding;
+        if (q >= e)
+            goto UnexpectedEnd;
+        ch2 = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+            goto IllegalSurrogate;
+        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+        /* Out-of-range */
+        goto Return;
+#else
+        *p++ = (STRINGLIB_CHAR)ch;
+#endif
+    }
+    ch = 0;  /* 0: stopped normally (fewer than two bytes left) */
+Return:  /* also entered with ch = a decoded char that does not fit this kind (q is already past it) */
+    *inptr = q;
+    *outpos = p - dest;
+    return ch;
+UnexpectedEnd:
+    ch = 1;  /* 1: input ends after a high surrogate */
+    goto Return;
+IllegalEncoding:
+    ch = 2;  /* 2: surrogate that is not a high surrogate comes first */
+    goto Return;
+IllegalSurrogate:
+    ch = 3;  /* 3: high surrogate not followed by a low surrogate */
+    goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+
+
+#if STRINGLIB_MAX_CHAR >= 0x80
+Py_LOCAL_INLINE(Py_ssize_t)  /* len, or the index in `in` of a rejected surrogate */
+STRINGLIB(utf16_encode)(const STRINGLIB_CHAR *in,   /* characters to encode */
+                        Py_ssize_t len,             /* element count of `in` */
+                        unsigned short **outptr,    /* in/out: output cursor */
+                        int native_ordering)        /* zero: byte-swap units */
+{
+    unsigned short *out = *outptr;
+    const STRINGLIB_CHAR *end = in + len;
+#if STRINGLIB_SIZEOF_CHAR == 1  /* 1-byte input: one unit per element, no checks */
+    if (native_ordering) {
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {  /* four elements per pass */
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+        while (in < end) {  /* leftover elements */
+            *out++ = *in++;
+        }
+    } else {
+# define SWAB2(CH)  ((CH) << 8) /* high byte is zero */
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {  /* four elements per pass */
+            out[0] = SWAB2(in[0]);
+            out[1] = SWAB2(in[1]);
+            out[2] = SWAB2(in[2]);
+            out[3] = SWAB2(in[3]);
+            in += 4; out += 4;
+        }
+        while (in < end) {  /* leftover elements */
+            Py_UCS4 ch = *in++;
+            *out++ = SWAB2((Py_UCS2)ch);
+        }
+#undef SWAB2
+    }
+    *outptr = out;  /* hand the advanced cursor back to the caller */
+    return len;
+#else  /* wider input: surrogate code points are rejected */
+    if (native_ordering) {
+#if STRINGLIB_MAX_CHAR < 0x10000  /* unrolled path only when no pairs are needed */
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+            /* leave the fast path if any of the four may be a surrogate */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+#endif
+        while (in < end) {  /* checked, one element at a time */
+            Py_UCS4 ch;
+            ch = *in++;
+            if (ch < 0xd800)
+                *out++ = ch;
+            else if (ch < 0xe000)
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+#if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {  /* written as two units */
+                out[0] = Py_UNICODE_HIGH_SURROGATE(ch);
+                out[1] = Py_UNICODE_LOW_SURROGATE(ch);
+                out += 2;
+            }
+#endif
+            else
+                *out++ = ch;
+        }
+    } else {
+#define SWAB2(CH)  (((CH) << 8) | ((CH) >> 8))
+#if STRINGLIB_MAX_CHAR < 0x10000  /* unrolled path only when no pairs are needed */
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {
+            /* leave the fast path if any of the four may be a surrogate */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+            out[0] = SWAB2(in[0]);
+            out[1] = SWAB2(in[1]);
+            out[2] = SWAB2(in[2]);
+            out[3] = SWAB2(in[3]);
+            in += 4; out += 4;
+        }
+#endif
+        while (in < end) {  /* checked, one element at a time */
+            Py_UCS4 ch = *in++;
+            if (ch < 0xd800)
+                *out++ = SWAB2((Py_UCS2)ch);
+            else if (ch < 0xe000)
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+#if STRINGLIB_MAX_CHAR >= 0x10000
+            else if (ch >= 0x10000) {  /* written as two swapped units */
+                Py_UCS2 ch1 = Py_UNICODE_HIGH_SURROGATE(ch);
+                Py_UCS2 ch2 = Py_UNICODE_LOW_SURROGATE(ch);
+                out[0] = SWAB2(ch1);
+                out[1] = SWAB2(ch2);
+                out += 2;
+            }
+#endif
+            else
+                *out++ = SWAB2((Py_UCS2)ch);
+        }
+#undef SWAB2
+    }
+    *outptr = out;  /* hand the advanced cursor back to the caller */
+    return len;
+  fail:
+    *outptr = out;  /* units written before the offender stay valid */
+    return len - (end - in + 1);  /* `in` is already one past the offender */
+#endif
+}
+
+static inline uint32_t
+STRINGLIB(SWAB4)(STRINGLIB_CHAR ch)
+{
+    const uint32_t unit = (uint32_t)ch;  /* widen before shifting */
+#if STRINGLIB_SIZEOF_CHAR == 2
+    /* only the two low bytes can be set: swap them into the top half */
+    return ((unit & 0xFF00u) << 8) | ((unit & 0x00FFu) << 24);
+#elif STRINGLIB_SIZEOF_CHAR == 1
+    /* only the low byte can be set: it becomes the top byte */
+    return unit << 24;
+#else
+    return _Py_bswap32(unit);
+#endif
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)  /* len, or the index in `in` of a rejected surrogate */
+STRINGLIB(utf32_encode)(const STRINGLIB_CHAR *in,   /* characters to encode */
+                        Py_ssize_t len,             /* element count of `in` */
+                        uint32_t **outptr,          /* in/out: output cursor */
+                        int native_ordering)        /* zero: byte-swap units */
+{
+    uint32_t *out = *outptr;
+    const STRINGLIB_CHAR *end = in + len;
+    if (native_ordering) {
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {  /* four elements per pass */
+#if STRINGLIB_SIZEOF_CHAR > 1
+            /* leave the fast path if any of the four may be a surrogate */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+#endif
+            out[0] = in[0];
+            out[1] = in[1];
+            out[2] = in[2];
+            out[3] = in[3];
+            in += 4; out += 4;
+        }
+        while (in < end) {  /* checked, one element at a time */
+            Py_UCS4 ch;
+            ch = *in++;
+#if STRINGLIB_SIZEOF_CHAR > 1
+            if (Py_UNICODE_IS_SURROGATE(ch)) {
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+            }
+#endif
+            *out++ = ch;
+        }
+    } else {
+        const STRINGLIB_CHAR *unrolled_end = in + _Py_SIZE_ROUND_DOWN(len, 4);
+        while (in < unrolled_end) {  /* four elements per pass */
+#if STRINGLIB_SIZEOF_CHAR > 1
+            /* leave the fast path if any of the four may be a surrogate */
+            if (((in[0] ^ 0xd800) &
+                 (in[1] ^ 0xd800) &
+                 (in[2] ^ 0xd800) &
+                 (in[3] ^ 0xd800) & 0xf800) == 0)
+                break;
+#endif
+            out[0] = STRINGLIB(SWAB4)(in[0]);
+            out[1] = STRINGLIB(SWAB4)(in[1]);
+            out[2] = STRINGLIB(SWAB4)(in[2]);
+            out[3] = STRINGLIB(SWAB4)(in[3]);
+            in += 4; out += 4;
+        }
+        while (in < end) {  /* checked, one element at a time */
+            Py_UCS4 ch = *in++;
+#if STRINGLIB_SIZEOF_CHAR > 1
+            if (Py_UNICODE_IS_SURROGATE(ch)) {
+                /* reject surrogate characters (U+D800-U+DFFF) */
+                goto fail;
+            }
+#endif
+            *out++ = STRINGLIB(SWAB4)(ch);
+        }
+    }
+    *outptr = out;  /* hand the advanced cursor back to the caller */
+    return len;
+#if STRINGLIB_SIZEOF_CHAR > 1  /* label exists only where `goto fail` does */
+  fail:
+    *outptr = out;  /* units written before the offender stay valid */
+    return len - (end - in + 1);  /* `in` is already one past the offender */
+#endif
+}
+
+#endif
diff --git a/contrib/tools/python3/Objects/stringlib/count.h b/contrib/tools/python3/Objects/stringlib/count.h
new file mode 100644
index 00000000000..e20edcd104b
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/count.h
@@ -0,0 +1,32 @@
+/* stringlib: count implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+// gh-97982: Implementing asciilib_count() is not worth it, FASTSEARCH() does
+// not specialize the code for ASCII strings. Use ucs1lib_count() for ASCII and
+// UCS1 strings: it's the same as asciilib_count().
+#if !STRINGLIB_IS_UNICODE || STRINGLIB_MAX_CHAR > 0x7Fu
+
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(count)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                Py_ssize_t maxcount)
+{
+    /* Number of occurrences of sub in str, capped by maxcount. */
+    if (str_len < 0) {
+        /* start > len(str) */
+        return 0;
+    }
+    if (sub_len == 0) {
+        /* empty needle: str_len + 1 hits unless the cap is lower */
+        return (maxcount <= str_len) ? maxcount : str_len + 1;
+    }
+    /* FASTSEARCH yields a negative value when no match is possible. */
+    const Py_ssize_t hits = FASTSEARCH(str, str_len, sub, sub_len,
+                                       maxcount, FAST_COUNT);
+    return (hits < 0) ? 0 : hits;
+}
+
+#endif
diff --git a/contrib/tools/python3/Objects/stringlib/ctype.h b/contrib/tools/python3/Objects/stringlib/ctype.h
new file mode 100644
index 00000000000..9b319b07d11
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/ctype.h
@@ -0,0 +1,116 @@
+#if STRINGLIB_IS_UNICODE
+# error "ctype.h only compatible with byte-wise strings"
+#endif
+
+#include "pycore_bytes_methods.h"
+
+static PyObject*
+stringlib_isspace(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isspace(). */
+    return _Py_bytes_isspace(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isalpha(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isalpha(). */
+    return _Py_bytes_isalpha(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isalnum(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isalnum(). */
+    return _Py_bytes_isalnum(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isascii(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isascii(). */
+    return _Py_bytes_isascii(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isdigit(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isdigit(). */
+    return _Py_bytes_isdigit(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_islower(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_islower(). */
+    return _Py_bytes_islower(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_isupper(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_isupper(). */
+    return _Py_bytes_isupper(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+static PyObject*
+stringlib_istitle(PyObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Forwards self's buffer and length to _Py_bytes_istitle(). */
+    return _Py_bytes_istitle(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+}
+
+
+/* functions that return a new object partially translated by ctype funcs: */
+
+static PyObject*
+stringlib_lower(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    /* Allocate a same-length object, then let _Py_bytes_lower() fill it. */
+    PyObject *lowered = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (lowered == NULL)
+        return NULL;
+    _Py_bytes_lower(STRINGLIB_STR(lowered), STRINGLIB_STR(self),
+                    STRINGLIB_LEN(self));
+    return lowered;
+}
+
+static PyObject*
+stringlib_upper(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    /* Allocate a same-length object, then let _Py_bytes_upper() fill it. */
+    PyObject *uppered = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (uppered == NULL)
+        return NULL;
+    _Py_bytes_upper(STRINGLIB_STR(uppered), STRINGLIB_STR(self),
+                    STRINGLIB_LEN(self));
+    return uppered;
+}
+
+static PyObject*
+stringlib_title(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    /* Allocate a same-length object, then let _Py_bytes_title() fill it. */
+    PyObject *titled = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (titled == NULL)
+        return NULL;
+    _Py_bytes_title(STRINGLIB_STR(titled), STRINGLIB_STR(self),
+                    STRINGLIB_LEN(self));
+    return titled;
+}
+
+static PyObject*
+stringlib_capitalize(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    /* Allocate a same-length object for _Py_bytes_capitalize() to fill. */
+    PyObject *capitalized = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (capitalized == NULL)
+        return NULL;
+    _Py_bytes_capitalize(STRINGLIB_STR(capitalized), STRINGLIB_STR(self),
+                         STRINGLIB_LEN(self));
+    return capitalized;
+}
+
+static PyObject*
+stringlib_swapcase(PyObject *self, PyObject *Py_UNUSED(ignored))
+{
+    /* Allocate a same-length object for _Py_bytes_swapcase() to fill. */
+    PyObject *swapped = STRINGLIB_NEW(NULL, STRINGLIB_LEN(self));
+    if (swapped == NULL)
+        return NULL;
+    _Py_bytes_swapcase(STRINGLIB_STR(swapped), STRINGLIB_STR(self),
+                       STRINGLIB_LEN(self));
+    return swapped;
+}
diff --git a/contrib/tools/python3/Objects/stringlib/eq.h b/contrib/tools/python3/Objects/stringlib/eq.h
new file mode 100644
index 00000000000..2eac4baf5ca
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/eq.h
@@ -0,0 +1,17 @@
+/* Fast unicode equal function optimized for dictobject.c and setobject.c */
+
+/* Return 1 if two unicode objects are equal, 0 if not.
+ * unicode_eq() is called when the hash of two unicode objects is equal.
+ */
+Py_LOCAL_INLINE(int)
+unicode_eq(PyObject *a, PyObject *b)
+{
+    const Py_ssize_t length = PyUnicode_GET_LENGTH(a);
+    if (length != PyUnicode_GET_LENGTH(b))
+        return 0;   /* lengths differ */
+    if (length == 0)
+        return 1;   /* both empty; kinds are not compared */
+    return PyUnicode_KIND(a) == PyUnicode_KIND(b)   /* same kind and ... */
+        && memcmp(PyUnicode_1BYTE_DATA(a), PyUnicode_1BYTE_DATA(b),
+                  length * PyUnicode_KIND(a)) == 0;   /* ... same bytes */
+}
diff --git a/contrib/tools/python3/Objects/stringlib/fastsearch.h b/contrib/tools/python3/Objects/stringlib/fastsearch.h
new file mode 100644
index 00000000000..257b7bd6788
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/fastsearch.h
@@ -0,0 +1,804 @@
+/* stringlib: fastsearch implementation */
+
+#define STRINGLIB_FASTSEARCH_H
+
+/* fast search/count implementation, based on a mix between boyer-
+   moore and horspool, with a few more bells and whistles on the top.
+   for some more background, see:
+   https://web.archive.org/web/20201107074620/http://effbot.org/zone/stringlib.htm */
+
+/* note: fastsearch may access s[n], which isn't a problem when using
+   Python's ordinary string types, but may cause problems if you're
+   using this code in other contexts.  also, the count mode returns -1
+   if there cannot possibly be a match in the target string, and 0 if
+   it has actually checked for matches, but didn't find any.  callers
+   beware! */
+
+/* If the strings are long enough, use Crochemore and Perrin's Two-Way
+   algorithm, which has worst-case O(n) runtime and best-case O(n/k).
+   Also compute a table of shifts to achieve O(n/k) in more cases,
+   and often (data dependent) deduce larger shifts than pure C&P can
+   deduce. See stringlib_find_two_way_notes.txt in this folder for a
+   detailed explanation. */
+
+#define FAST_COUNT 0    /* mode: count occurrences */
+#define FAST_SEARCH 1   /* mode: return the index of a match */
+#define FAST_RSEARCH 2  /* NOTE(review): presumably reverse search; not used in this part */
+
+#if LONG_BIT >= 128  /* bloom width: 128, 64 or 32, the largest not above LONG_BIT */
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))  /* set ch's bit in mask */
+#define STRINGLIB_BLOOM(mask, ch)     \
+    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))  /* nonzero if ch's bit is set */
+
+#ifdef STRINGLIB_FAST_MEMCHR  /* find_char() uses memchr only when n exceeds this */
+#  define MEMCHR_CUT_OFF 15
+#else
+#  define MEMCHR_CUT_OFF 40
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)  /* index of the first ch in s[0:n], or -1 */
+STRINGLIB(find_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+    const STRINGLIB_CHAR *p, *e;
+
+    p = s;
+    e = s + n;
+    if (n > MEMCHR_CUT_OFF) {  /* short inputs go straight to the plain loop */
+#ifdef STRINGLIB_FAST_MEMCHR
+        p = STRINGLIB_FAST_MEMCHR(s, ch, n);
+        if (p != NULL)
+            return (p - s);
+        return -1;
+#else
+        /* use memchr if we can choose a needle without too many likely
+           false positives */
+        const STRINGLIB_CHAR *s1, *e1;
+        unsigned char needle = ch & 0xff;  /* low byte of ch is what memchr looks for */
+        /* If looking for a multiple of 256, we'd have too
+           many false positives looking for the '\0' byte in UCS2
+           and UCS4 representations. */
+        if (needle != 0) {
+            do {
+                void *candidate = memchr(p, needle,
+                                         (e - p) * sizeof(STRINGLIB_CHAR));
+                if (candidate == NULL)
+                    return -1;
+                s1 = p;  /* where this memchr call started */
+                p = (const STRINGLIB_CHAR *)
+                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+                if (*p == ch)
+                    return (p - s);
+                /* False positive: the byte matched, the whole character did not */
+                p++;
+                if (p - s1 > MEMCHR_CUT_OFF)  /* memchr covered a long stretch */
+                    continue;  /* re-test the loop condition, then memchr again */
+                if (e - p <= MEMCHR_CUT_OFF)
+                    break;  /* short tail: finish in the plain loop below */
+                e1 = p + MEMCHR_CUT_OFF;  /* scan this many characters by hand first */
+                while (p != e1) {
+                    if (*p == ch)
+                        return (p - s);
+                    p++;
+                }
+            }
+            while (e - p > MEMCHR_CUT_OFF);
+        }
+#endif
+    }
+    while (p < e) {  /* plain scan of whatever is left */
+        if (*p == ch)
+            return (p - s);
+        p++;
+    }
+    return -1;
+}
+
+#undef MEMCHR_CUT_OFF  /* only find_char() above uses it */
+
+#if STRINGLIB_SIZEOF_CHAR == 1  /* rfind_char() uses memrchr only when n exceeds this */
+#  define MEMRCHR_CUT_OFF 15
+#else
+#  define MEMRCHR_CUT_OFF 40
+#endif
+
+
+Py_LOCAL_INLINE(Py_ssize_t)  /* index of the last ch in s[0:n], or -1 */
+STRINGLIB(rfind_char)(const STRINGLIB_CHAR* s, Py_ssize_t n, STRINGLIB_CHAR ch)
+{
+    const STRINGLIB_CHAR *p;
+#ifdef HAVE_MEMRCHR
+    /* memrchr() is a GNU extension, available since glibc 2.1.91.  It
+       doesn't seem as optimized as memchr(), but is still considerably
+       faster than our hand-written loop below. There is no wmemrchr
+       for 4-byte chars. */
+
+    if (n > MEMRCHR_CUT_OFF) {  /* short inputs go straight to the plain loop */
+#if STRINGLIB_SIZEOF_CHAR == 1
+        p = memrchr(s, ch, n);
+        if (p != NULL)
+            return (p - s);
+        return -1;
+#else
+        /* use memrchr if we can choose a needle without too many likely
+           false positives */
+        const STRINGLIB_CHAR *s1;
+        Py_ssize_t n1;
+        unsigned char needle = ch & 0xff;  /* low byte of ch is what memrchr looks for */
+        /* If looking for a multiple of 256, we'd have too
+           many false positives looking for the '\0' byte in UCS2
+           and UCS4 representations. */
+        if (needle != 0) {
+            do {
+                void *candidate = memrchr(s, needle,
+                                          n * sizeof(STRINGLIB_CHAR));
+                if (candidate == NULL)
+                    return -1;
+                n1 = n;  /* length searched by this memrchr call */
+                p = (const STRINGLIB_CHAR *)
+                        _Py_ALIGN_DOWN(candidate, sizeof(STRINGLIB_CHAR));
+                n = p - s;  /* from here on only s[0:n] remains */
+                if (*p == ch)
+                    return n;
+                /* False positive: the byte matched, the whole character did not */
+                if (n1 - n > MEMRCHR_CUT_OFF)  /* memrchr covered a long stretch */
+                    continue;  /* re-test the loop condition, then memrchr again */
+                if (n <= MEMRCHR_CUT_OFF)
+                    break;  /* short head: finish in the plain loop below */
+                s1 = p - MEMRCHR_CUT_OFF;  /* scan this many characters by hand first */
+                while (p > s1) {
+                    p--;
+                    if (*p == ch)
+                        return (p - s);
+                }
+                n = p - s;
+            }
+            while (n > MEMRCHR_CUT_OFF);
+        }
+#endif
+    }
+#endif  /* HAVE_MEMRCHR */
+    p = s + n;  /* plain backwards scan of whatever is left */
+    while (p > s) {
+        p--;
+        if (*p == ch)
+            return (p - s);
+    }
+    return -1;
+}
+
+#undef MEMRCHR_CUT_OFF  /* only rfind_char() above uses it */
+
+/* Change the 0 to a 1 to log each search step (1-byte character builds only). */
+#if 0 && STRINGLIB_SIZEOF_CHAR == 1
+# define LOG(...) printf(__VA_ARGS__)
+# define LOG_STRING(s, n) printf("\"%.*s\"", (int)(n), s)
+# define LOG_LINEUP() do {                                         \
+    LOG("> "); LOG_STRING(haystack, len_haystack); LOG("\n> ");    \
+    LOG("%*s",(int)(window_last - haystack + 1 - len_needle), ""); \
+    LOG_STRING(needle, len_needle); LOG("\n");                     \
+} while(0)  /* reads haystack, needle, window_last etc. from the calling scope */
+#else  /* logging off: all three macros expand to nothing */
+# define LOG(...)
+# define LOG_STRING(s, n)
+# define LOG_LINEUP()
+#endif
+
+Py_LOCAL_INLINE(Py_ssize_t)  /* start index of the maximal suffix found */
+STRINGLIB(_lex_search)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
+                       Py_ssize_t *return_period, int invert_alphabet)
+{
+    /* Do a lexicographic search. Essentially this:
+           >>> max(needle[i:] for i in range(len(needle)+1))
+       Also find the period of the right half (stored in *return_period). */
+    Py_ssize_t max_suffix = 0;
+    Py_ssize_t candidate = 1;
+    Py_ssize_t k = 0;  /* characters matched so far between the two suffixes */
+    // The period of the right half.
+    Py_ssize_t period = 1;
+
+    while (candidate + k < len_needle) {
+        // each loop increases candidate + k + max_suffix
+        STRINGLIB_CHAR a = needle[candidate + k];
+        STRINGLIB_CHAR b = needle[max_suffix + k];
+        // check if the suffix at candidate is better than max_suffix
+        if (invert_alphabet ? (b < a) : (a < b)) {  /* comparison flips when inverted */
+            // Fell short of max_suffix.
+            // The next k + 1 characters are non-increasing
+            // from candidate, so they won't start a maximal suffix.
+            candidate += k + 1;
+            k = 0;
+            // We've ruled out any period smaller than what's
+            // been scanned since max_suffix.
+            period = candidate - max_suffix;
+        }
+        else if (a == b) {
+            if (k + 1 != period) {
+                // Keep scanning the equal strings
+                k++;
+            }
+            else {
+                // Matched a whole period.
+                // Start matching the next period.
+                candidate += period;
+                k = 0;
+            }
+        }
+        else {
+            // Did better than max_suffix, so replace it.
+            max_suffix = candidate;
+            candidate++;
+            k = 0;
+            period = 1;
+        }
+    }
+    *return_period = period;
+    return max_suffix;
+}
+
+Py_LOCAL_INLINE(Py_ssize_t)  /* the cut index; *return_period gets its period */
+STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle,
+                      Py_ssize_t len_needle,
+                      Py_ssize_t *return_period)
+{
+    /* Do a "critical factorization", making it so that:
+       >>> needle = (left := needle[:cut]) + (right := needle[cut:])
+       where the "local period" of the cut is maximal.
+
+       The local period of the cut is the minimal length of a string w
+       such that (left endswith w or w endswith left)
+       and (right startswith w or w startswith right).
+
+       The Critical Factorization Theorem says that this maximal local
+       period is the global period of the string.
+
+       Crochemore and Perrin (1991) show that this cut can be computed
+       as the later of two cuts: one that gives a lexicographically
+       maximal right half, and one that gives the same
+       with respect to a reversed alphabet-ordering.
+
+       This is what we want to happen:
+           >>> x = "GCAGAGAG"
+           >>> cut, period = factorize(x)
+           >>> x[:cut], (right := x[cut:])
+           ('GC', 'AGAGAG')
+           >>> period  # right half period
+           2
+           >>> right[period:] == right[:-period]
+           True
+
+       This is how the local period lines up in the above example:
+                GC | AGAGAG
+           AGAGAGC = AGAGAGC
+       The length of this minimal repetition is 7, which is indeed the
+       period of the original string. */
+
+    Py_ssize_t cut1, period1, cut2, period2, cut, period;
+    cut1 = STRINGLIB(_lex_search)(needle, len_needle, &period1, 0);  /* normal order */
+    cut2 = STRINGLIB(_lex_search)(needle, len_needle, &period2, 1);  /* reversed order */
+
+    // Take the later cut.
+    if (cut1 > cut2) {
+        period = period1;
+        cut = cut1;
+    }
+    else {  /* ties go to the reversed-order search */
+        period = period2;
+        cut = cut2;
+    }
+
+    LOG("split: "); LOG_STRING(needle, cut);
+    LOG(" + "); LOG_STRING(needle + cut, len_needle - cut);
+    LOG("\n");
+
+    *return_period = period;
+    return cut;
+}
+
+
+#define SHIFT_TYPE uint8_t   /* element type of the shift table */
+#define MAX_SHIFT UINT8_MAX  /* largest shift a table entry can hold */
+
+#define TABLE_SIZE_BITS 6u   /* table has 2**6 = 64 entries */
+#define TABLE_SIZE (1U << TABLE_SIZE_BITS)
+#define TABLE_MASK (TABLE_SIZE - 1U)  /* maps a character to its table slot */
+
+typedef struct STRINGLIB(_pre) {
+    const STRINGLIB_CHAR *needle;  /* the pattern; pointer is stored, not copied */
+    Py_ssize_t len_needle;         /* pattern length in characters */
+    Py_ssize_t cut;                /* split index returned by _factorize() */
+    Py_ssize_t period;             /* period; only a lower bound if !is_periodic */
+    Py_ssize_t gap;                /* last char to its previous table-equal char */
+    int is_periodic;               /* needle[:cut] recurs `period` chars later */
+    SHIFT_TYPE table[TABLE_SIZE];  /* shifts indexed by (char & TABLE_MASK) */
+} STRINGLIB(prework);
+
+
+static void  /* fills every field of *p from the needle */
+STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle,
+                       STRINGLIB(prework) *p)
+{
+    p->needle = needle;
+    p->len_needle = len_needle;
+    p->cut = STRINGLIB(_factorize)(needle, len_needle, &(p->period));
+    assert(p->period + p->cut <= len_needle);
+    p->is_periodic = (0 == memcmp(needle,  /* needle[:cut] == needle[period:period+cut]? */
+                                  needle + p->period,
+                                  p->cut * STRINGLIB_SIZEOF_CHAR));
+    if (p->is_periodic) {
+        assert(p->cut <= len_needle/2);
+        assert(p->cut < p->period);
+        p->gap = 0; // unused
+    }
+    else {
+        // A lower bound on the period
+        p->period = Py_MAX(p->cut, len_needle - p->cut) + 1;
+        // The gap between the last character and the previous
+        // occurrence of an equivalent character (modulo TABLE_SIZE)
+        p->gap = len_needle;  /* value kept when no earlier equivalent exists */
+        STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK;
+        for (Py_ssize_t i = len_needle - 2; i >= 0; i--) {  /* nearest first */
+            STRINGLIB_CHAR x = needle[i] & TABLE_MASK;
+            if (x == last) {
+                p->gap = len_needle - 1 - i;
+                break;
+            }
+        }
+    }
+    // Fill up a compressed Boyer-Moore "Bad Character" table
+    Py_ssize_t not_found_shift = Py_MIN(len_needle, MAX_SHIFT);
+    for (Py_ssize_t i = 0; i < (Py_ssize_t)TABLE_SIZE; i++) {  /* default entry */
+        p->table[i] = Py_SAFE_DOWNCAST(not_found_shift,
+                                       Py_ssize_t, SHIFT_TYPE);
+    }
+    for (Py_ssize_t i = len_needle - not_found_shift; i < len_needle; i++) {
+        SHIFT_TYPE shift = Py_SAFE_DOWNCAST(len_needle - 1 - i,
+                                            Py_ssize_t, SHIFT_TYPE);
+        p->table[needle[i] & TABLE_MASK] = shift;  /* later positions overwrite */
+    }
+}
+
+static Py_ssize_t  /* offset of the first match in haystack, or -1 */
+STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack,
+                    STRINGLIB(prework) *p)
+{
+    // Crochemore and Perrin's (1991) Two-Way algorithm.
+    // See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260
+    const Py_ssize_t len_needle = p->len_needle;
+    const Py_ssize_t cut = p->cut;
+    Py_ssize_t period = p->period;
+    const STRINGLIB_CHAR *const needle = p->needle;
+    const STRINGLIB_CHAR *window_last = haystack + len_needle - 1;
+    const STRINGLIB_CHAR *const haystack_end = haystack + len_haystack;
+    SHIFT_TYPE *table = p->table;
+    const STRINGLIB_CHAR *window;  /* first character of the current window */
+    LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack);
+
+    if (p->is_periodic) {
+        LOG("Needle is periodic.\n");
+        Py_ssize_t memory = 0;  /* positions below this are not re-compared */
+      periodicwindowloop:
+        while (window_last < haystack_end) {
+            assert(memory == 0);
+            for (;;) {  /* slide by table shifts until the shift is zero */
+                LOG_LINEUP();
+                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
+                window_last += shift;
+                if (shift == 0) {
+                    break;
+                }
+                if (window_last >= haystack_end) {
+                    return -1;
+                }
+                LOG("Horspool skip\n");
+            }
+          no_shift:
+            window = window_last - len_needle + 1;
+            assert((window[len_needle - 1] & TABLE_MASK) ==
+                   (needle[len_needle - 1] & TABLE_MASK));
+            Py_ssize_t i = Py_MAX(cut, memory);
+            for (; i < len_needle; i++) {  /* right half, left to right */
+                if (needle[i] != window[i]) {
+                    LOG("Right half does not match.\n");
+                    window_last += i - cut + 1;
+                    memory = 0;
+                    goto periodicwindowloop;
+                }
+            }
+            for (i = memory; i < cut; i++) {  /* then the left half */
+                if (needle[i] != window[i]) {
+                    LOG("Left half does not match.\n");
+                    window_last += period;
+                    memory = len_needle - period;
+                    if (window_last >= haystack_end) {
+                        return -1;
+                    }
+                    Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
+                    if (shift) {
+                        // A mismatch has been identified to the right
+                        // of where i will next start, so we can jump
+                        // at least as far as if the mismatch occurred
+                        // on the first comparison.
+                        Py_ssize_t mem_jump = Py_MAX(cut, memory) - cut + 1;
+                        LOG("Skip with Memory.\n");
+                        memory = 0;
+                        window_last += Py_MAX(shift, mem_jump);
+                        goto periodicwindowloop;
+                    }
+                    goto no_shift;  /* zero shift: compare at once, keeping memory */
+                }
+            }
+            LOG("Found a match!\n");
+            return window - haystack;
+        }
+    }
+    else {
+        Py_ssize_t gap = p->gap;
+        period = Py_MAX(gap, period);  /* left-half mismatches shift by this */
+        LOG("Needle is not periodic.\n");
+        Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap);
+      windowloop:
+        while (window_last < haystack_end) {
+            for (;;) {  /* slide by table shifts until the shift is zero */
+                LOG_LINEUP();
+                Py_ssize_t shift = table[(*window_last) & TABLE_MASK];
+                window_last += shift;
+                if (shift == 0) {
+                    break;
+                }
+                if (window_last >= haystack_end) {
+                    return -1;
+                }
+                LOG("Horspool skip\n");
+            }
+            window = window_last - len_needle + 1;
+            assert((window[len_needle - 1] & TABLE_MASK) ==
+                   (needle[len_needle - 1] & TABLE_MASK));
+            for (Py_ssize_t i = cut; i < gap_jump_end; i++) {  /* early right half */
+                if (needle[i] != window[i]) {
+                    LOG("Early right half mismatch: jump by gap.\n");
+                    assert(gap >= i - cut + 1);
+                    window_last += gap;
+                    goto windowloop;
+                }
+            }
+            for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) {  /* rest of it */
+                if (needle[i] != window[i]) {
+                    LOG("Late right half mismatch.\n");
+                    assert(i - cut + 1 > gap);
+                    window_last += i - cut + 1;
+                    goto windowloop;
+                }
+            }
+            for (Py_ssize_t i = 0; i < cut; i++) {  /* finally the left half */
+                if (needle[i] != window[i]) {
+                    LOG("Left half does not match.\n");
+                    window_last += period;
+                    goto windowloop;
+                }
+            }
+            LOG("Found a match!\n");
+            return window - haystack;
+        }
+    }
+    LOG("Not found. Returning -1.\n");
+    return -1;
+}
+
+
+static Py_ssize_t
+STRINGLIB(_two_way_find)(const STRINGLIB_CHAR *haystack,
+                         Py_ssize_t len_haystack,
+                         const STRINGLIB_CHAR *needle,
+                         Py_ssize_t len_needle)
+{
+    STRINGLIB(prework) pre;  /* needle tables, filled in just below */
+    LOG("###### Finding \"%s\" in \"%s\".\n", needle, haystack);
+    STRINGLIB(_preprocess)(needle, len_needle, &pre);
+    return STRINGLIB(_two_way)(haystack, len_haystack, &pre);
+}
+
+
+static Py_ssize_t
+STRINGLIB(_two_way_count)(const STRINGLIB_CHAR *haystack,
+                          Py_ssize_t len_haystack,
+                          const STRINGLIB_CHAR *needle,
+                          Py_ssize_t len_needle,
+                          Py_ssize_t maxcount)
+{
+    /* Count matches, restarting each scan just past the previous hit;
+       stop as soon as the tally reaches maxcount. */
+    STRINGLIB(prework) pre;
+    Py_ssize_t found = 0;
+    Py_ssize_t offset = 0;   /* start of the part not yet scanned */
+
+    LOG("###### Counting \"%s\" in \"%s\".\n", needle, haystack);
+    STRINGLIB(_preprocess)(needle, len_needle, &pre);
+    for (;;) {
+        const Py_ssize_t hit = STRINGLIB(_two_way)(
+            haystack + offset, len_haystack - offset, &pre);
+        if (hit == -1)
+            break;
+        if (++found == maxcount)
+            break;   /* found equals maxcount here */
+        offset += hit + len_needle;
+    }
+    return found;
+}
+
+#undef SHIFT_TYPE
+#undef NOT_FOUND
+#undef SHIFT_OVERFLOW
+#undef TABLE_SIZE_BITS
+#undef TABLE_SIZE
+#undef TABLE_MASK
+
+#undef LOG
+#undef LOG_STRING
+#undef LOG_LINEUP
+
+/* Forward search / count using a Bloom-filter "bad character" skip.
+
+   s, n      haystack and its length
+   p, m      needle and its length; p[m - 1] is read unconditionally, and
+             FASTSEARCH only calls this with m >= 2 and n >= m
+   maxcount  upper bound on the count (only consulted in FAST_COUNT mode)
+   mode      FAST_COUNT to count non-overlapping matches; any other value
+             returns the index of the first match
+
+   Returns the match index or -1 when searching, and the number of
+   matches (capped at maxcount) when counting.
+
+   The Bloom probes look at the character just past the current window,
+   ss[i+1] == s[i + m].  For the last window (i == w) that would be s[n],
+   one element past the end of the haystack, so the probe is skipped
+   there.  Skipping it cannot change the result: whichever branch is
+   taken at i == w, the loop terminates on the next increment. */
+static inline Py_ssize_t
+STRINGLIB(default_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
+                        const STRINGLIB_CHAR* p, Py_ssize_t m,
+                        Py_ssize_t maxcount, int mode)
+{
+    const Py_ssize_t w = n - m;      /* index of the last possible window */
+    Py_ssize_t mlast = m - 1, count = 0;
+    Py_ssize_t gap = mlast;          /* shift to use after a failed candidate */
+    const STRINGLIB_CHAR last = p[mlast];
+    const STRINGLIB_CHAR *const ss = &s[mlast];  /* ss[i]: last char of window i */
+
+    /* Add every needle character to the Bloom mask, and set gap from the
+       right-most earlier occurrence of the needle's last character. */
+    unsigned long mask = 0;
+    for (Py_ssize_t i = 0; i < mlast; i++) {
+        STRINGLIB_BLOOM_ADD(mask, p[i]);
+        if (p[i] == last) {
+            gap = mlast - i - 1;
+        }
+    }
+    STRINGLIB_BLOOM_ADD(mask, last);
+
+    for (Py_ssize_t i = 0; i <= w; i++) {
+        if (ss[i] == last) {
+            /* candidate match */
+            Py_ssize_t j;
+            for (j = 0; j < mlast; j++) {
+                if (s[i+j] != p[j]) {
+                    break;
+                }
+            }
+            if (j == mlast) {
+                /* got a match! */
+                if (mode != FAST_COUNT) {
+                    return i;
+                }
+                count++;
+                if (count == maxcount) {
+                    return maxcount;
+                }
+                /* together with the loop's i++ this steps past the match */
+                i = i + mlast;
+                continue;
+            }
+            /* miss: check if next character is part of pattern
+               (only while that character is still inside the haystack) */
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+                i = i + m;
+            }
+            else {
+                i = i + gap;
+            }
+        }
+        else {
+            /* skip: check if next character is part of pattern
+               (only while that character is still inside the haystack) */
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+                i = i + m;
+            }
+        }
+    }
+    return mode == FAST_COUNT ? count : -1;
+}
+
+
+/* Same scan as STRINGLIB(default_find), with an escape hatch: the number
+   of characters compared in failed candidates is accumulated in `hits`,
+   and once that exceeds m / 4 while more than 2000 windows remain, the
+   rest of the haystack is handed to the two-way algorithm.
+
+   s, n      haystack and its length
+   p, m      needle and its length; p[m - 1] is read unconditionally, and
+             FASTSEARCH only calls this with m >= 2 and n >= m
+   maxcount  upper bound on the count (only consulted when counting)
+   mode      FAST_SEARCH returns the index of the first match or -1;
+             FAST_COUNT returns the number of matches, capped at maxcount
+
+   The Bloom probes look at ss[i+1] == s[i + m].  For the last window
+   (i == w) that would be s[n], one element past the end of the haystack,
+   so the probe is skipped there.  Skipping it cannot change the result
+   because the loop terminates right after that iteration. */
+static Py_ssize_t
+STRINGLIB(adaptive_find)(const STRINGLIB_CHAR* s, Py_ssize_t n,
+                         const STRINGLIB_CHAR* p, Py_ssize_t m,
+                         Py_ssize_t maxcount, int mode)
+{
+    const Py_ssize_t w = n - m;      /* index of the last possible window */
+    Py_ssize_t mlast = m - 1, count = 0;
+    Py_ssize_t gap = mlast;          /* shift to use after a failed candidate */
+    Py_ssize_t hits = 0, res;
+    const STRINGLIB_CHAR last = p[mlast];
+    const STRINGLIB_CHAR *const ss = &s[mlast];  /* ss[i]: last char of window i */
+
+    /* Add every needle character to the Bloom mask, and set gap from the
+       right-most earlier occurrence of the needle's last character. */
+    unsigned long mask = 0;
+    for (Py_ssize_t i = 0; i < mlast; i++) {
+        STRINGLIB_BLOOM_ADD(mask, p[i]);
+        if (p[i] == last) {
+            gap = mlast - i - 1;
+        }
+    }
+    STRINGLIB_BLOOM_ADD(mask, last);
+
+    for (Py_ssize_t i = 0; i <= w; i++) {
+        if (ss[i] == last) {
+            /* candidate match */
+            Py_ssize_t j;
+            for (j = 0; j < mlast; j++) {
+                if (s[i+j] != p[j]) {
+                    break;
+                }
+            }
+            if (j == mlast) {
+                /* got a match! */
+                if (mode != FAST_COUNT) {
+                    return i;
+                }
+                count++;
+                if (count == maxcount) {
+                    return maxcount;
+                }
+                /* together with the loop's i++ this steps past the match */
+                i = i + mlast;
+                continue;
+            }
+            /* Failed candidate: charge the comparisons made, and switch to
+               the two-way algorithm once they add up while plenty of
+               haystack is still left. */
+            hits += j + 1;
+            if (hits > m / 4 && w - i > 2000) {
+                if (mode == FAST_SEARCH) {
+                    res = STRINGLIB(_two_way_find)(s + i, n - i, p, m);
+                    /* res is relative to s + i */
+                    return res == -1 ? -1 : res + i;
+                }
+                else {
+                    res = STRINGLIB(_two_way_count)(s + i, n - i, p, m,
+                                                    maxcount - count);
+                    return res + count;
+                }
+            }
+            /* miss: check if next character is part of pattern
+               (only while that character is still inside the haystack) */
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+                i = i + m;
+            }
+            else {
+                i = i + gap;
+            }
+        }
+        else {
+            /* skip: check if next character is part of pattern
+               (only while that character is still inside the haystack) */
+            if (i + 1 <= w && !STRINGLIB_BLOOM(mask, ss[i+1])) {
+                i = i + m;
+            }
+        }
+    }
+    return mode == FAST_COUNT ? count : -1;
+}
+
+
+/* Reverse search: scan window start positions from n - m down to 0 and
+   return the first (i.e. highest) index at which p matches s, or -1 if
+   no window matches.  Candidates are anchored on the first needle
+   character p[0].
+
+   maxcount and mode are part of the signature but are not read in this
+   function. */
+static Py_ssize_t
+STRINGLIB(default_rfind)(const STRINGLIB_CHAR* s, Py_ssize_t n,
+                         const STRINGLIB_CHAR* p, Py_ssize_t m,
+                         Py_ssize_t maxcount, int mode)
+{
+    /* create compressed boyer-moore delta 1 table */
+    unsigned long mask = 0;
+    Py_ssize_t i, j, mlast = m - 1, skip = m - 1, w = n - m;
+
+    /* process pattern[0] outside the loop */
+    STRINGLIB_BLOOM_ADD(mask, p[0]);
+    /* process pattern[:0:-1]; because the loop runs downwards, skip ends up
+       derived from the smallest i > 0 with p[i] == p[0] (or stays m - 1) */
+    for (i = mlast; i > 0; i--) {
+        STRINGLIB_BLOOM_ADD(mask, p[i]);
+        if (p[i] == p[0]) {
+            skip = i - 1;
+        }
+    }
+
+    for (i = w; i >= 0; i--) {
+        if (s[i] == p[0]) {
+            /* candidate match: compare p[mlast] .. p[1] against the window */
+            for (j = mlast; j > 0; j--) {
+                if (s[i+j] != p[j]) {
+                    break;
+                }
+            }
+            if (j == 0) {
+                /* got a match! */
+                return i;
+            }
+            /* miss: check if previous character is part of pattern
+               (the i > 0 test keeps s[i-1] inside the buffer) */
+            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
+                i = i - m;
+            }
+            else {
+                i = i - skip;
+            }
+        }
+        else {
+            /* skip: check if previous character is part of pattern
+               (the i > 0 test keeps s[i-1] inside the buffer) */
+            if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1])) {
+                i = i - m;
+            }
+        }
+    }
+    return -1;
+}
+
+
+/* Count how many of the n characters starting at s equal p0.
+   The scan stops as soon as the running total reaches maxcount, in which
+   case maxcount is returned. */
+static inline Py_ssize_t
+STRINGLIB(count_char)(const STRINGLIB_CHAR *s, Py_ssize_t n,
+                      const STRINGLIB_CHAR p0, Py_ssize_t maxcount)
+{
+    Py_ssize_t hits = 0;
+
+    for (Py_ssize_t k = 0; k < n; k++) {
+        if (s[k] != p0) {
+            continue;
+        }
+        hits++;
+        if (hits == maxcount) {
+            return maxcount;
+        }
+    }
+    return hits;
+}
+
+
+/* Dispatcher for substring search / reverse search / counting.
+
+   s, n      haystack and its length
+   p, m      needle and its length
+   maxcount  cap on the result in FAST_COUNT mode
+   mode      FAST_SEARCH, FAST_RSEARCH or FAST_COUNT
+
+   Returns -1 up front when the needle is longer than the haystack, when
+   the needle length is <= 0, or when counting with maxcount == 0.
+   Otherwise the work is delegated to a single-character helper or to one
+   of the multi-character strategies, chosen from n and m. */
+Py_LOCAL_INLINE(Py_ssize_t)
+FASTSEARCH(const STRINGLIB_CHAR* s, Py_ssize_t n,
+           const STRINGLIB_CHAR* p, Py_ssize_t m,
+           Py_ssize_t maxcount, int mode)
+{
+    if (n < m) {
+        return -1;
+    }
+    if (mode == FAST_COUNT && maxcount == 0) {
+        return -1;
+    }
+
+    /* look for special cases */
+    if (m <= 0) {
+        return -1;
+    }
+    if (m == 1) {
+        /* use special case for 1-character strings */
+        if (mode == FAST_SEARCH) {
+            return STRINGLIB(find_char)(s, n, p[0]);
+        }
+        if (mode == FAST_RSEARCH) {
+            return STRINGLIB(rfind_char)(s, n, p[0]);
+        }
+        return STRINGLIB(count_char)(s, n, p[0], maxcount);
+    }
+
+    if (mode == FAST_RSEARCH) {
+        return STRINGLIB(default_rfind)(s, n, p, m, maxcount, mode);
+    }
+
+    /* Forward search or count from here on. */
+    if (n < 2500 || (m < 100 && n < 30000) || m < 6) {
+        return STRINGLIB(default_find)(s, n, p, m, maxcount, mode);
+    }
+
+    if ((m >> 2) * 3 >= (n >> 2)) {
+        /* The needle is a large fraction of the haystack (33% threshold,
+           computed without overflow).  To ensure good worst-case
+           behavior, use the adaptive version of the algorithm: if it
+           matches O(m) characters without any matches of the entire
+           needle, it predicts that the startup cost of the two-way
+           algorithm will probably be worth it. */
+        return STRINGLIB(adaptive_find)(s, n, p, m, maxcount, mode);
+    }
+
+    /* For larger problems where the needle isn't a huge percentage of
+       the size of the haystack, the relatively expensive O(m) startup
+       cost of the two-way algorithm will surely pay off. */
+    if (mode == FAST_SEARCH) {
+        return STRINGLIB(_two_way_find)(s, n, p, m);
+    }
+    return STRINGLIB(_two_way_count)(s, n, p, m, maxcount);
+}
+
diff --git a/contrib/tools/python3/Objects/stringlib/find.h b/contrib/tools/python3/Objects/stringlib/find.h
new file mode 100644
index 00000000000..509b9297396
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/find.h
@@ -0,0 +1,119 @@
+/* stringlib: find/index implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Find the first occurrence of sub in str.
+   An empty sub matches immediately and yields offset.  Otherwise the
+   FASTSEARCH result is returned, shifted by offset when it is a valid
+   (non-negative) index and passed through untouched when negative. */
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(find)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+               const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+               Py_ssize_t offset)
+{
+    assert(str_len >= 0);
+
+    if (sub_len == 0) {
+        return offset;
+    }
+
+    Py_ssize_t hit = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_SEARCH);
+    return (hit >= 0) ? hit + offset : hit;
+}
+
+/* Find the last occurrence of sub in str.
+   An empty sub matches at the very end and yields str_len + offset.
+   Otherwise the FASTSEARCH (reverse mode) result is returned, shifted by
+   offset when it is a valid (non-negative) index and passed through
+   untouched when negative. */
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                Py_ssize_t offset)
+{
+    assert(str_len >= 0);
+
+    if (sub_len == 0) {
+        return str_len + offset;
+    }
+
+    Py_ssize_t hit = FASTSEARCH(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
+    return (hit >= 0) ? hit + offset : hit;
+}
+
+/* Forward search restricted to str[start:end].  start is passed on as
+   the offset, so a hit is reported relative to the beginning of str.
+   str_len is not consulted here. */
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(find_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                     Py_ssize_t start, Py_ssize_t end)
+{
+    const STRINGLIB_CHAR *window = str + start;
+    Py_ssize_t window_len = end - start;
+
+    return STRINGLIB(find)(window, window_len, sub, sub_len, start);
+}
+
+/* Reverse search restricted to str[start:end].  start is passed on as
+   the offset, so a hit is reported relative to the beginning of str.
+   str_len is not consulted here. */
+Py_LOCAL_INLINE(Py_ssize_t)
+STRINGLIB(rfind_slice)(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                      const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                      Py_ssize_t start, Py_ssize_t end)
+{
+    const STRINGLIB_CHAR *window = str + start;
+    Py_ssize_t window_len = end - start;
+
+    return STRINGLIB(rfind)(window, window_len, sub, sub_len, start);
+}
+
+#ifdef STRINGLIB_WANT_CONTAINS_OBJ
+
+/* Containment test on two string objects: returns 1 when sub occurs
+   somewhere in str (STRINGLIB(find) did not report -1), 0 otherwise. */
+Py_LOCAL_INLINE(int)
+STRINGLIB(contains_obj)(PyObject* str, PyObject* sub)
+{
+    Py_ssize_t where = STRINGLIB(find)(STRINGLIB_STR(str), STRINGLIB_LEN(str),
+                                       STRINGLIB_STR(sub), STRINGLIB_LEN(sub),
+                                       0);
+    return where != -1;
+}
+
+#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
+
+/*
+This function is a helper for the "find" family (find, rfind, index,
+rindex) and for count, startswith and endswith, because they all have
+the same behaviour for the arguments.
+
+It does not touch the variables received until it knows everything
+is ok.
+*/
+
+#define FORMAT_BUFFER_SIZE 50
+
+/* Parse the "(sub[, start[, end]])" argument tuple.
+
+   function_name is appended to the "O|OO:" format string (truncated to
+   fit the local buffer) and handed to PyArg_ParseTuple.
+   A start/end argument that is None is treated as if it had been
+   omitted: start stays 0 and end stays PY_SSIZE_T_MAX.
+
+   Returns 1 on success after storing into *subobj, *start and *end;
+   returns 0 on failure, in which case none of the three outputs has
+   been written. */
+Py_LOCAL_INLINE(int)
+STRINGLIB(parse_args_finds)(const char * function_name, PyObject *args,
+                           PyObject **subobj,
+                           Py_ssize_t *start, Py_ssize_t *end)
+{
+    char format[FORMAT_BUFFER_SIZE] = "O|OO:";
+    size_t len = strlen(format);
+    PyObject *tmp_subobj;
+    PyObject *obj_start = Py_None;
+    PyObject *obj_end = Py_None;
+    Py_ssize_t tmp_start = 0;
+    Py_ssize_t tmp_end = PY_SSIZE_T_MAX;
+
+    /* Build "O|OO:<function_name>", always NUL-terminated. */
+    strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);
+    format[FORMAT_BUFFER_SIZE - 1] = '\0';
+
+    if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end)) {
+        return 0;
+    }
+
+    /* None in "start" / "end" means the same as not passing them. */
+    if (obj_start != Py_None && !_PyEval_SliceIndex(obj_start, &tmp_start)) {
+        return 0;
+    }
+    if (obj_end != Py_None && !_PyEval_SliceIndex(obj_end, &tmp_end)) {
+        return 0;
+    }
+
+    /* Everything parsed: only now publish the results. */
+    *subobj = tmp_subobj;
+    *start = tmp_start;
+    *end = tmp_end;
+    return 1;
+}
+
+#undef FORMAT_BUFFER_SIZE
diff --git a/contrib/tools/python3/Objects/stringlib/find_max_char.h b/contrib/tools/python3/Objects/stringlib/find_max_char.h
new file mode 100644
index 00000000000..b9ffdfc2e35
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/find_max_char.h
@@ -0,0 +1,132 @@
+/* Finding the optimal width of unicode characters in a buffer */
+
+#if !STRINGLIB_IS_UNICODE
+# error "find_max_char.h is specific to Unicode"
+#endif
+
+/* Mask to quickly check whether a C 'size_t' contains a
+   non-ASCII, UTF8-encoded char. */
+#if (SIZEOF_SIZE_T == 8)
+# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
+#elif (SIZEOF_SIZE_T == 4)
+# define UCS1_ASCII_CHAR_MASK 0x80808080U
+#else
+# error C 'size_t' size should be either 4 or 8!
+#endif
+
+#if STRINGLIB_SIZEOF_CHAR == 1
+
+/* One-byte variant: classify the bytes in [begin, end).
+   Returns 255 as soon as a byte with its top bit (0x80) set is seen,
+   and 127 if no such byte exists (including for an empty range). */
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
+{
+    const unsigned char *p = (const unsigned char *) begin;
+
+    while (p < end) {
+        if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
+            /* Aligned: test SIZEOF_SIZE_T bytes per step by masking a whole
+               size_t with the per-byte 0x80 pattern. */
+            /* Help register allocation */
+            const unsigned char *_p = p;
+            while (_p + SIZEOF_SIZE_T <= end) {
+                size_t value = *(const size_t *) _p;
+                if (value & UCS1_ASCII_CHAR_MASK)
+                    return 255;
+                _p += SIZEOF_SIZE_T;
+            }
+            p = _p;
+            /* Nothing left over after the word-sized steps. */
+            if (p == end)
+                break;
+        }
+        /* Unaligned position or tail shorter than a word: one byte. */
+        if (*p++ & 0x80)
+            return 255;
+    }
+    return 127;
+}
+
+#undef ASCII_CHAR_MASK
+
+#else /* STRINGLIB_SIZEOF_CHAR == 1 */
+
+#define MASK_ASCII 0xFFFFFF80
+#define MASK_UCS1 0xFFFFFF00
+#define MASK_UCS2 0xFFFF0000
+
+#define MAX_CHAR_ASCII 0x7f
+#define MAX_CHAR_UCS1  0xff
+#define MAX_CHAR_UCS2  0xffff
+#define MAX_CHAR_UCS4  0x10ffff
+
+/* Two- and four-byte variant: classify the characters in [begin, end).
+
+   The scan starts out assuming ASCII (mask == MASK_ASCII, result 0x7f).
+   Whenever a character has bits under the current mask, the assumption
+   is widened one step (ASCII -> UCS1 -> UCS2) and the same characters
+   are re-tested against the wider mask.  If the mask is already
+   mask_limit, the widest value for this character size is returned at
+   once: 0xffff for 2-byte characters, 0x10ffff for 4-byte characters.
+
+   Returns 0x7f, 0xff, 0xffff or 0x10ffff accordingly. */
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
+{
+#if STRINGLIB_SIZEOF_CHAR == 2
+    const Py_UCS4 mask_limit = MASK_UCS1;
+    const Py_UCS4 max_char_limit = MAX_CHAR_UCS2;
+#elif STRINGLIB_SIZEOF_CHAR == 4
+    const Py_UCS4 mask_limit = MASK_UCS2;
+    const Py_UCS4 max_char_limit = MAX_CHAR_UCS4;
+#else
+#error Invalid STRINGLIB_SIZEOF_CHAR (must be 1, 2 or 4)
+#endif
+    Py_UCS4 mask;
+    Py_ssize_t n = end - begin;
+    const STRINGLIB_CHAR *p = begin;
+    /* End of the part that can be processed four characters at a time. */
+    const STRINGLIB_CHAR *unrolled_end = begin + _Py_SIZE_ROUND_DOWN(n, 4);
+    Py_UCS4 max_char;
+
+    max_char = MAX_CHAR_ASCII;
+    mask = MASK_ASCII;
+    /* Main loop: OR four characters together and test them in one go. */
+    while (p < unrolled_end) {
+        STRINGLIB_CHAR bits = p[0] | p[1] | p[2] | p[3];
+        if (bits & mask) {
+            if (mask == mask_limit) {
+                /* Limit reached */
+                return max_char_limit;
+            }
+            if (mask == MASK_ASCII) {
+                max_char = MAX_CHAR_UCS1;
+                mask = MASK_UCS1;
+            }
+            else {
+                /* mask can't be MASK_UCS2 because of mask_limit above */
+                assert(mask == MASK_UCS1);
+                max_char = MAX_CHAR_UCS2;
+                mask = MASK_UCS2;
+            }
+            /* We check the new mask on the same chars in the next iteration */
+            continue;
+        }
+        p += 4;
+    }
+    /* Tail loop: the remaining characters, one at a time. */
+    while (p < end) {
+        if (p[0] & mask) {
+            if (mask == mask_limit) {
+                /* Limit reached */
+                return max_char_limit;
+            }
+            if (mask == MASK_ASCII) {
+                max_char = MAX_CHAR_UCS1;
+                mask = MASK_UCS1;
+            }
+            else {
+                /* mask can't be MASK_UCS2 because of mask_limit above */
+                assert(mask == MASK_UCS1);
+                max_char = MAX_CHAR_UCS2;
+                mask = MASK_UCS2;
+            }
+            /* We check the new mask on the same chars in the next iteration */
+            continue;
+        }
+        p++;
+    }
+    return max_char;
+}
+
+#undef MASK_ASCII
+#undef MASK_UCS1
+#undef MASK_UCS2
+#undef MAX_CHAR_ASCII
+#undef MAX_CHAR_UCS1
+#undef MAX_CHAR_UCS2
+#undef MAX_CHAR_UCS4
+
+#endif /* STRINGLIB_SIZEOF_CHAR == 1 */
+
diff --git a/contrib/tools/python3/Objects/stringlib/join.h b/contrib/tools/python3/Objects/stringlib/join.h
new file mode 100644
index 00000000000..de6bd83ffe4
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/join.h
@@ -0,0 +1,162 @@
+/* stringlib: bytes joining implementation */
+
+#if STRINGLIB_IS_UNICODE
+#error join.h only compatible with byte-wise strings
+#endif
+
+/* Concatenate the items of `iterable`, inserting `sep` between
+   consecutive items, and return the result as a new object created with
+   STRINGLIB_NEW.
+
+   Each item must either be an exact bytes object or support the buffer
+   protocol (PyBUF_SIMPLE).  Returns NULL with an exception set when:
+     - `iterable` cannot be turned into a fast sequence,
+     - the temporary Py_buffer array cannot be allocated (MemoryError),
+     - an item does not provide a buffer (TypeError),
+     - the total length would exceed PY_SSIZE_T_MAX (OverflowError),
+     - the sequence changes size while being scanned (RuntimeError),
+     - the result object cannot be allocated.
+
+   Work is done in two passes: the first acquires a buffer for every
+   item and sums up the sizes, the second copies the data. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(bytes_join)(PyObject *sep, PyObject *iterable)
+{
+    const char *sepstr = STRINGLIB_STR(sep);
+    Py_ssize_t seplen = STRINGLIB_LEN(sep);
+    PyObject *res = NULL;
+    char *p;
+    Py_ssize_t seqlen = 0;
+    Py_ssize_t sz = 0;
+    Py_ssize_t i, nbufs;
+    PyObject *seq, *item;
+    Py_buffer *buffers = NULL;
+    /* Up to this many items use the on-stack buffer array. */
+#define NB_STATIC_BUFFERS 10
+    Py_buffer static_buffers[NB_STATIC_BUFFERS];
+    /* Results smaller than this many bytes are copied with the GIL held. */
+#define GIL_THRESHOLD 1048576
+    int drop_gil = 1;
+    PyThreadState *save = NULL;
+
+    seq = PySequence_Fast(iterable, "can only join an iterable");
+    if (seq == NULL) {
+        return NULL;
+    }
+
+    seqlen = PySequence_Fast_GET_SIZE(seq);
+    if (seqlen == 0) {
+        Py_DECREF(seq);
+        return STRINGLIB_NEW(NULL, 0);
+    }
+#if !STRINGLIB_MUTABLE
+    /* Immutable type, single item of exactly that type: hand the item
+       itself back (new reference) instead of copying it. */
+    if (seqlen == 1) {
+        item = PySequence_Fast_GET_ITEM(seq, 0);
+        if (STRINGLIB_CHECK_EXACT(item)) {
+            Py_INCREF(item);
+            Py_DECREF(seq);
+            return item;
+        }
+    }
+#endif
+    if (seqlen > NB_STATIC_BUFFERS) {
+        buffers = PyMem_NEW(Py_buffer, seqlen);
+        if (buffers == NULL) {
+            Py_DECREF(seq);
+            PyErr_NoMemory();
+            return NULL;
+        }
+    }
+    else {
+        buffers = static_buffers;
+    }
+
+    /* Here is the general case.  Do a pre-pass to figure out the total
+     * amount of space we'll need (sz), and see whether all arguments are
+     * bytes-like.
+     */
+    for (i = 0, nbufs = 0; i < seqlen; i++) {
+        Py_ssize_t itemlen;
+        item = PySequence_Fast_GET_ITEM(seq, i);
+        if (PyBytes_CheckExact(item)) {
+            /* Fast path: fill in only the fields used below (obj, buf,
+               len) without going through the buffer protocol. */
+            buffers[i].obj = Py_NewRef(item);
+            buffers[i].buf = PyBytes_AS_STRING(item);
+            buffers[i].len = PyBytes_GET_SIZE(item);
+        }
+        else {
+            if (PyObject_GetBuffer(item, &buffers[i], PyBUF_SIMPLE) != 0) {
+                PyErr_Format(PyExc_TypeError,
+                             "sequence item %zd: expected a bytes-like object, "
+                             "%.80s found",
+                             i, Py_TYPE(item)->tp_name);
+                goto error;
+            }
+            /* If the backing objects are mutable, then dropping the GIL
+             * opens up race conditions where another thread tries to modify
+             * an object we are holding a buffer on. Such code has data
+             * races anyway, but this is a conservative approach that avoids
+             * changing the behaviour of that data race.
+             */
+            drop_gil = 0;
+        }
+        nbufs = i + 1;  /* for error cleanup */
+        itemlen = buffers[i].len;
+        /* Overflow-safe accumulation of the item length. */
+        if (itemlen > PY_SSIZE_T_MAX - sz) {
+            PyErr_SetString(PyExc_OverflowError,
+                            "join() result is too long");
+            goto error;
+        }
+        sz += itemlen;
+        if (i != 0) {
+            /* One separator in front of every item but the first. */
+            if (seplen > PY_SSIZE_T_MAX - sz) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "join() result is too long");
+                goto error;
+            }
+            sz += seplen;
+        }
+        /* The size is re-checked on every iteration and a mismatch with
+           the length recorded up front is reported as an error. */
+        if (seqlen != PySequence_Fast_GET_SIZE(seq)) {
+            PyErr_SetString(PyExc_RuntimeError,
+                            "sequence changed size during iteration");
+            goto error;
+        }
+    }
+
+    /* Allocate result space. */
+    res = STRINGLIB_NEW(NULL, sz);
+    if (res == NULL)
+        goto error;
+
+    /* Catenate everything. */
+    p = STRINGLIB_STR(res);
+    if (sz < GIL_THRESHOLD) {
+        drop_gil = 0;   /* Benefits are likely outweighed by the overheads */
+    }
+    if (drop_gil) {
+        save = PyEval_SaveThread();
+    }
+    if (!seplen) {
+        /* fast path: empty separator, just copy the items back to back */
+        for (i = 0; i < nbufs; i++) {
+            Py_ssize_t n = buffers[i].len;
+            char *q = buffers[i].buf;
+            memcpy(p, q, n);
+            p += n;
+        }
+    }
+    else {
+        for (i = 0; i < nbufs; i++) {
+            Py_ssize_t n;
+            char *q;
+            if (i) {
+                memcpy(p, sepstr, seplen);
+                p += seplen;
+            }
+            n = buffers[i].len;
+            q = buffers[i].buf;
+            memcpy(p, q, n);
+            p += n;
+        }
+    }
+    if (drop_gil) {
+        PyEval_RestoreThread(save);
+    }
+    goto done;
+
+error:
+    res = NULL;
+done:
+    /* Shared cleanup: release the nbufs buffers acquired so far and free
+       the buffer array if it was heap-allocated. */
+    Py_DECREF(seq);
+    for (i = 0; i < nbufs; i++)
+        PyBuffer_Release(&buffers[i]);
+    if (buffers != static_buffers)
+        PyMem_Free(buffers);
+    return res;
+}
+
+#undef NB_STATIC_BUFFERS
+#undef GIL_THRESHOLD
diff --git a/contrib/tools/python3/Objects/stringlib/localeutil.h b/contrib/tools/python3/Objects/stringlib/localeutil.h
new file mode 100644
index 00000000000..d77715ec0de
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/localeutil.h
@@ -0,0 +1,82 @@
+/* _PyUnicode_InsertThousandsGrouping() helper functions */
+
+/* Iteration state over a grouping string: each char of the string is
+   one group size, a 0 char ends the string and CHAR_MAX stops the
+   generator (see GroupGenerator_next). */
+typedef struct {
+    const char *grouping;   /* the grouping string being walked */
+    char previous;          /* last group size handed out; repeated once
+                               the terminating 0 is reached */
+    Py_ssize_t i; /* Where we're currently pointing in grouping. */
+} GroupGenerator;
+
+
+/* Reset *self so that it starts at the first char of `grouping`, with
+   no previously returned group size. */
+static void
+GroupGenerator_init(GroupGenerator *self, const char *grouping)
+{
+    self->i = 0;
+    self->previous = 0;
+    self->grouping = grouping;
+}
+
+/* Returns the next grouping, or 0 to signify end.
+
+   - At the terminating 0 of the grouping string, the most recently
+     returned group size is repeated (the position is not advanced).
+   - At CHAR_MAX, the generator stops and 0 is returned.
+   - Any other char is returned as the group size, remembered as the
+     previous size, and the position moves on by one. */
+static Py_ssize_t
+GroupGenerator_next(GroupGenerator *self)
+{
+    /* Note that we don't really do much error checking here. If a
+       grouping string contains just CHAR_MAX, for example, then just
+       terminate the generator. That shouldn't happen, but at least we
+       fail gracefully. */
+    const char ch = self->grouping[self->i];
+
+    if (ch == 0) {
+        return self->previous;
+    }
+    if (ch == CHAR_MAX) {
+        /* Stop the generator. */
+        return 0;
+    }
+
+    self->previous = ch;
+    self->i++;
+    return (Py_ssize_t)ch;
+}
+
+
+/* Fill in some digits, leading zeros, and thousands separator. All
+   are optional, depending on when we're called.
+
+   With writer == NULL nothing is copied: the only effect is to raise
+   *maxchar to the separator's maximum character value, and only if
+   *maxchar is still 127 and a separator was supplied.
+
+   Otherwise the output is produced right-to-left: *buffer_pos (and
+   *digits_pos for the digits) is moved back before each piece is written
+   at the new position, in the order separator, digits, zeros. */
+static void
+InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
+                             PyObject *digits, Py_ssize_t *digits_pos,
+                             Py_ssize_t n_chars, Py_ssize_t n_zeros,
+                             PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
+                             Py_UCS4 *maxchar)
+{
+    if (writer == NULL) {
+        /* if maxchar > 127, maxchar is already set */
+        if (*maxchar == 127 && thousands_sep) {
+            Py_UCS4 sep_maxchar = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
+            *maxchar = Py_MAX(*maxchar, sep_maxchar);
+        }
+        return;
+    }
+
+    if (thousands_sep) {
+        /* Copy the thousands_sep chars into the buffer. */
+        *buffer_pos -= thousands_sep_len;
+        _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+                                      thousands_sep, 0,
+                                      thousands_sep_len);
+    }
+
+    /* The group of digits goes in front of the separator. */
+    *buffer_pos -= n_chars;
+    *digits_pos -= n_chars;
+    _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+                                  digits, *digits_pos,
+                                  n_chars);
+
+    if (n_zeros != 0) {
+        /* Leading '0' characters go in front of the digits. */
+        *buffer_pos -= n_zeros;
+        int buf_kind = PyUnicode_KIND(writer->buffer);
+        void *buf_data = PyUnicode_DATA(writer->buffer);
+        unicode_fill(buf_kind, buf_data, '0', *buffer_pos, n_zeros);
+    }
+}
diff --git a/contrib/tools/python3/Objects/stringlib/partition.h b/contrib/tools/python3/Objects/stringlib/partition.h
new file mode 100644
index 00000000000..bcc217697b2
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/partition.h
@@ -0,0 +1,125 @@
+/* stringlib: partition implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#  error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+#if !STRINGLIB_MUTABLE && !defined(STRINGLIB_GET_EMPTY)
+#  error "STRINGLIB_GET_EMPTY must be defined if STRINGLIB_MUTABLE is zero"
+#endif
+
+
+/* Split str at the first occurrence of sep and return a new 3-tuple
+   (head, sep_obj, tail).
+
+   If sep does not occur, the tuple is (str, empty, empty): for mutable
+   types all three items are fresh objects, for immutable types str_obj
+   and the shared empty object are reused.
+
+   Returns NULL with ValueError set for an empty separator, and NULL if
+   the tuple cannot be created or an error is pending after the items
+   were built. */
+Py_LOCAL_INLINE(PyObject*)
+STRINGLIB(partition)(PyObject* str_obj,
+                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                    PyObject* sep_obj,
+                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
+{
+    PyObject *result;
+    Py_ssize_t idx;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+
+    result = PyTuple_New(3);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    idx = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_SEARCH);
+
+    if (idx >= 0) {
+        /* Separator found: head is str[:idx], tail starts after sep. */
+        Py_ssize_t tail_start = idx + sep_len;
+
+        PyTuple_SET_ITEM(result, 0, STRINGLIB_NEW(str, idx));
+        Py_INCREF(sep_obj);
+        PyTuple_SET_ITEM(result, 1, sep_obj);
+        PyTuple_SET_ITEM(result, 2,
+                         STRINGLIB_NEW(str + tail_start,
+                                       str_len - tail_start));
+
+        if (PyErr_Occurred()) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        return result;
+    }
+
+    /* Separator not found. */
+#if STRINGLIB_MUTABLE
+    PyTuple_SET_ITEM(result, 0, STRINGLIB_NEW(str, str_len));
+    PyTuple_SET_ITEM(result, 1, STRINGLIB_NEW(NULL, 0));
+    PyTuple_SET_ITEM(result, 2, STRINGLIB_NEW(NULL, 0));
+
+    if (PyErr_Occurred()) {
+        Py_DECREF(result);
+        return NULL;
+    }
+#else
+    {
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(result, 0, (PyObject*) str_obj);
+        PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
+        assert(empty != NULL);
+        /* The empty object is stored twice, so it needs two references. */
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(result, 1, empty);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(result, 2, empty);
+    }
+#endif
+    return result;
+}
+
+/* Split str at the last occurrence of sep and return a new 3-tuple
+   (head, sep_obj, tail).
+
+   If sep does not occur, the tuple is (empty, empty, str): for mutable
+   types all three items are fresh objects, for immutable types str_obj
+   and the shared empty object are reused.
+
+   Returns NULL with ValueError set for an empty separator, and NULL if
+   the tuple cannot be created or an error is pending after the items
+   were built. */
+Py_LOCAL_INLINE(PyObject*)
+STRINGLIB(rpartition)(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     PyObject* sep_obj,
+                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
+{
+    PyObject *result;
+    Py_ssize_t idx;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+
+    result = PyTuple_New(3);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    idx = FASTSEARCH(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
+
+    if (idx >= 0) {
+        /* Separator found: head is str[:idx], tail starts after sep. */
+        Py_ssize_t tail_start = idx + sep_len;
+
+        PyTuple_SET_ITEM(result, 0, STRINGLIB_NEW(str, idx));
+        Py_INCREF(sep_obj);
+        PyTuple_SET_ITEM(result, 1, sep_obj);
+        PyTuple_SET_ITEM(result, 2,
+                         STRINGLIB_NEW(str + tail_start,
+                                       str_len - tail_start));
+
+        if (PyErr_Occurred()) {
+            Py_DECREF(result);
+            return NULL;
+        }
+        return result;
+    }
+
+    /* Separator not found. */
+#if STRINGLIB_MUTABLE
+    PyTuple_SET_ITEM(result, 0, STRINGLIB_NEW(NULL, 0));
+    PyTuple_SET_ITEM(result, 1, STRINGLIB_NEW(NULL, 0));
+    PyTuple_SET_ITEM(result, 2, STRINGLIB_NEW(str, str_len));
+
+    if (PyErr_Occurred()) {
+        Py_DECREF(result);
+        return NULL;
+    }
+#else
+    {
+        PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
+        assert(empty != NULL);
+        /* The empty object is stored twice, so it needs two references. */
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(result, 0, empty);
+        Py_INCREF(empty);
+        PyTuple_SET_ITEM(result, 1, empty);
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(result, 2, (PyObject*) str_obj);
+    }
+#endif
+    return result;
+}
+
diff --git a/contrib/tools/python3/Objects/stringlib/replace.h b/contrib/tools/python3/Objects/stringlib/replace.h
new file mode 100644
index 00000000000..123c9f850f5
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/replace.h
@@ -0,0 +1,53 @@
+/* stringlib: replace implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Overwrite occurrences of u1 with u2 in the range [s, end), in place.
+
+   The character at s is overwritten unconditionally, so the caller is
+   expected to pass s already pointing at the first character to replace
+   (NOTE(review): presumably an occurrence of u1 -- confirm at the call
+   sites).  After that, further occurrences of u1 are located and
+   overwritten until either end is reached or the pre-decremented
+   maxcount becomes zero. */
+Py_LOCAL_INLINE(void)
+STRINGLIB(replace_1char_inplace)(STRINGLIB_CHAR* s, STRINGLIB_CHAR* end,
+                                 Py_UCS4 u1, Py_UCS4 u2, Py_ssize_t maxcount)
+{
+    *s = u2;
+    while (--maxcount && ++s != end) {
+        /* Find the next character to be replaced.
+
+           If it occurs often, it is faster to scan for it using an inline
+           loop.  If it occurs seldom, it is faster to scan for it using a
+           function call; the overhead of the function call is amortized
+           across the many characters that call covers.  We start with an
+           inline loop and use a heuristic to determine whether to fall back
+           to a function call. */
+        if (*s != u1) {
+            /* Number of inline steps to try before switching to the
+               function-call search. */
+            int attempts = 10;
+            /* search u1 in a dummy loop */
+            while (1) {
+                if (++s == end)
+                    return;
+                if (*s == u1)
+                    break;
+                if (!--attempts) {
+                    /* if u1 was not found for attempts iterations,
+                       use FASTSEARCH() or memchr() */
+#ifdef STRINGLIB_FAST_MEMCHR
+                    s++;
+                    s = STRINGLIB_FAST_MEMCHR(s, u1, end - s);
+                    if (s == NULL)
+                        return;
+#else
+                    Py_ssize_t i;
+                    STRINGLIB_CHAR ch1 = (STRINGLIB_CHAR) u1;
+                    s++;
+                    i = FASTSEARCH(s, end - s, &ch1, 1, 0, FAST_SEARCH);
+                    if (i < 0)
+                        return;
+                    s += i;
+#endif
+                    /* s now points at an occurrence of u1: leave the
+                       dummy loop so that it gets replaced below */
+                    break;
+                }
+            }
+        }
+        *s = u2;
+    }
+}
diff --git a/contrib/tools/python3/Objects/stringlib/split.h b/contrib/tools/python3/Objects/stringlib/split.h
new file mode 100644
index 00000000000..0c11b7214e9
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/split.h
@@ -0,0 +1,390 @@
+/* stringlib: split implementation */
+
+#ifndef STRINGLIB_FASTSEARCH_H
+#error must include "stringlib/fastsearch.h" before including this module
+#endif
+
+/* Overallocate the initial list to reduce the number of reallocs for small
+   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
+   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
+   text (roughly 11 words per line) and field delimited data (usually 1-10
+   fields).  For large strings the split algorithms are bandwidth limited
+   so increasing the preallocation likely will not improve things.*/
+
+#define MAX_PREALLOC 12
+
+/* Initial list length for a split limited to `maxsplit` splits: one more
+   than the number of splits (5 splits gives 6 elements), capped at
+   MAX_PREALLOC.  The argument is parenthesized so that an expression such
+   as `c ? a : b` keeps its meaning; it is evaluated twice, so it must not
+   have side effects. */
+#define PREALLOC_SIZE(maxsplit) \
+    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
+
+/* Append the substring data[left:right] to `list`.
+
+   Uses the caller's locals `sub` and `list` and jumps to the caller's
+   `onError` label if creating the substring or appending it fails; the
+   temporary reference held in `sub` is dropped in both outcomes of the
+   append.  The body is wrapped in do/while(0) so that the expansion is a
+   single statement even under an unbraced if/else; call it with a trailing
+   semicolon. */
+#define SPLIT_APPEND(data, left, right)         \
+    do {                                        \
+        sub = STRINGLIB_NEW((data) + (left),    \
+                            (right) - (left));  \
+        if (sub == NULL)                        \
+            goto onError;                       \
+        if (PyList_Append(list, sub)) {         \
+            Py_DECREF(sub);                     \
+            goto onError;                       \
+        }                                       \
+        Py_DECREF(sub);                         \
+    } while (0)
+
+/* Add the substring data[left:right] as item number `count` of `list`.
+
+   Uses the caller's locals `sub`, `list` and `count` and jumps to the
+   caller's `onError` label on failure.  While count is below MAX_PREALLOC
+   the item is stored directly into the preallocated slot with
+   PyList_SET_ITEM (which keeps the reference held in `sub`); after that it
+   is appended and the temporary reference is dropped.  `count` is
+   incremented on success.  The body is wrapped in do/while(0) so that
+   `if (c) SPLIT_ADD(...); else ...` is well-formed; call it with a trailing
+   semicolon. */
+#define SPLIT_ADD(data, left, right)            \
+    do {                                        \
+        sub = STRINGLIB_NEW((data) + (left),    \
+                            (right) - (left));  \
+        if (sub == NULL)                        \
+            goto onError;                       \
+        if (count < MAX_PREALLOC) {             \
+            PyList_SET_ITEM(list, count, sub);  \
+        } else {                                \
+            if (PyList_Append(list, sub)) {     \
+                Py_DECREF(sub);                 \
+                goto onError;                   \
+            }                                   \
+            Py_DECREF(sub);                     \
+        }                                       \
+        count++;                                \
+    } while (0)
+
+
+/* Always force the list to the expected size.  `count` is the caller's
+   local holding the number of items actually stored, which can be smaller
+   than the length the list was preallocated with. */
+#define FIX_PREALLOC_SIZE(list) Py_SET_SIZE(list, count)
+
+/* Split str[0:str_len] on runs of characters matching STRINGLIB_ISSPACE,
+   making at most maxcount splits.  Whatever text remains once maxcount is
+   exhausted is added as one final item, with its leading whitespace
+   skipped.
+
+   Returns a new list, or NULL on failure (the partially built list is
+   released at onError).  For immutable types, when the first token spans
+   the whole input and str_obj is an exact instance, str_obj itself is
+   stored as the only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(split_whitespace)(PyObject* str_obj,
+                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                           Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = 0;
+    while (maxcount-- > 0) {
+        /* skip the whitespace in front of the next token */
+        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
+            i++;
+        if (i == str_len) break;
+        /* j marks the token start; advance i to one past its end */
+        j = i; i++;
+        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
+            i++;
+#if !STRINGLIB_MUTABLE
+        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No whitespace in str_obj, so just use it as list[0] */
+            Py_INCREF(str_obj);
+            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+            count++;
+            break;
+        }
+#endif
+        SPLIT_ADD(str, j, i);
+    }
+
+    if (i < str_len) {
+        /* Only occurs when maxcount was reached */
+        /* Skip any remaining whitespace and copy to end of string */
+        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
+            i++;
+        if (i != str_len)
+            SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] at occurrences of the single character ch, making
+   at most maxcount splits.  Empty fields are kept: the piece between two
+   adjacent separators is added as an empty substring, and the remainder
+   after the last split (possibly empty) is always added last.
+
+   Returns a new list, or NULL on failure.  For immutable types, when no
+   split was made and str_obj is an exact instance, str_obj itself is stored
+   as the only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(split_char)(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     const STRINGLIB_CHAR ch,
+                     Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    /* i is the start of the current field, j the scan position */
+    i = j = 0;
+    while ((j < str_len) && (maxcount-- > 0)) {
+        for(; j < str_len; j++) {
+            /* I found that using memchr makes no difference */
+            if (str[j] == ch) {
+                SPLIT_ADD(str, i, j);
+                i = j = j + 1;
+                break;
+            }
+        }
+    }
+#if !STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* ch not in str_obj, so just use str_obj as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    if (i <= str_len) {
+        SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] at occurrences of the separator sep[0:sep_len],
+   making at most maxcount splits; the remainder after the last split is
+   always added as the final item.
+
+   An empty separator sets ValueError("empty separator") and returns NULL;
+   a one-character separator is delegated to split_char.  Otherwise returns
+   a new list, or NULL on failure.  For immutable types, when no match was
+   found and str_obj is an exact instance, str_obj itself is stored as the
+   only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(split)(PyObject* str_obj,
+                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
+                Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, pos, count=0;
+    PyObject *list, *sub;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+    else if (sep_len == 1)
+        return STRINGLIB(split_char)(str_obj, str, str_len, sep[0], maxcount);
+
+    list = PyList_New(PREALLOC_SIZE(maxcount));
+    if (list == NULL)
+        return NULL;
+
+    i = j = 0;
+    while (maxcount-- > 0) {
+        /* pos is relative to str+i; a negative value is treated as
+           "no further match" */
+        pos = FASTSEARCH(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
+        if (pos < 0)
+            break;
+        j = i + pos;
+        SPLIT_ADD(str, i, j);
+        /* continue searching after the separator just consumed */
+        i = j + sep_len;
+    }
+#if !STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* No match in str_obj, so just use it as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    {
+        SPLIT_ADD(str, i, str_len);
+    }
+    FIX_PREALLOC_SIZE(list);
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] on runs of characters matching STRINGLIB_ISSPACE,
+   scanning from the end and making at most maxcount splits.  Whatever text
+   remains at the front once maxcount is exhausted is added as one item,
+   with its trailing whitespace skipped.
+
+   Items are collected right-to-left and the list is reversed before it is
+   returned.  Returns a new list, or NULL on failure.  For immutable types,
+   when the first token found spans the whole input and str_obj is an exact
+   instance, str_obj itself is stored as the only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(rsplit_whitespace)(PyObject* str_obj,
+                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                            Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    i = j = str_len - 1;
+    while (maxcount-- > 0) {
+        /* skip the whitespace behind the next token */
+        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
+            i--;
+        if (i < 0) break;
+        /* j marks the last character of the token; move i to just before
+           its first character */
+        j = i; i--;
+        while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
+            i--;
+#if !STRINGLIB_MUTABLE
+        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No whitespace in str_obj, so just use it as list[0] */
+            Py_INCREF(str_obj);
+            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+            count++;
+            break;
+        }
+#endif
+        SPLIT_ADD(str, i + 1, j + 1);
+    }
+
+    if (i >= 0) {
+        /* Only occurs when maxcount was reached */
+        /* Skip any remaining whitespace and copy to beginning of string */
+        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
+            i--;
+        if (i >= 0)
+            SPLIT_ADD(str, 0, i + 1);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] at occurrences of the single character ch, scanning
+   from the end and making at most maxcount splits.  Empty fields are kept,
+   and the remainder in front of the last split (possibly empty) is always
+   added.
+
+   Items are collected right-to-left and the list is reversed before it is
+   returned.  Returns a new list, or NULL on failure.  For immutable types,
+   when no split was made and str_obj is an exact instance, str_obj itself
+   is stored as the only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(rsplit_char)(PyObject* str_obj,
+                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                      const STRINGLIB_CHAR ch,
+                      Py_ssize_t maxcount)
+{
+    Py_ssize_t i, j, count=0;
+    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    /* j is the last index of the current field, i the scan position */
+    i = j = str_len - 1;
+    while ((i >= 0) && (maxcount-- > 0)) {
+        for(; i >= 0; i--) {
+            if (str[i] == ch) {
+                SPLIT_ADD(str, i + 1, j + 1);
+                j = i = i - 1;
+                break;
+            }
+        }
+    }
+#if !STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* ch not in str_obj, so just use str_obj as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    if (j >= -1) {
+        SPLIT_ADD(str, 0, j + 1);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] at occurrences of the separator sep[0:sep_len],
+   searching from the end and making at most maxcount splits; the remainder
+   in front of the last split is always added.
+
+   An empty separator sets ValueError("empty separator") and returns NULL;
+   a one-character separator is delegated to rsplit_char.  Items are
+   collected right-to-left and the list is reversed before it is returned.
+   Returns a new list, or NULL on failure.  For immutable types, when no
+   match was found and str_obj is an exact instance, str_obj itself is
+   stored as the only item instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(rsplit)(PyObject* str_obj,
+                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
+                 Py_ssize_t maxcount)
+{
+    Py_ssize_t j, pos, count=0;
+    PyObject *list, *sub;
+
+    if (sep_len == 0) {
+        PyErr_SetString(PyExc_ValueError, "empty separator");
+        return NULL;
+    }
+    else if (sep_len == 1)
+        return STRINGLIB(rsplit_char)(str_obj, str, str_len, sep[0], maxcount);
+
+    list = PyList_New(PREALLOC_SIZE(maxcount));
+    if (list == NULL)
+        return NULL;
+
+    /* j is the exclusive end of the part of str still to be searched */
+    j = str_len;
+    while (maxcount-- > 0) {
+        /* a negative pos is treated as "no further match" */
+        pos = FASTSEARCH(str, j, sep, sep_len, -1, FAST_RSEARCH);
+        if (pos < 0)
+            break;
+        SPLIT_ADD(str, pos + sep_len, j);
+        j = pos;
+    }
+#if !STRINGLIB_MUTABLE
+    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
+        /* No match in str_obj, so just use it as list[0] */
+        Py_INCREF(str_obj);
+        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
+        count++;
+    } else
+#endif
+    {
+        SPLIT_ADD(str, 0, j);
+    }
+    FIX_PREALLOC_SIZE(list);
+    if (PyList_Reverse(list) < 0)
+        goto onError;
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
+/* Split str[0:str_len] into lines at characters matching
+   STRINGLIB_ISLINEBREAK, treating "\r\n" as a single line break.  When
+   keepends is non-zero each item includes its line break, otherwise the
+   line break is dropped.
+
+   Returns a new list (empty for an empty input), or NULL on failure.  For
+   immutable types, when the first line spans the whole input and str_obj is
+   an exact instance, str_obj itself is appended instead of a copy. */
+Py_LOCAL_INLINE(PyObject *)
+STRINGLIB(splitlines)(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     int keepends)
+{
+    /* This does not use the preallocated list because splitlines is
+       usually run with hundreds of newlines.  The overhead of
+       switching between PyList_SET_ITEM and append causes about a
+       2-3% slowdown for that common case.  A smarter implementation
+       could move the if check out, so the SET_ITEMs are done first
+       and the appends only done when the prealloc buffer is full.
+       That's too much work for little gain.*/
+
+    Py_ssize_t i;
+    Py_ssize_t j;
+    PyObject *list = PyList_New(0);
+    PyObject *sub;
+
+    if (list == NULL)
+        return NULL;
+
+    /* j is the start of the current line, i the scan position */
+    for (i = j = 0; i < str_len; ) {
+        Py_ssize_t eol;
+
+        /* Find a line and append it */
+        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
+            i++;
+
+        /* Skip the line break reading CRLF as one line break */
+        eol = i;
+        if (i < str_len) {
+            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
+                i += 2;
+            else
+                i++;
+            /* eol stays before the line break unless it is to be kept */
+            if (keepends)
+                eol = i;
+        }
+#if !STRINGLIB_MUTABLE
+        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
+            /* No linebreak in str_obj, so just use it as list[0] */
+            if (PyList_Append(list, str_obj))
+                goto onError;
+            break;
+        }
+#endif
+        SPLIT_APPEND(str, j, eol);
+        j = i;
+    }
+    return list;
+
+  onError:
+    Py_DECREF(list);
+    return NULL;
+}
+
diff --git a/contrib/tools/python3/Objects/stringlib/stringdefs.h b/contrib/tools/python3/Objects/stringlib/stringdefs.h
new file mode 100644
index 00000000000..484b98b7291
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/stringdefs.h
@@ -0,0 +1,28 @@
+#ifndef STRINGLIB_STRINGDEFS_H
+#define STRINGLIB_STRINGDEFS_H
+
+/* this is sort of a hack.  there's at least one place (formatting
+   floats) where some stringlib code takes a different path if it's
+   compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     0
+
+#define FASTSEARCH fastsearch
+#define STRINGLIB(F) stringlib_##F
+#define STRINGLIB_OBJECT         PyBytesObject
+#define STRINGLIB_SIZEOF_CHAR    1
+#define STRINGLIB_CHAR           char
+#define STRINGLIB_TYPE_NAME      "string"
+#define STRINGLIB_PARSE_CODE     "S"
+#define STRINGLIB_ISSPACE        Py_ISSPACE
+/* Function-like macros: the argument is parenthesized so that an expression
+   argument (e.g. `c | 0`) keeps its meaning.  It is still evaluated more
+   than once, so do not pass an expression with side effects. */
+#define STRINGLIB_ISLINEBREAK(x) (((x) == '\n') || ((x) == '\r'))
+#define STRINGLIB_ISDECIMAL(x)   (((x) >= '0') && ((x) <= '9'))
+#define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? ((x) - '0') : -1)
+#define STRINGLIB_STR            PyBytes_AS_STRING
+#define STRINGLIB_LEN            PyBytes_GET_SIZE
+#define STRINGLIB_NEW            PyBytes_FromStringAndSize
+#define STRINGLIB_CHECK          PyBytes_Check
+#define STRINGLIB_CHECK_EXACT    PyBytes_CheckExact
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_Repr
+#define STRINGLIB_FAST_MEMCHR    memchr
+#endif /* !STRINGLIB_STRINGDEFS_H */
diff --git a/contrib/tools/python3/Objects/stringlib/transmogrify.h b/contrib/tools/python3/Objects/stringlib/transmogrify.h
new file mode 100644
index 00000000000..71099bb586e
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/transmogrify.h
@@ -0,0 +1,739 @@
+#if STRINGLIB_IS_UNICODE
+# error "transmogrify.h only compatible with byte-wise strings"
+#endif
+
+/* the more complicated methods.  parts of these should be pulled out into the
+   shared code in bytes_methods.c to cut down on duplicate code bloat.  */
+
+/*[clinic input]
+class B "PyObject *" "&PyType_Type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=2935558188d97c76]*/
+
+#include "clinic/transmogrify.h.h"
+
+/* Return self unchanged when that is safe, otherwise a fresh copy: for
+   immutable types an exact instance is returned as a new reference, while
+   non-exact instances and mutable types get a new object built from self's
+   data. */
+static inline PyObject *
+return_self(PyObject *self)
+{
+#if STRINGLIB_MUTABLE
+    return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+#else
+    if (!STRINGLIB_CHECK_EXACT(self)) {
+        return STRINGLIB_NEW(STRINGLIB_STR(self), STRINGLIB_LEN(self));
+    }
+    return Py_NewRef(self);
+#endif
+}
+
+/*[clinic input]
+B.expandtabs as stringlib_expandtabs
+
+    tabsize: int = 8
+
+Return a copy where all tab characters are expanded using spaces.
+
+If tabsize is not given, a tab size of 8 characters is assumed.
+[clinic start generated code]*/
+
+static PyObject *
+stringlib_expandtabs_impl(PyObject *self, int tabsize)
+/*[clinic end generated code: output=069cb7fae72e4c2b input=3c6d3b12aa3ccbea]*/
+{
+    /* Works in two passes over the input: the first only measures the
+       expanded length, checking every addition against PY_SSIZE_T_MAX; the
+       second writes the result.  A tab advances to the next multiple of
+       tabsize (and is dropped when tabsize <= 0); '\n' and '\r' reset the
+       column.  Raises OverflowError("result too long") if the expanded
+       length would not fit. */
+    const char *in, *in_end;
+    char *out;
+    Py_ssize_t finished = 0;    /* length of all completed lines */
+    Py_ssize_t column = 0;      /* length of the current line so far */
+    PyObject *result;
+
+    /* Pass 1: measure */
+    in_end = STRINGLIB_STR(self) + STRINGLIB_LEN(self);
+    for (in = STRINGLIB_STR(self); in < in_end; in++) {
+        if (*in == '\t') {
+            if (tabsize > 0) {
+                Py_ssize_t step = tabsize - (column % tabsize);
+                if (column > PY_SSIZE_T_MAX - step)
+                    goto overflow;
+                column += step;
+            }
+            continue;
+        }
+        if (column > PY_SSIZE_T_MAX - 1)
+            goto overflow;
+        column++;
+        if (*in == '\n' || *in == '\r') {
+            if (finished > PY_SSIZE_T_MAX - column)
+                goto overflow;
+            finished += column;
+            column = 0;
+        }
+    }
+    if (finished > PY_SSIZE_T_MAX - column)
+        goto overflow;
+
+    /* Pass 2: allocate and fill */
+    result = STRINGLIB_NEW(NULL, finished + column);
+    if (!result)
+        return NULL;
+
+    column = 0;
+    out = STRINGLIB_STR(result);
+    for (in = STRINGLIB_STR(self); in < in_end; in++) {
+        if (*in == '\t') {
+            if (tabsize > 0) {
+                Py_ssize_t spaces = tabsize - (column % tabsize);
+                column += spaces;
+                for (; spaces != 0; spaces--)
+                    *out++ = ' ';
+            }
+            continue;
+        }
+        column++;
+        *out++ = *in;
+        if (*in == '\n' || *in == '\r')
+            column = 0;
+    }
+    return result;
+
+  overflow:
+    PyErr_SetString(PyExc_OverflowError, "result too long");
+    return NULL;
+}
+
+/* Build a new object holding `left` copies of fill, then self's data, then
+   `right` copies of fill.  Negative counts are treated as zero; when both
+   end up zero the result comes from return_self().  Returns NULL if the
+   new object cannot be created. */
+static inline PyObject *
+pad(PyObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
+{
+    Py_ssize_t body_len;
+    PyObject *result;
+
+    left = (left < 0) ? 0 : left;
+    right = (right < 0) ? 0 : right;
+    if (left == 0 && right == 0) {
+        return return_self(self);
+    }
+
+    body_len = STRINGLIB_LEN(self);
+    result = STRINGLIB_NEW(NULL, left + body_len + right);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    if (left != 0) {
+        memset(STRINGLIB_STR(result), fill, left);
+    }
+    memcpy(STRINGLIB_STR(result) + left, STRINGLIB_STR(self), body_len);
+    if (right != 0) {
+        memset(STRINGLIB_STR(result) + left + body_len, fill, right);
+    }
+    return result;
+}
+
+/*[clinic input]
+B.ljust as stringlib_ljust
+
+    width: Py_ssize_t
+    fillchar: char = b' '
+    /
+
+Return a left-justified string of length width.
+
+Padding is done using the specified fill character.
+[clinic start generated code]*/
+
+static PyObject *
+stringlib_ljust_impl(PyObject *self, Py_ssize_t width, char fillchar)
+/*[clinic end generated code: output=c79ca173c5ff8337 input=eff2d014bc7d80df]*/
+{
+    /* Input already at least `width` long is handed to return_self();
+       otherwise the missing length is added as fill on the right. */
+    const Py_ssize_t len = STRINGLIB_LEN(self);
+
+    if (width <= len) {
+        return return_self(self);
+    }
+    return pad(self, 0, width - len, fillchar);
+}
+
+
+/*[clinic input]
+B.rjust as stringlib_rjust
+
+    width: Py_ssize_t
+    fillchar: char = b' '
+    /
+
+Return a right-justified string of length width.
+
+Padding is done using the specified fill character.
+[clinic start generated code]*/
+
+static PyObject *
+stringlib_rjust_impl(PyObject *self, Py_ssize_t width, char fillchar)
+/*[clinic end generated code: output=7df5d728a5439570 input=218b0bd31308955d]*/
+{
+    /* Input already at least `width` long is handed to return_self();
+       otherwise the missing length is added as fill on the left. */
+    const Py_ssize_t len = STRINGLIB_LEN(self);
+
+    if (width <= len) {
+        return return_self(self);
+    }
+    return pad(self, width - len, 0, fillchar);
+}
+
+
+/*[clinic input]
+B.center as stringlib_center
+
+    width: Py_ssize_t
+    fillchar: char = b' '
+    /
+
+Return a centered string of length width.
+
+Padding is done using the specified fill character.
+[clinic start generated code]*/
+
+static PyObject *
+stringlib_center_impl(PyObject *self, Py_ssize_t width, char fillchar)
+/*[clinic end generated code: output=d8da2e055288b4c2 input=3776fd278765d89b]*/
+{
+    /* Input already at least `width` long is handed to return_self();
+       otherwise the missing length is split between both sides. */
+    const Py_ssize_t len = STRINGLIB_LEN(self);
+    Py_ssize_t total, lead;
+
+    if (width <= len) {
+        return return_self(self);
+    }
+
+    total = width - len;
+    /* Half the fill goes on the left; when the fill amount and the width
+       are both odd, the left side gets the extra character. */
+    lead = total / 2 + (total & width & 1);
+    return pad(self, lead, total - lead, fillchar);
+}
+
+/*[clinic input]
+B.zfill as stringlib_zfill
+
+    width: Py_ssize_t
+    /
+
+Pad a numeric string with zeros on the left, to fill a field of the given width.
+
+The original string is never truncated.
+[clinic start generated code]*/
+
+static PyObject *
+stringlib_zfill_impl(PyObject *self, Py_ssize_t width)
+/*[clinic end generated code: output=0b3c684a7f1b2319 input=2da6d7b8e9bcb19a]*/
+{
+    /* Input already at least `width` long is handed to return_self();
+       otherwise it is left-padded with '0' and a leading sign, if any, is
+       moved in front of the zeros. */
+    const Py_ssize_t len = STRINGLIB_LEN(self);
+    Py_ssize_t zeros;
+    PyObject *result;
+    char *buf;
+    char lead;
+
+    if (width <= len) {
+        return return_self(self);
+    }
+
+    zeros = width - len;
+    result = pad(self, zeros, 0, '0');
+    if (result == NULL) {
+        return NULL;
+    }
+
+    /* buf[zeros] is the first character copied from self */
+    buf = STRINGLIB_STR(result);
+    lead = buf[zeros];
+    if (lead == '+' || lead == '-') {
+        /* move sign to beginning of string */
+        buf[0] = lead;
+        buf[zeros] = '0';
+    }
+    return result;
+}
+
+
+/* find and count characters and substrings */
+
+#define findchar(target, target_len, c)                         \
+  ((char *)memchr((const void *)(target), c, target_len))
+
+
+/* Count occurrences of c in target[0:target_len], stopping as soon as the
+   count reaches maxcount. */
+static Py_ssize_t
+countchar(const char *target, Py_ssize_t target_len, char c,
+          Py_ssize_t maxcount)
+{
+    const char *limit = target + target_len;
+    const char *cursor = target;
+    Py_ssize_t hits = 0;
+
+    for (;;) {
+        cursor = findchar(cursor, limit - cursor, c);
+        if (cursor == NULL)
+            break;
+        if (++hits >= maxcount)
+            break;
+        /* resume the search just past this occurrence */
+        cursor++;
+    }
+    return hits;
+}
+
+
+/* Algorithms for different cases of string replacement */
+
+/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
+/* Replacement of the empty string: `to` is inserted at the very start and
+   then after each of the following characters of self, for `count`
+   insertions in total, where count = min(maxcount, len(self) + 1).  The
+   rest of self is copied unchanged.  Raises OverflowError if the result
+   length would exceed PY_SSIZE_T_MAX; returns NULL on failure. */
+static PyObject *
+stringlib_replace_interleave(PyObject *self,
+                             const char *to_s, Py_ssize_t to_len,
+                             Py_ssize_t maxcount)
+{
+    const char *self_s;
+    char *result_s;
+    Py_ssize_t self_len, result_len;
+    Py_ssize_t count, i;
+    PyObject *result;
+
+    self_len = STRINGLIB_LEN(self);
+
+    /* 1 at the end plus 1 after every character;
+       count = min(maxcount, self_len + 1) */
+    if (maxcount <= self_len) {
+        count = maxcount;
+    }
+    else {
+        /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
+        count = self_len + 1;
+    }
+
+    /* Check for overflow */
+    /*   result_len = count * to_len + self_len; */
+    assert(count > 0);
+    if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "replace bytes is too long");
+        return NULL;
+    }
+    result_len = count * to_len + self_len;
+    result = STRINGLIB_NEW(NULL, result_len);
+    if (result == NULL) {
+        return NULL;
+    }
+
+    self_s = STRINGLIB_STR(self);
+    result_s = STRINGLIB_STR(result);
+
+    if (to_len > 1) {
+        /* Lay the first one down (guaranteed this will occur) */
+        memcpy(result_s, to_s, to_len);
+        result_s += to_len;
+        count -= 1;
+
+        for (i = 0; i < count; i++) {
+            *result_s++ = *self_s++;
+            memcpy(result_s, to_s, to_len);
+            result_s += to_len;
+        }
+    }
+    else {
+        /* to_len == 1 here: same interleaving with a plain byte store
+           instead of memcpy */
+        result_s[0] = to_s[0];
+        result_s += to_len;
+        count -= 1;
+        for (i = 0; i < count; i++) {
+            *result_s++ = *self_s++;
+            result_s[0] = to_s[0];
+            result_s += to_len;
+        }
+    }
+
+    /* Copy the rest of the original string */
+    /* i is the number of characters of self consumed by the loop above */
+    memcpy(result_s, self_s, self_len - i);
+
+    return result;
+}
+
+/* Special case for deleting a single character */
+/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
+/* Returns a new object with the first `count` occurrences of from_c
+   removed, where count is what countchar() reports (at most maxcount).
+   When there is no occurrence the result comes from return_self().
+   Returns NULL if the result object cannot be created. */
+static PyObject *
+stringlib_replace_delete_single_character(PyObject *self,
+                                          char from_c, Py_ssize_t maxcount)
+{
+    const char *self_s, *start, *next, *end;
+    char *result_s;
+    Py_ssize_t self_len, result_len;
+    Py_ssize_t count;
+    PyObject *result;
+
+    self_len = STRINGLIB_LEN(self);
+    self_s = STRINGLIB_STR(self);
+
+    count = countchar(self_s, self_len, from_c, maxcount);
+    if (count == 0) {
+        return return_self(self);
+    }
+
+    result_len = self_len - count;  /* from_len == 1 */
+    assert(result_len>=0);
+
+    result = STRINGLIB_NEW(NULL, result_len);
+    if (result == NULL) {
+        return NULL;
+    }
+    result_s = STRINGLIB_STR(result);
+
+    start = self_s;
+    end = self_s + self_len;
+    while (count-- > 0) {
+        next = findchar(start, end - start, from_c);
+        if (next == NULL)
+            break;
+        /* copy the run before the match, then step over the match */
+        memcpy(result_s, start, next - start);
+        result_s += (next - start);
+        start = next + 1;
+    }
+    /* copy whatever follows the last deleted character */
+    memcpy(result_s, start, end - start);
+
+    return result;
+}
+
+/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
+/* Returns a new object with the first `count` occurrences of
+   from_s[0:from_len] removed, where count is what stringlib_count()
+   reports for the given maxcount.  When there is no occurrence the result
+   comes from return_self().  Returns NULL if the result object cannot be
+   created. */
+
+static PyObject *
+stringlib_replace_delete_substring(PyObject *self,
+                                   const char *from_s, Py_ssize_t from_len,
+                                   Py_ssize_t maxcount)
+{
+    const char *self_s, *start, *next, *end;
+    char *result_s;
+    Py_ssize_t self_len, result_len;
+    Py_ssize_t count, offset;
+    PyObject *result;
+
+    self_len = STRINGLIB_LEN(self);
+    self_s = STRINGLIB_STR(self);
+
+    count = stringlib_count(self_s, self_len,
+                            from_s, from_len,
+                            maxcount);
+
+    if (count == 0) {
+        /* no matches */
+        return return_self(self);
+    }
+
+    result_len = self_len - (count * from_len);
+    assert (result_len>=0);
+
+    result = STRINGLIB_NEW(NULL, result_len);
+    if (result == NULL) {
+        return NULL;
+    }
+    result_s = STRINGLIB_STR(result);
+
+    start = self_s;
+    end = self_s + self_len;
+    while (count-- > 0) {
+        /* offset is relative to start; -1 is treated as "not found" */
+        offset = stringlib_find(start, end - start,
+                                from_s, from_len,
+                                0);
+        if (offset == -1)
+            break;
+        next = start + offset;
+
+        /* copy the run before the match, then step over the match */
+        memcpy(result_s, start, next - start);
+
+        result_s += (next - start);
+        start = next + from_len;
+    }
+    /* copy whatever follows the last deleted occurrence */
+    memcpy(result_s, start, end - start);
+    return result;
+}
+
+/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
+/* Same-length replacement of one character by another: self is copied into
+   a new object and up to maxcount occurrences of from_c are overwritten
+   with to_c in that copy.  When from_c does not occur the result comes from
+   return_self().  Returns NULL if the copy cannot be created. */
+static PyObject *
+stringlib_replace_single_character_in_place(PyObject *self,
+                                            char from_c, char to_c,
+                                            Py_ssize_t maxcount)
+{
+    const char *src = STRINGLIB_STR(self);
+    const Py_ssize_t len = STRINGLIB_LEN(self);
+    const char *first = findchar(src, len, from_c);
+    const char *copy_end;
+    char *copy_s, *cursor, *hit;
+    PyObject *copy;
+
+    if (first == NULL) {
+        /* No matches; return the original bytes */
+        return return_self(self);
+    }
+
+    /* Need to make a new bytes */
+    copy = STRINGLIB_NEW(NULL, len);
+    if (copy == NULL) {
+        return NULL;
+    }
+    copy_s = STRINGLIB_STR(copy);
+    memcpy(copy_s, src, len);
+
+    /* The first match was found in self; patch the same offset in the copy
+       and continue searching the copy from the next character. */
+    cursor = copy_s + (first - src);
+    *cursor++ = to_c;
+    copy_end = copy_s + len;
+
+    for (maxcount--; maxcount > 0; maxcount--) {
+        hit = findchar(cursor, copy_end - cursor, from_c);
+        if (hit == NULL)
+            break;
+        *hit = to_c;
+        cursor = hit + 1;
+    }
+
+    return copy;
+}
+
+/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
+/* Same-length substring replacement: self is copied into a new object and
+   up to maxcount occurrences of from_s are overwritten with to_s in that
+   copy.  to_len is not read; from_len bytes of to_s are copied, which
+   relies on the two lengths being equal as stated above.  When from_s does
+   not occur the result comes from return_self().  Returns NULL if the copy
+   cannot be created. */
+static PyObject *
+stringlib_replace_substring_in_place(PyObject *self,
+                                     const char *from_s, Py_ssize_t from_len,
+                                     const char *to_s, Py_ssize_t to_len,
+                                     Py_ssize_t maxcount)
+{
+    const char *self_s, *end;
+    char *result_s, *start;
+    Py_ssize_t self_len, offset;
+    PyObject *result;
+
+    /* The result bytes will be the same size */
+
+    self_s = STRINGLIB_STR(self);
+    self_len = STRINGLIB_LEN(self);
+
+    offset = stringlib_find(self_s, self_len,
+                            from_s, from_len,
+                            0);
+    if (offset == -1) {
+        /* No matches; return the original bytes */
+        return return_self(self);
+    }
+
+    /* Need to make a new bytes */
+    result = STRINGLIB_NEW(NULL, self_len);
+    if (result == NULL) {
+        return NULL;
+    }
+    result_s = STRINGLIB_STR(result);
+    memcpy(result_s, self_s, self_len);
+
+    /* change everything in-place, starting with this one */
+    start =  result_s + offset;
+    memcpy(start, to_s, from_len);
+    start += from_len;
+    end = result_s + self_len;
+
+    /* the first replacement above already used one unit of maxcount */
+    while ( --maxcount > 0) {
+        /* offset is relative to start; -1 is treated as "not found" */
+        offset = stringlib_find(start, end - start,
+                                from_s, from_len,
+                                0);
+        if (offset == -1)
+            break;
+        memcpy(start + offset, to_s, from_len);
+        start += offset + from_len;
+    }
+
+    return result;
+}
+
+/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
+/* Returns a new object in which the first `count` occurrences of from_c
+   are replaced by to_s[0:to_len], where count is what countchar() reports
+   (at most maxcount).  When from_c does not occur the result comes from
+   return_self().  Raises OverflowError if the result length would exceed
+   PY_SSIZE_T_MAX; returns NULL on failure. */
+static PyObject *
+stringlib_replace_single_character(PyObject *self,
+                                   char from_c,
+                                   const char *to_s, Py_ssize_t to_len,
+                                   Py_ssize_t maxcount)
+{
+    const char *self_s, *start, *next, *end;
+    char *result_s;
+    Py_ssize_t self_len, result_len;
+    Py_ssize_t count;
+    PyObject *result;
+
+    self_s = STRINGLIB_STR(self);
+    self_len = STRINGLIB_LEN(self);
+
+    count = countchar(self_s, self_len, from_c, maxcount);
+    if (count == 0) {
+        /* no matches, return unchanged */
+        return return_self(self);
+    }
+
+    /* use the difference between current and new, hence the "-1" */
+    /*   result_len = self_len + count * (to_len-1)  */
+    assert(count > 0);
+    if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
+        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
+        return NULL;
+    }
+    result_len = self_len + count * (to_len - 1);
+
+    result = STRINGLIB_NEW(NULL, result_len);
+    if (result == NULL) {
+        return NULL;
+    }
+    result_s = STRINGLIB_STR(result);
+
+    start = self_s;
+    end = self_s + self_len;
+    while (count-- > 0) {
+        next = findchar(start, end - start, from_c);
+        if (next == NULL)
+            break;
+
+        if (next == start) {
+            /* replace with the 'to' */
+            memcpy(result_s, to_s, to_len);
+            result_s += to_len;
+            start += 1;
+        } else {
+            /* copy the unchanged old then the 'to' */
+            memcpy(result_s, start, next - start);
+            result_s += (next - start);
+            memcpy(result_s, to_s, to_len);
+            result_s += to_len;
+            start = next + 1;
+        }
+    }
+    /* Copy the remainder of the remaining bytes */
+    memcpy(result_s, start, end - start);
+
+    return result;
+}
+
+/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
+/* Returns a new object in which the first `count` occurrences of
+   from_s[0:from_len] are replaced by to_s[0:to_len], where count is what
+   stringlib_count() reports for the given maxcount.  When from_s does not
+   occur the result comes from return_self().  Raises OverflowError if the
+   result length would exceed PY_SSIZE_T_MAX; returns NULL on failure. */
+static PyObject *
+stringlib_replace_substring(PyObject *self,
+                            const char *from_s, Py_ssize_t from_len,
+                            const char *to_s, Py_ssize_t to_len,
+                            Py_ssize_t maxcount)
+{
+    const char *self_s, *start, *next, *end;
+    char *result_s;
+    Py_ssize_t self_len, result_len;
+    Py_ssize_t count, offset;
+    PyObject *result;
+
+    self_s = STRINGLIB_STR(self);
+    self_len = STRINGLIB_LEN(self);
+
+    count = stringlib_count(self_s, self_len,
+                            from_s, from_len,
+                            maxcount);
+
+    if (count == 0) {
+        /* no matches, return unchanged */
+        return return_self(self);
+    }
+
+    /* Check for overflow */
+    /*    result_len = self_len + count * (to_len-from_len) */
+    assert(count > 0);
+    /* to_len - from_len may be negative (the result shrinks); the check
+       then cannot trigger because the right-hand side is non-negative */
+    if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
+        PyErr_SetString(PyExc_OverflowError, "replace bytes is too long");
+        return NULL;
+    }
+    result_len = self_len + count * (to_len - from_len);
+
+    result = STRINGLIB_NEW(NULL, result_len);
+    if (result == NULL) {
+        return NULL;
+    }
+    result_s = STRINGLIB_STR(result);
+
+    start = self_s;
+    end = self_s + self_len;
+    while (count-- > 0) {
+        /* offset is relative to start; -1 is treated as "not found" */
+        offset = stringlib_find(start, end - start,
+                                from_s, from_len,
+                                0);
+        if (offset == -1)
+            break;
+        next = start + offset;
+        if (next == start) {
+            /* replace with the 'to' */
+            memcpy(result_s, to_s, to_len);
+            result_s += to_len;
+            start += from_len;
+        } else {
+            /* copy the unchanged old then the 'to' */
+            memcpy(result_s, start, next - start);
+            result_s += (next - start);
+            memcpy(result_s, to_s, to_len);
+            result_s += to_len;
+            start = next + from_len;
+        }
+    }
+    /* Copy the remainder of the remaining bytes */
+    memcpy(result_s, start, end - start);
+
+    return result;
+}
+
+
+/* Replace up to maxcount occurrences of from_s[0:from_len] in self with
+   to_s[0:to_len] by dispatching to the specialised helper for the given
+   combination of lengths.  A negative maxcount means "no limit"
+   (PY_SSIZE_T_MAX).  When nothing can be replaced the result comes from
+   return_self(). */
+static PyObject *
+stringlib_replace(PyObject *self,
+                  const char *from_s, Py_ssize_t from_len,
+                  const char *to_s, Py_ssize_t to_len,
+                  Py_ssize_t maxcount)
+{
+    const int one_char = (from_len == 1);
+
+    /* nothing to do (pattern longer than self, or no replacement
+       allowed); return the original bytes */
+    if (STRINGLIB_LEN(self) < from_len || maxcount == 0) {
+        return return_self(self);
+    }
+    if (maxcount < 0) {
+        maxcount = PY_SSIZE_T_MAX;
+    }
+
+    /* Handle zero-length special cases */
+    if (from_len == 0) {
+        /* insert the 'to' bytes everywhere.    */
+        /*    >>> b"Python".replace(b"", b".")  */
+        /*    b'.P.y.t.h.o.n.'                  */
+        /* ... unless 'to' is empty too, in which case nothing changes */
+        return (to_len == 0)
+            ? return_self(self)
+            : stringlib_replace_interleave(self, to_s, to_len, maxcount);
+    }
+
+    /* delete all occurrences of 'from' bytes */
+    if (to_len == 0) {
+        return one_char
+            ? stringlib_replace_delete_single_character(
+                  self, from_s[0], maxcount)
+            : stringlib_replace_delete_substring(
+                  self, from_s, from_len, maxcount);
+    }
+
+    /* Handle special case where both bytes have the same length */
+    if (from_len == to_len) {
+        return one_char
+            ? stringlib_replace_single_character_in_place(
+                  self, from_s[0], to_s[0], maxcount)
+            : stringlib_replace_substring_in_place(
+                  self, from_s, from_len, to_s, to_len, maxcount);
+    }
+
+    /* Otherwise use the more generic algorithms
+       (second branch: len('from')>=2, len('to')>=1) */
+    return one_char
+        ? stringlib_replace_single_character(
+              self, from_s[0], to_s, to_len, maxcount)
+        : stringlib_replace_substring(
+              self, from_s, from_len, to_s, to_len, maxcount);
+}
+
+#undef findchar
diff --git a/contrib/tools/python3/Objects/stringlib/ucs1lib.h b/contrib/tools/python3/Objects/stringlib/ucs1lib.h
new file mode 100644
index 00000000000..1b9b65ecbaa
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/ucs1lib.h
@@ -0,0 +1,27 @@
+/* stringlib settings for the ucs1lib_ (Py_UCS1) variant.  STRINGLIB_IS_UNICODE
+   is sort of a hack: there's at least one place (formatting floats) where
+   some stringlib code takes a different path if it's compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define FASTSEARCH               ucs1lib_fastsearch
+#define STRINGLIB(F)             ucs1lib_##F  /* name prefixing: STRINGLIB(x) -> ucs1lib_x */
+#define STRINGLIB_OBJECT         PyUnicodeObject
+#define STRINGLIB_SIZEOF_CHAR    1
+#define STRINGLIB_MAX_CHAR       0xFFu
+#define STRINGLIB_CHAR           Py_UCS1
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_PARSE_CODE     "U"
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_STR            PyUnicode_1BYTE_DATA
+#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
+#define STRINGLIB_NEW            _PyUnicode_FromUCS1
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
+#define STRINGLIB_FAST_MEMCHR    memchr  /* unlike ucs2lib.h/ucs4lib.h, defined unconditionally */
+#define STRINGLIB_MUTABLE 0
+
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_ASCII
diff --git a/contrib/tools/python3/Objects/stringlib/ucs2lib.h b/contrib/tools/python3/Objects/stringlib/ucs2lib.h
new file mode 100644
index 00000000000..4b49bbb31d7
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/ucs2lib.h
@@ -0,0 +1,30 @@
+/* stringlib settings for the ucs2lib_ (Py_UCS2) variant.  STRINGLIB_IS_UNICODE
+   is sort of a hack: there's at least one place (formatting floats) where
+   some stringlib code takes a different path if it's compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define FASTSEARCH               ucs2lib_fastsearch
+#define STRINGLIB(F)             ucs2lib_##F  /* name prefixing: STRINGLIB(x) -> ucs2lib_x */
+#define STRINGLIB_OBJECT         PyUnicodeObject
+#define STRINGLIB_SIZEOF_CHAR    2
+#define STRINGLIB_MAX_CHAR       0xFFFFu
+#define STRINGLIB_CHAR           Py_UCS2
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_PARSE_CODE     "U"
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_STR            PyUnicode_2BYTE_DATA
+#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
+#define STRINGLIB_NEW            _PyUnicode_FromUCS2
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
+#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 2  /* presumably: wchar_t is as wide as Py_UCS2, so wmemchr() can scan the buffer */
+#define STRINGLIB_FAST_MEMCHR(s, c, n)              \
+    (Py_UCS2 *)wmemchr((const wchar_t *)(s), c, n)
+#endif  /* SIZEOF_WCHAR_T == 2; otherwise STRINGLIB_FAST_MEMCHR stays undefined here */
+
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_ASCII
diff --git a/contrib/tools/python3/Objects/stringlib/ucs4lib.h b/contrib/tools/python3/Objects/stringlib/ucs4lib.h
new file mode 100644
index 00000000000..def4ca5d17d
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/ucs4lib.h
@@ -0,0 +1,31 @@
+/* stringlib settings for the ucs4lib_ (Py_UCS4) variant.  STRINGLIB_IS_UNICODE
+   is sort of a hack: there's at least one place (formatting floats) where
+   some stringlib code takes a different path if it's compiled as unicode. */
+#define STRINGLIB_IS_UNICODE     1
+
+#define FASTSEARCH               ucs4lib_fastsearch
+#define STRINGLIB(F)             ucs4lib_##F  /* name prefixing: STRINGLIB(x) -> ucs4lib_x */
+#define STRINGLIB_OBJECT         PyUnicodeObject
+#define STRINGLIB_SIZEOF_CHAR    4
+#define STRINGLIB_MAX_CHAR       0x10FFFFu
+#define STRINGLIB_CHAR           Py_UCS4
+#define STRINGLIB_TYPE_NAME      "unicode"
+#define STRINGLIB_PARSE_CODE     "U"
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
+#define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
+#define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
+#define STRINGLIB_STR            PyUnicode_4BYTE_DATA
+#define STRINGLIB_LEN            PyUnicode_GET_LENGTH
+#define STRINGLIB_NEW            _PyUnicode_FromUCS4
+#define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
+#define STRINGLIB_MUTABLE 0
+#if SIZEOF_WCHAR_T == 4  /* presumably: wchar_t is as wide as Py_UCS4, so wmemchr() can scan the buffer */
+#define STRINGLIB_FAST_MEMCHR(s, c, n)              \
+    (Py_UCS4 *)wmemchr((const wchar_t *)(s), c, n)
+#endif  /* SIZEOF_WCHAR_T == 4; otherwise STRINGLIB_FAST_MEMCHR stays undefined here */
+
+#define STRINGLIB_TOSTR          PyObject_Str
+#define STRINGLIB_TOASCII        PyObject_ASCII
+
diff --git a/contrib/tools/python3/Objects/stringlib/undef.h b/contrib/tools/python3/Objects/stringlib/undef.h
new file mode 100644
index 00000000000..cc873a2ec4e
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/undef.h
@@ -0,0 +1,11 @@
+#undef FASTSEARCH              /* removes macros set up by asciilib.h / ucs1lib.h / ucs2lib.h / ucs4lib.h */
+#undef STRINGLIB
+#undef STRINGLIB_SIZEOF_CHAR
+#undef STRINGLIB_MAX_CHAR
+#undef STRINGLIB_CHAR
+#undef STRINGLIB_STR
+#undef STRINGLIB_LEN
+#undef STRINGLIB_NEW
+#undef STRINGLIB_IS_UNICODE
+#undef STRINGLIB_MUTABLE
+#undef STRINGLIB_FAST_MEMCHR   /* ucs2lib.h/ucs4lib.h define it only conditionally; #undef of an undefined name is a no-op */
diff --git a/contrib/tools/python3/Objects/stringlib/unicode_format.h b/contrib/tools/python3/Objects/stringlib/unicode_format.h
new file mode 100644
index 00000000000..ccd7c77c0a0
--- /dev/null
+++ b/contrib/tools/python3/Objects/stringlib/unicode_format.h
@@ -0,0 +1,1288 @@
+/*
+    unicode_format.h -- implementation of str.format().
+*/
+
+#include "pycore_floatobject.h"   // _PyFloat_FormatAdvancedWriter()
+
+/************************************************************************/
+/***********   Global data structures and forward declarations  *********/
+/************************************************************************/
+
+/*
+   A SubString is the slice [start, end) of the unicode object 'str',
+   given as character indices.  'str' may be NULL (no substring at all).
+*/
+typedef struct {
+    PyObject *str; /* borrowed reference */
+    Py_ssize_t start, end;
+} SubString;
+
+
+typedef enum {
+    ANS_INIT,    /* no numerically indexed field has been seen yet */
+    ANS_AUTO,    /* fields are numbered automatically (empty field names) */
+    ANS_MANUAL   /* fields carry explicit numeric indexes */
+} AutoNumberState;   /* Keep track if we're auto-numbering fields */
+
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+    AutoNumberState an_state;
+    int an_field_number;   /* index the next auto-numbered field will receive */
+} AutoNumber;
+
+
+/* forward declaration for recursion: output_markup() calls it to expand a format_spec */
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+             int recursion_depth, AutoNumber *auto_number);
+
+
+
+/************************************************************************/
+/**************************  Utility  functions  ************************/
+/************************************************************************/
+
+static void
+AutoNumber_Init(AutoNumber *auto_number)   /* reset *auto_number to its initial state */
+{
+    auto_number->an_state = ANS_INIT;      /* numbering style not decided yet */
+    auto_number->an_field_number = 0;      /* auto-numbering starts at index 0 */
+}
+
+/* fill in a SubString: the slice [start, end) of object s (s may be NULL) */
+Py_LOCAL_INLINE(void)
+SubString_init(SubString *str, PyObject *s, Py_ssize_t start, Py_ssize_t end)
+{
+    str->str = s;   /* stored as is: no reference is taken here */
+    str->start = start;
+    str->end = end;
+}
+
+/* return a new string holding the slice.  if str->str is NULL, return None */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object(SubString *str)
+{
+    if (str->str == NULL)
+        Py_RETURN_NONE;
+    return PyUnicode_Substring(str->str, str->start, str->end);   /* callers check the result for NULL */
+}
+
+/* like SubString_new_object, but a NULL str->str yields a new empty string, not None */
+Py_LOCAL_INLINE(PyObject *)
+SubString_new_object_or_empty(SubString *str)
+{
+    if (str->str == NULL) {
+        return PyUnicode_New(0, 0);
+    }
+    return SubString_new_object(str);   /* str->str is non-NULL here, so this never yields None */
+}
+
+/* Return 1 if an error has been detected switching between automatic
+   field numbering and manual field specification, else return 0. Set
+   ValueError on error. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+    if (state == ANS_MANUAL) {   /* explicit indexes so far: an empty field name is the error */
+        if (field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "manual field specification to "
+                            "automatic field numbering");
+            return 1;
+        }
+    }
+    else {   /* any other state: a non-empty field name is the error */
+        if (!field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "automatic field numbering to "
+                            "manual field specification");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
+/************************************************************************/
+/***********  Format string parsing -- integers and identifiers *********/
+/************************************************************************/
+
+static Py_ssize_t
+get_integer(const SubString *str)   /* value of str read as a decimal number, or -1 */
+{
+    Py_ssize_t accumulator = 0;
+    Py_ssize_t digitval;
+    Py_ssize_t i;
+
+    /* empty string is an error */
+    if (str->start >= str->end)
+        return -1;   /* note: no exception is set on this path */
+
+    for (i = str->start; i < str->end; i++) {
+        digitval = Py_UNICODE_TODECIMAL(PyUnicode_READ_CHAR(str->str, i));
+        if (digitval < 0)
+            return -1;   /* not a decimal digit; no exception is set */
+        /*
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
+        */
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
+            PyErr_Format(PyExc_ValueError,
+                         "Too many decimal digits in format string");
+            return -1;   /* exception set: callers tell the cases apart via PyErr_Occurred() */
+        }
+        accumulator = accumulator * 10 + digitval;
+    }
+    return accumulator;
+}
+
+/************************************************************************/
+/******** Functions to get field objects and specification strings ******/
+/************************************************************************/
+
+/* do the equivalent of obj.name, where name is the text of a SubString */
+static PyObject *
+getattr(PyObject *obj, SubString *name)
+{
+    PyObject *newobj;
+    PyObject *str = SubString_new_object(name);   /* attribute name as an object */
+    if (str == NULL)
+        return NULL;
+    newobj = PyObject_GetAttr(obj, str);
+    Py_DECREF(str);   /* the temporary name object is no longer needed */
+    return newobj;    /* whatever PyObject_GetAttr returned; the caller checks for NULL */
+}
+
+/* do the equivalent of obj[idx], where obj is a sequence (thin wrapper over PySequence_GetItem) */
+static PyObject *
+getitem_sequence(PyObject *obj, Py_ssize_t idx)
+{
+    return PySequence_GetItem(obj, idx);
+}
+
+/* do the equivalent of obj[idx], where obj is not a sequence (idx is boxed into an int object) */
+static PyObject *
+getitem_idx(PyObject *obj, Py_ssize_t idx)
+{
+    PyObject *newobj;
+    PyObject *idx_obj = PyLong_FromSsize_t(idx);   /* key object for PyObject_GetItem */
+    if (idx_obj == NULL)
+        return NULL;
+    newobj = PyObject_GetItem(obj, idx_obj);
+    Py_DECREF(idx_obj);   /* the temporary key is no longer needed */
+    return newobj;        /* whatever PyObject_GetItem returned; the caller checks for NULL */
+}
+
+/* do the equivalent of obj[name], with the SubString's text as a string key */
+static PyObject *
+getitem_str(PyObject *obj, SubString *name)
+{
+    PyObject *newobj;
+    PyObject *str = SubString_new_object(name);   /* key object for PyObject_GetItem */
+    if (str == NULL)
+        return NULL;
+    newobj = PyObject_GetItem(obj, str);
+    Py_DECREF(str);   /* the temporary key is no longer needed */
+    return newobj;    /* whatever PyObject_GetItem returned; the caller checks for NULL */
+}
+
+typedef struct {   /* walks the ".attr" and "[item]" parts of a field name */
+    /* the entire string we're parsing.  we assume that someone else
+       is managing its lifetime, and that it will exist for the
+       lifetime of the iterator.  can be empty */
+    SubString str;
+
+    /* index of the next unread character inside field_name */
+    Py_ssize_t index;
+} FieldNameIterator;
+
+
+static int
+FieldNameIterator_init(FieldNameIterator *self, PyObject *s,
+                       Py_ssize_t start, Py_ssize_t end)
+{
+    SubString_init(&self->str, s, start, end);   /* iterate over s[start:end] */
+    self->index = start;   /* begin at the first character of that range */
+    return 1;              /* always succeeds */
+}
+
+static int
+_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
+{   /* reads an attribute name (called after a '.' was consumed) into *name; always returns 1 */
+    Py_UCS4 c;
+
+    name->str = self->str.str;
+    name->start = self->index;
+
+    /* return everything until '.' or '[' */
+    while (self->index < self->str.end) {
+        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
+        switch (c) {
+        case '[':
+        case '.':
+            /* back up so that this character will be seen next time */
+            self->index--;
+            break;
+        default:
+            continue;
+        }
+        break;   /* reached only through the switch's break: a terminator was found */
+    }
+    /* end of string is okay */
+    name->end = self->index;
+    return 1;
+}
+
+static int
+_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
+{   /* reads an item key (called after a '[' was consumed) into *name; returns 1, or 0 with ValueError set */
+    int bracket_seen = 0;
+    Py_UCS4 c;
+
+    name->str = self->str.str;
+    name->start = self->index;
+
+    /* return everything until ']' */
+    while (self->index < self->str.end) {
+        c = PyUnicode_READ_CHAR(self->str.str, self->index++);
+        switch (c) {
+        case ']':
+            bracket_seen = 1;
+            break;
+        default:
+            continue;
+        }
+        break;   /* reached only through the switch's break: the ']' was found */
+    }
+    /* make sure we ended with a ']' */
+    if (!bracket_seen) {
+        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
+        return 0;
+    }
+
+    /* the ']' was consumed, so index is now one past it; */
+    /* don't include the ']' */
+    name->end = self->index-1;
+    return 1;
+}
+
+/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
+static int
+FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
+                       Py_ssize_t *name_idx, SubString *name)
+{
+    /* check at end of input */
+    if (self->index >= self->str.end)
+        return 1;
+
+    switch (PyUnicode_READ_CHAR(self->str.str, self->index++)) {
+    case '.':
+        *is_attribute = 1;
+        if (_FieldNameIterator_attr(self, name) == 0)
+            return 0;
+        *name_idx = -1;   /* attribute names are never treated as numeric indexes */
+        break;
+    case '[':
+        *is_attribute = 0;
+        if (_FieldNameIterator_item(self, name) == 0)
+            return 0;
+        *name_idx = get_integer(name);   /* -1 when the key is not a decimal number */
+        if (*name_idx == -1 && PyErr_Occurred())
+            return 0;
+        break;
+    default:
+        /* Invalid character follows ']' */
+        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
+                        "follow ']' in format field specifier");
+        return 0;
+    }
+
+    /* empty string is an error (applies to both ".attr" and "[item]") */
+    if (name->start == name->end) {
+        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
+        return 0;
+    }
+
+    return 2;
+}
+
+
+/* input: field_name (str[start:end]).  returns 1, or 0 with an error set.
+   output: 'first' is the part before the first '[' or '.'
+           'first_idx' is -1 if 'first' is not an integer, else its value;
+                       an empty 'first' gets the next auto-number index
+                       (only when auto_number is not NULL)
+           'rest' is an iterator to return the rest */
+static int
+field_name_split(PyObject *str, Py_ssize_t start, Py_ssize_t end, SubString *first,
+                 Py_ssize_t *first_idx, FieldNameIterator *rest,
+                 AutoNumber *auto_number)
+{
+    Py_UCS4 c;
+    Py_ssize_t i = start;
+    int field_name_is_empty;
+    int using_numeric_index;
+
+    /* find the part up until the first '.' or '[' */
+    while (i < end) {
+        switch (c = PyUnicode_READ_CHAR(str, i++)) {
+        case '[':
+        case '.':
+            /* back up so that this character is available to the
+               "rest" iterator */
+            i--;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    /* set up the return values */
+    SubString_init(first, str, start, i);
+    FieldNameIterator_init(rest, str, i, end);
+
+    /* see if "first" is an integer, in which case it's used as an index */
+    *first_idx = get_integer(first);
+    if (*first_idx == -1 && PyErr_Occurred())
+        return 0;
+
+    field_name_is_empty = first->start >= first->end;
+
+    /* If the field name is omitted or if we have a numeric index
+       specified, then we're doing numeric indexing into args. */
+    using_numeric_index = field_name_is_empty || *first_idx != -1;
+
+    /* We always get here exactly one time for each field we're
+       processing. And we get here in field order (counting by left
+       braces). So this is the perfect place to handle automatic field
+       numbering if the field name is omitted. */
+
+    /* Check if we need to do the auto-numbering. It's not needed if
+       we're called from string.Formatter routines, because it's handled
+       in that class by itself. */
+    if (auto_number) {
+        /* Initialize our auto numbering state if this is the first
+           time we're either auto-numbering or manually numbering. */
+        if (auto_number->an_state == ANS_INIT && using_numeric_index)
+            auto_number->an_state = field_name_is_empty ?
+                ANS_AUTO : ANS_MANUAL;
+
+        /* Make sure our state is consistent with what we're doing
+           this time through. Only check if we're using a numeric
+           index. */
+        if (using_numeric_index)
+            if (autonumber_state_error(auto_number->an_state,
+                                       field_name_is_empty))
+                return 0;
+        /* Zero length field means we want to do auto-numbering of the
+           fields. */
+        if (field_name_is_empty)
+            *first_idx = (auto_number->an_field_number)++;
+    }
+
+    return 1;
+}
+
+
+/*
+    get_field_object returns the object named inside {}, before the
+    format_spec, or NULL with an error set.  It handles getitem ("[]")
+    and getattr (".") lookups and consumes the entire input string.
+*/
+static PyObject *
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+                 AutoNumber *auto_number)
+{
+    PyObject *obj = NULL;
+    int ok;
+    int is_attribute;
+    SubString name;
+    SubString first;
+    Py_ssize_t index;
+    FieldNameIterator rest;
+
+    if (!field_name_split(input->str, input->start, input->end, &first,
+                          &index, &rest, auto_number)) {
+        goto error;
+    }
+
+    if (index == -1) {
+        /* look up in kwargs */
+        PyObject *key = SubString_new_object(&first);
+        if (key == NULL) {
+            goto error;
+        }
+        if (kwargs == NULL) {
+            PyErr_SetObject(PyExc_KeyError, key);
+            Py_DECREF(key);
+            goto error;
+        }
+        /* Use PyObject_GetItem instead of PyDict_GetItem because this
+           code is no longer just used with kwargs. It might be passed
+           a non-dict when called through format_map. */
+        obj = PyObject_GetItem(kwargs, key);
+        Py_DECREF(key);
+        if (obj == NULL) {
+            goto error;
+        }
+    }
+    else {
+        /* If args is NULL, we have a format string with a positional field
+           with only kwargs to retrieve it from. This can only happen when
+           used with format_map(), where positional arguments are not
+           allowed. */
+        if (args == NULL) {
+            PyErr_SetString(PyExc_ValueError, "Format string contains "
+                            "positional fields");
+            goto error;
+        }
+
+        /* look up in args */
+        obj = PySequence_GetItem(args, index);
+        if (obj == NULL) {
+            PyErr_Format(PyExc_IndexError,   /* replaces whatever error PySequence_GetItem set */
+                         "Replacement index %zd out of range for positional "
+                         "args tuple",
+                         index);
+             goto error;
+        }
+    }
+
+    /* iterate over the rest of the field_name */
+    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
+                                        &name)) == 2) {
+        PyObject *tmp;
+
+        if (is_attribute)
+            /* getattr lookup "." */
+            tmp = getattr(obj, &name);
+        else
+            /* getitem lookup "[]" */
+            if (index == -1)
+                tmp = getitem_str(obj, &name);
+            else
+                if (PySequence_Check(obj))
+                    tmp = getitem_sequence(obj, index);
+                else
+                    /* not a sequence */
+                    tmp = getitem_idx(obj, index);
+        if (tmp == NULL)
+            goto error;
+
+        /* assign to obj, releasing the object it held before */
+        Py_SETREF(obj, tmp);
+    }
+    /* end of iterator, this is the non-error case */
+    if (ok == 1)
+        return obj;
+error:   /* also reached by falling through when the iterator reported an error (ok == 0) */
+    Py_XDECREF(obj);
+    return NULL;
+}
+
+/************************************************************************/
+/*****************  Field rendering functions  **************************/
+/************************************************************************/
+
+/*
+    render_field() is the main function in this section.  It takes the
+    field object and the format specification substring, and renders
+    the field into the output writer.  Returns 1 on success, 0 on error.
+
+    Exact str/int/float/complex objects go straight to their formatter;
+    anything else goes through PyObject_Format() and is then appended.
+*/
+static int
+render_field(PyObject *fieldobj, SubString *format_spec, _PyUnicodeWriter *writer)
+{
+    int ok = 0;
+    PyObject *result = NULL;
+    PyObject *format_spec_object = NULL;
+    int (*formatter) (_PyUnicodeWriter*, PyObject *, PyObject *, Py_ssize_t, Py_ssize_t) = NULL;
+    int err;
+
+    /* If we know the type exactly, skip the lookup of __format__ and just
+       call the formatter directly. */
+    if (PyUnicode_CheckExact(fieldobj))
+        formatter = _PyUnicode_FormatAdvancedWriter;
+    else if (PyLong_CheckExact(fieldobj))
+        formatter = _PyLong_FormatAdvancedWriter;
+    else if (PyFloat_CheckExact(fieldobj))
+        formatter = _PyFloat_FormatAdvancedWriter;
+    else if (PyComplex_CheckExact(fieldobj))
+        formatter = _PyComplex_FormatAdvancedWriter;
+
+    if (formatter) {
+        /* we know exactly which formatter will be called when __format__ is
+           looked up, so call it directly, instead. */
+        err = formatter(writer, fieldobj, format_spec->str,
+                        format_spec->start, format_spec->end);
+        return (err == 0);   /* nothing was allocated on this path, so no cleanup is needed */
+    }
+    else {
+        /* We need to create an object out of the pointers we have, because
+           __format__ takes a string/unicode object for format_spec. */
+        if (format_spec->str)
+            format_spec_object = PyUnicode_Substring(format_spec->str,
+                                                     format_spec->start,
+                                                     format_spec->end);
+        else
+            format_spec_object = PyUnicode_New(0, 0);   /* no spec given: use an empty string */
+        if (format_spec_object == NULL)
+            goto done;
+
+        result = PyObject_Format(fieldobj, format_spec_object);
+    }
+    if (result == NULL)
+        goto done;
+
+    if (_PyUnicodeWriter_WriteStr(writer, result) == -1)
+        goto done;
+    ok = 1;
+
+done:
+    Py_XDECREF(format_spec_object);
+    Py_XDECREF(result);
+    return ok;
+}
+
+static int   /* 1 on success, 0 with ValueError set */
+parse_field(SubString *str, SubString *field_name, SubString *format_spec,
+            int *format_spec_needs_expanding, Py_UCS4 *conversion)
+{   /* consumes "field_name[!conversion][:format_spec]}" from the front of *str (str->start is advanced) */
+    /* Note this function works if the field name is zero length,
+       which is good.  Zero length field names are handled later, in
+       field_name_split. */
+
+    Py_UCS4 c = 0;
+
+    /* initialize these, as they may be empty */
+    *conversion = '\0';
+    SubString_init(format_spec, NULL, 0, 0);
+
+    /* Search for the field name.  it's terminated by the end of
+       the string, or a ':' or '!' */
+    field_name->str = str->str;
+    field_name->start = str->start;
+    while (str->start < str->end) {
+        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+        case '{':
+            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
+            return 0;
+        case '[':   /* skip ahead to ']' so that ':' '!' '}' '{' inside an index are not interpreted */
+            for (; str->start < str->end; str->start++)
+                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
+                    break;
+            continue;
+        case '}':
+        case ':':
+        case '!':
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    field_name->end = str->start - 1;   /* exclude the terminator just consumed */
+    if (c == '!' || c == ':') {
+        Py_ssize_t count;   /* nesting depth of '{' ... '}' while scanning the format spec */
+        /* we have a format specifier and/or a conversion */
+        /* don't include the last character */
+
+        /* see if there's a conversion specifier */
+        if (c == '!') {
+            /* there must be another character present */
+            if (str->start >= str->end) {
+                PyErr_SetString(PyExc_ValueError,
+                                "end of string while looking for conversion "
+                                "specifier");
+                return 0;
+            }
+            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
+
+            if (str->start < str->end) {
+                c = PyUnicode_READ_CHAR(str->str, str->start++);
+                if (c == '}')
+                    return 1;   /* conversion without a format spec */
+                if (c != ':') {
+                    PyErr_SetString(PyExc_ValueError,
+                                    "expected ':' after conversion specifier");
+                    return 0;
+                }
+            }
+        }
+        format_spec->str = str->str;
+        format_spec->start = str->start;
+        count = 1;   /* the field's own opening brace is still unmatched */
+        while (str->start < str->end) {
+            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
+            case '{':
+                *format_spec_needs_expanding = 1;   /* only ever set here; MarkupIterator_next clears it before calling */
+                count++;
+                break;
+            case '}':
+                count--;
+                if (count == 0) {
+                    format_spec->end = str->start - 1;   /* exclude the closing '}' */
+                    return 1;
+                }
+                break;
+            default:
+                break;
+            }
+        }
+
+        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
+        return 0;
+    }
+    else if (c != '}') {
+        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
+        return 0;
+    }
+
+    return 1;
+}
+
+/************************************************************************/
+/******* Output string allocation and escape-to-markup processing  ******/
+/************************************************************************/
+
+/* MarkupIterator breaks the string into pieces of either literal
+   text, or things inside {} that need to be marked up.  it is
+   designed to make it easy to wrap a Python iterator around it, for
+   use with the Formatter class */
+
+typedef struct {
+    SubString str;   /* the unparsed remainder; str.start advances as pieces are consumed */
+} MarkupIterator;
+
+static int
+MarkupIterator_init(MarkupIterator *self, PyObject *str,
+                    Py_ssize_t start, Py_ssize_t end)
+{
+    SubString_init(&self->str, str, start, end);   /* iterate over str[start:end] */
+    return 1;   /* always succeeds */
+}
+
+/* returns 0 on error, 1 on non-error termination, and 2 if it got a
+   string (or something to be expanded) */
+static int
+MarkupIterator_next(MarkupIterator *self, SubString *literal,
+                    int *field_present, SubString *field_name,
+                    SubString *format_spec, Py_UCS4 *conversion,
+                    int *format_spec_needs_expanding)
+{
+    int at_end;
+    Py_UCS4 c = 0;
+    Py_ssize_t start;
+    Py_ssize_t len;
+    int markup_follows = 0;
+
+    /* initialize all of the output variables */
+    SubString_init(literal, NULL, 0, 0);
+    SubString_init(field_name, NULL, 0, 0);
+    SubString_init(format_spec, NULL, 0, 0);
+    *conversion = '\0';
+    *format_spec_needs_expanding = 0;
+    *field_present = 0;
+
+    /* No more input, end of iterator.  This is the normal exit
+       path. */
+    if (self->str.start >= self->str.end)
+        return 1;
+
+    start = self->str.start;
+
+    /* First read any literal text. Read until the end of string, an
+       escaped '{' or '}', or an unescaped '{'.  In order to never
+       allocate memory and so I can just pass pointers around, if
+       there's an escaped '{' or '}' then we'll return the literal
+       including the brace, but no format object.  The next time
+       through, we'll return the rest of the literal, skipping past
+       the second consecutive brace. */
+    while (self->str.start < self->str.end) {
+        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
+        case '{':
+        case '}':
+            markup_follows = 1;
+            break;
+        default:
+            continue;
+        }
+        break;
+    }
+
+    at_end = self->str.start >= self->str.end;
+    len = self->str.start - start;   /* still counts the brace that stopped the scan, if any */
+
+    if ((c == '}') && (at_end ||
+                       (c != PyUnicode_READ_CHAR(self->str.str,
+                                                 self->str.start)))) {
+        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
+                        "in format string");
+        return 0;
+    }
+    if (at_end && c == '{') {
+        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
+                        "in format string");
+        return 0;
+    }
+    if (!at_end) {
+        if (c == PyUnicode_READ_CHAR(self->str.str, self->str.start)) {
+            /* escaped } or {, skip it in the input.  there is no
+               markup object following us, just this literal text */
+            self->str.start++;
+            markup_follows = 0;
+        }
+        else
+            len--;   /* an unescaped '{' opens a field: drop it from the literal */
+    }
+
+    /* record the literal text */
+    literal->str = self->str.str;
+    literal->start = start;
+    literal->end = start + len;
+
+    if (!markup_follows)
+        return 2;
+
+    /* this is markup; parse the field */
+    *field_present = 1;
+    if (!parse_field(&self->str, field_name, format_spec,
+                     format_spec_needs_expanding, conversion))
+        return 0;
+    return 2;
+}
+
+
+/* do the !r, !s or !a conversion on obj; any other specifier sets ValueError and returns NULL */
+static PyObject *
+do_conversion(PyObject *obj, Py_UCS4 conversion)
+{
+    /* XXX in pre-3.0, do we need to convert this to unicode, since it
+       might have returned a string? */
+    switch (conversion) {
+    case 'r':
+        return PyObject_Repr(obj);
+    case 's':
+        return PyObject_Str(obj);
+    case 'a':
+        return PyObject_ASCII(obj);
+    default:
+        if (conversion > 32 && conversion < 127) {
+                /* It's the ASCII subrange; casting to char is safe
+                   (assuming the execution character set is an ASCII
+                   superset). */
+                PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier %c",
+                     (char)conversion);
+        } else   /* outside that range: report the code point in hex instead */
+                PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier \\x%x",
+                     (unsigned int)conversion);
+        return NULL;
+    }
+}
+
+/* given:
+
+   {field_name!conversion:format_spec}
+
+   compute the result and write it to output (returns 1, or 0 on error).
+   format_spec_needs_expanding is an optimization.  if it's false,
+   format_spec is used as is, otherwise it is first expanded
+   recursively via build_string().
+
+   field_name is allowed to be zero length, in which case we
+   are doing auto field numbering.
+*/
+
+static int
+output_markup(SubString *field_name, SubString *format_spec,
+              int format_spec_needs_expanding, Py_UCS4 conversion,
+              _PyUnicodeWriter *writer, PyObject *args, PyObject *kwargs,
+              int recursion_depth, AutoNumber *auto_number)
+{
+    PyObject *tmp = NULL;
+    PyObject *fieldobj = NULL;
+    SubString expanded_format_spec;
+    SubString *actual_format_spec;
+    int result = 0;
+
+    /* convert field_name to an object */
+    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
+    if (fieldobj == NULL)
+        goto done;
+
+    if (conversion != '\0') {
+        tmp = do_conversion(fieldobj, conversion);
+        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+            goto done;
+
+        /* do the assignment, transferring ownership: fieldobj = tmp */
+        Py_SETREF(fieldobj, tmp);
+        tmp = NULL;   /* so the cleanup below does not release it a second time */
+    }
+
+    /* if needed, recursively compute the format_spec */
+    if (format_spec_needs_expanding) {
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+                           auto_number);
+        if (tmp == NULL || PyUnicode_READY(tmp) == -1)
+            goto done;
+
+        /* note that in the case we're expanding the format string,
+           tmp must be kept around until after the call to
+           render_field. */
+        SubString_init(&expanded_format_spec, tmp, 0, PyUnicode_GET_LENGTH(tmp));
+        actual_format_spec = &expanded_format_spec;
+    }
+    else
+        actual_format_spec = format_spec;
+
+    if (render_field(fieldobj, actual_format_spec, writer) == 0)
+        goto done;
+
+    result = 1;
+
+done:   /* shared exit for success and failure */
+    Py_XDECREF(fieldobj);
+    Py_XDECREF(tmp);
+
+    return result;
+}
+
+/*
+    do_markup drives the format() method: it walks the format string
+    with a MarkupIterator, copies each run of literal text to the
+    writer, and hands every replacement field to output_markup.
+
+    Returns 0 as soon as writing a literal or a field fails; otherwise
+    returns the last status reported by MarkupIterator_next.
+*/
+static int
+do_markup(SubString *input, PyObject *args, PyObject *kwargs,
+          _PyUnicodeWriter *writer, int recursion_depth, AutoNumber *auto_number)
+{
+    MarkupIterator iter;
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UCS4 conversion;
+    int field_present;
+    int format_spec_needs_expanding;
+    int input_exhausted;
+    int status;
+
+    MarkupIterator_init(&iter, input->str, input->start, input->end);
+    for (;;) {
+        status = MarkupIterator_next(&iter, &literal, &field_present,
+                                     &field_name, &format_spec,
+                                     &conversion,
+                                     &format_spec_needs_expanding);
+        /* only a status of 2 carries a chunk to process */
+        if (status != 2)
+            break;
+
+        /* nothing left to parse after this chunk? */
+        input_exhausted = (iter.str.start == iter.str.end);
+
+        if (literal.start != literal.end) {
+            /* the literal is the very last write: stop overallocating */
+            if (input_exhausted && !field_present)
+                writer->overallocate = 0;
+            if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
+                                                literal.start, literal.end) < 0)
+                return 0;
+        }
+
+        if (!field_present)
+            continue;
+
+        /* the field is the very last write: stop overallocating */
+        if (input_exhausted)
+            writer->overallocate = 0;
+        if (!output_markup(&field_name, &format_spec,
+                           format_spec_needs_expanding, conversion, writer,
+                           args, kwargs, recursion_depth, auto_number))
+            return 0;
+    }
+    return status;
+}
+
+
+/*
+    build_string sets up the output writer and lets do_markup do the
+    heavy lifting.  Returns the finished string, or NULL when the
+    recursion limit has been reached or do_markup reports failure.
+*/
+static PyObject *
+build_string(SubString *input, PyObject *args, PyObject *kwargs,
+             int recursion_depth, AutoNumber *auto_number)
+{
+    _PyUnicodeWriter writer;
+
+    /* refuse to go any deeper once the recursion budget is used up */
+    if (recursion_depth < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Max string recursion exceeded");
+        return NULL;
+    }
+
+    _PyUnicodeWriter_Init(&writer);
+    writer.min_length = PyUnicode_GET_LENGTH(input->str) + 100;
+    writer.overallocate = 1;
+
+    if (do_markup(input, args, kwargs, &writer, recursion_depth,
+                  auto_number)) {
+        return _PyUnicodeWriter_Finish(&writer);
+    }
+
+    /* failure: release whatever the writer has accumulated */
+    _PyUnicodeWriter_Dealloc(&writer);
+    return NULL;
+}
+
+/************************************************************************/
+/*********** main routine ***********************************************/
+/************************************************************************/
+
+/* Main entry point: expand the format string self using args and
+   kwargs, and return the resulting string (NULL on failure). */
+static PyObject *
+do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    /* PEP 3101 says only 2 levels, so that
+       "{0:{1}}".format('abc', 's')            # works
+       "{0:{1:{2}}}".format('abc', 's', '')    # fails
+    */
+    const int max_depth = 2;
+    AutoNumber numbering;
+    SubString whole;
+
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    /* the input is the complete string, from index 0 to its length */
+    SubString_init(&whole, self, 0, PyUnicode_GET_LENGTH(self));
+    AutoNumber_Init(&numbering);
+    return build_string(&whole, args, kwargs, max_depth, &numbering);
+}
+
+/* Variant of do_string_format that takes a single object: obj is
+   passed along in the kwargs position and no positional args are
+   supplied. */
+static PyObject *
+do_string_format_map(PyObject *self, PyObject *obj)
+{
+    PyObject *no_args = NULL;
+
+    return do_string_format(self, no_args, obj);
+}
+
+
+/************************************************************************/
+/*********** formatteriterator ******************************************/
+/************************************************************************/
+
+/* This is used to implement string.Formatter.parse().  It exists so
+   Formatter can share code with the built in unicode.format() method.
+   It's really just a wrapper around MarkupIterator that is callable
+   from Python. */
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *str;              /* the string being parsed; the iterator
+                                   holds a reference to it because the
+                                   SubStrings it produces point into it */
+    MarkupIterator it_markup;   /* parsing state over str */
+} formatteriterobject;
+
+/* Destructor: give up the reference to the parsed string, then free
+   the iterator object itself. */
+static void
+formatteriter_dealloc(formatteriterobject *it)
+{
+    PyObject *parsed = it->str;
+
+    Py_XDECREF(parsed);
+    PyObject_Free(it);
+}
+
+/* Produce the next item of the iterator, a tuple:
+   (literal, field_name, format_spec, conversion)
+
+   literal is any literal text to output.  might be zero length
+   field_name is the string before the ':'.  might be None
+   format_spec is the string after the ':'.  might be None
+   conversion is either None, or the string after the '!'
+
+   Returns NULL when the iterator is exhausted or on error.
+*/
+static PyObject *
+formatteriter_next(formatteriterobject *it)
+{
+    SubString literal;
+    SubString field_name;
+    SubString format_spec;
+    Py_UCS4 conversion;
+    int format_spec_needs_expanding;
+    int field_present;
+    int status;
+    PyObject *literal_obj = NULL;
+    PyObject *name_obj = NULL;
+    PyObject *spec_obj = NULL;
+    PyObject *conv_obj = NULL;
+    PyObject *item = NULL;
+
+    status = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+                                 &field_name, &format_spec, &conversion,
+                                 &format_spec_needs_expanding);
+
+    /* every SubString filled in above points into it->str, so none of
+       them needs any memory management */
+    assert(0 <= status && status <= 2);
+    if (status == 0 || status == 1) {
+        /* 0: the error has already been set; 1: the iterator is empty */
+        return NULL;
+    }
+
+    literal_obj = SubString_new_object(&literal);
+    if (!literal_obj)
+        goto cleanup;
+
+    name_obj = SubString_new_object(&field_name);
+    if (!name_obj)
+        goto cleanup;
+
+    /* when a field is present the format_spec is always returned as a
+       string (even if zero length); otherwise it may come back as None */
+    if (field_present)
+        spec_obj = SubString_new_object_or_empty(&format_spec);
+    else
+        spec_obj = SubString_new_object(&format_spec);
+    if (!spec_obj)
+        goto cleanup;
+
+    /* a specified conversion becomes a one-character string, an
+       unspecified one becomes None */
+    if (conversion != '\0') {
+        conv_obj = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
+                                             &conversion, 1);
+    }
+    else {
+        conv_obj = Py_NewRef(Py_None);
+    }
+    if (!conv_obj)
+        goto cleanup;
+
+    item = PyTuple_Pack(4, literal_obj, name_obj, spec_obj, conv_obj);
+
+cleanup:
+    Py_XDECREF(literal_obj);
+    Py_XDECREF(name_obj);
+    Py_XDECREF(spec_obj);
+    Py_XDECREF(conv_obj);
+    return item;
+}
+
+static PyMethodDef formatteriter_methods[] = {
+    {NULL,              NULL}           /* sentinel; it is the only entry,
+                                           so the table defines no methods */
+};
+
+static PyTypeObject PyFormatterIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "formatteriterator",                /* tp_name */
+    sizeof(formatteriterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods: this is the type of the iterator objects created by
+       formatter_parser().  Only deallocation, generic attribute lookup
+       and the iterator protocol (tp_iter / tp_iternext) are filled in;
+       the other slots listed below are 0. */
+    (destructor)formatteriter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_vectorcall_offset */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_as_async */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)formatteriter_next,   /* tp_iternext */
+    formatteriter_methods,              /* tp_methods */
+    0,                                  /* tp_members */
+};
+
+/* unicode_formatter_parser is used to implement
+   string.Formatter.vformat.  Given a str, it returns a new
+   formatteriterator whose items are tuples describing the parsed
+   elements; it is a thin wrapper around MarkupIterator.  A non-str
+   argument raises TypeError. */
+static PyObject *
+formatter_parser(PyObject *ignored, PyObject *self)
+{
+    formatteriterobject *parser;
+
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s",
+                     Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    parser = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
+    if (!parser)
+        return NULL;
+
+    /* the iterator keeps its own reference to the string it walks */
+    parser->str = Py_NewRef(self);
+
+    /* set up the embedded MarkupIterator to cover the whole string */
+    MarkupIterator_init(&parser->it_markup, (PyObject*)self, 0,
+                        PyUnicode_GET_LENGTH(self));
+    return (PyObject *)parser;
+}
+
+
+/************************************************************************/
+/*********** fieldnameiterator ******************************************/
+/************************************************************************/
+
+
+/* This is used to implement string.Formatter.get_field().  It parses the
+   field name into attribute and item values.  It's a Python-callable
+   wrapper around FieldNameIterator */
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *str;               /* the field name string; a reference is
+                                    held just to keep it alive while
+                                    it_field is in use */
+    FieldNameIterator it_field;  /* parsing state for the field name */
+} fieldnameiterobject;
+
+/* Destructor: give up the reference to the field name string, then
+   free the iterator object itself. */
+static void
+fieldnameiter_dealloc(fieldnameiterobject *it)
+{
+    PyObject *field_name = it->str;
+
+    Py_XDECREF(field_name);
+    PyObject_Free(it);
+}
+
+/* Produce the next item of the iterator, a tuple:
+   (is_attr, value)
+   is_attr is true if attribute syntax was used (e.g., '.foo')
+              false if index syntax was used (e.g., '[foo]')
+   value is an integer or string
+
+   Returns NULL when the iterator is exhausted or on error.
+*/
+static PyObject *
+fieldnameiter_next(fieldnameiterobject *it)
+{
+    SubString name;
+    Py_ssize_t idx;
+    int is_attr;
+    int status;
+    PyObject *is_attr_obj = NULL;
+    PyObject *value = NULL;
+    PyObject *pair = NULL;
+
+    status = FieldNameIterator_next(&it->it_field, &is_attr,
+                                    &idx, &name);
+    if (status == 0 || status == 1) {
+        /* 0: the error has already been set; 1: the iterator is empty */
+        return NULL;
+    }
+
+    is_attr_obj = PyBool_FromLong(is_attr);
+    if (!is_attr_obj)
+        goto cleanup;
+
+    /* an idx of -1 selects the name string, anything else the integer */
+    if (idx == -1)
+        value = SubString_new_object(&name);
+    else
+        value = PyLong_FromSsize_t(idx);
+    if (!value)
+        goto cleanup;
+
+    pair = PyTuple_Pack(2, is_attr_obj, value);
+
+cleanup:
+    Py_XDECREF(is_attr_obj);
+    Py_XDECREF(value);
+    return pair;
+}
+
+static PyMethodDef fieldnameiter_methods[] = {
+    {NULL,              NULL}           /* sentinel; it is the only entry,
+                                           so the table defines no methods */
+};
+
+static PyTypeObject PyFieldNameIter_Type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "fieldnameiterator",                /* tp_name */
+    sizeof(fieldnameiterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
+    /* methods: this is the type of the iterator objects created by
+       formatter_field_name_split().  Only deallocation, generic
+       attribute lookup and the iterator protocol (tp_iter /
+       tp_iternext) are filled in; the other slots listed below are 0. */
+    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_vectorcall_offset */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_as_async */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
+    fieldnameiter_methods,              /* tp_methods */
+    0};                                 /* tp_members */
+
+/* unicode_formatter_field_name_split is used to implement
+   string.Formatter.vformat.  It takes a PEP 3101 "field name" and
+   returns a tuple (first, rest): "first" is the part before the
+   first '.' or '[', as an integer when possible and a string
+   otherwise; "rest" is a fieldnameiterator over the remainder of the
+   field name.  It is a wrapper around field_name_split.  A non-str
+   argument raises TypeError. */
+static PyObject *
+formatter_field_name_split(PyObject *ignored, PyObject *self)
+{
+    fieldnameiterobject *rest_iter;
+    SubString head;
+    Py_ssize_t head_index;
+    PyObject *head_obj = NULL;
+    PyObject *pair = NULL;
+
+    if (!PyUnicode_Check(self)) {
+        PyErr_Format(PyExc_TypeError, "expected str, got %s",
+                     Py_TYPE(self)->tp_name);
+        return NULL;
+    }
+    if (PyUnicode_READY(self) == -1)
+        return NULL;
+
+    rest_iter = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
+    if (!rest_iter)
+        return NULL;
+
+    /* the iterator holds a reference to the field name, just to keep
+       it alive */
+    rest_iter->str = Py_NewRef(self);
+
+    /* auto_number is passed as NULL; an empty string is returned for
+       the first part in that case */
+    if (field_name_split((PyObject*)self, 0, PyUnicode_GET_LENGTH(self),
+                         &head, &head_index, &rest_iter->it_field, NULL)) {
+        /* the first part becomes an integer if possible, else a string */
+        head_obj = (head_index == -1) ? SubString_new_object(&head)
+                                      : PyLong_FromSsize_t(head_index);
+        if (head_obj)
+            pair = PyTuple_Pack(2, head_obj, rest_iter);
+    }
+
+    /* the tuple (if any) holds its own references to both members */
+    Py_XDECREF(rest_iter);
+    Py_XDECREF(head_obj);
+    return pair;
+}
author	thegeorg <[email protected]>	2024-02-19 02:38:52 +0300
committer	thegeorg <[email protected]>	2024-02-19 02:50:43 +0300
commit	d96fa07134c06472bfee6718b5cfd1679196fc99 (patch)
tree	31ec344fa9d3ff8dc038692516b6438dfbdb8a2d /contrib/tools/python3/Objects/stringlib
parent	452cf9e068aef7110e35e654c5d47eb80111ef89 (diff)