Restoring authorship annotation for <[email protected]>. Commit 2 of 2.

author: shadchin <[email protected]> 2022-02-10 16:44:39 +0300
committer: Daniil Cherednik <[email protected]> 2022-02-10 16:44:39 +0300
commit: e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree: 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/python3/src/Objects/unicodeobject.c
parent: 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
1 files changed, 1690 insertions, 1690 deletions
diff --git a/contrib/tools/python3/src/Objects/unicodeobject.c b/contrib/tools/python3/src/Objects/unicodeobject.c
index 6ee20925e9a..7767d140e6c 100644
--- a/contrib/tools/python3/src/Objects/unicodeobject.c
+++ b/contrib/tools/python3/src/Objects/unicodeobject.c
@@ -40,15 +40,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
-#include "pycore_abstract.h"       // _PyIndex_Check() 
-#include "pycore_bytes_methods.h" 
-#include "pycore_fileutils.h" 
-#include "pycore_initconfig.h" 
-#include "pycore_interp.h"         // PyInterpreterState.fs_codec 
-#include "pycore_object.h" 
-#include "pycore_pathconfig.h" 
-#include "pycore_pylifecycle.h" 
-#include "pycore_pystate.h"        // _PyInterpreterState_GET() 
+#include "pycore_abstract.h"       // _PyIndex_Check()
+#include "pycore_bytes_methods.h"
+#include "pycore_fileutils.h"
+#include "pycore_initconfig.h"
+#include "pycore_interp.h"         // PyInterpreterState.fs_codec
+#include "pycore_object.h"
+#include "pycore_pathconfig.h"
+#include "pycore_pylifecycle.h"
+#include "pycore_pystate.h"        // _PyInterpreterState_GET()
 #include "ucnhash.h"
 #include "stringlib/eq.h"
 
@@ -56,15 +56,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 #include <windows.h>
 #endif
 
-#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION 
-#include "pycore_fileutils.h"     // _Py_LocaleUsesNonUnicodeWchar() 
-#endif 
- 
-/* Uncomment to display statistics on interned strings at exit when 
-   using Valgrind or Insecure++. */ 
-/* #define INTERNED_STATS 1 */ 
- 
- 
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+#include "pycore_fileutils.h"     // _Py_LocaleUsesNonUnicodeWchar()
+#endif
+
+/* Uncomment to display statistics on interned strings at exit when
+   using Valgrind or Insecure++. */
+/* #define INTERNED_STATS 1 */
+
+
 /*[clinic input]
 class str "PyObject *" "&PyUnicode_Type"
 [clinic start generated code]*/
@@ -97,8 +97,8 @@ NOTE: In the interpreter's initialization phase, some globals are currently
 extern "C" {
 #endif
 
-// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111). 
-// The value must be the same in fileutils.c. 
+// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
+// The value must be the same in fileutils.c.
 #define MAX_UNICODE 0x10ffff
 
 #ifdef Py_DEBUG
@@ -125,13 +125,13 @@ extern "C" {
          _PyUnicode_UTF8_LENGTH(op))
 #define _PyUnicode_WSTR(op)                             \
     (((PyASCIIObject*)(op))->wstr)
- 
-/* Don't use deprecated macro of unicodeobject.h */ 
-#undef PyUnicode_WSTR_LENGTH 
-#define PyUnicode_WSTR_LENGTH(op) \ 
-    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \ 
-     ((PyASCIIObject*)op)->length :                    \ 
-     ((PyCompactUnicodeObject*)op)->wstr_length) 
+
+/* Don't use the deprecated macro from unicodeobject.h */
+#undef PyUnicode_WSTR_LENGTH
+#define PyUnicode_WSTR_LENGTH(op) \
+    (PyUnicode_IS_COMPACT_ASCII(op) ?                  \
+     ((PyASCIIObject*)op)->length :                    \
+     ((PyCompactUnicodeObject*)op)->wstr_length)
 #define _PyUnicode_WSTR_LENGTH(op)                      \
     (((PyCompactUnicodeObject*)(op))->wstr_length)
 #define _PyUnicode_LENGTH(op)                           \
@@ -186,8 +186,8 @@ extern "C" {
 #define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
     do {                                                \
         to_type *_to = (to_type *)(to);                \
-        const from_type *_iter = (const from_type *)(begin);\ 
-        const from_type *_end = (const from_type *)(end);\ 
+        const from_type *_iter = (const from_type *)(begin);\
+        const from_type *_end = (const from_type *)(end);\
         Py_ssize_t n = (_end) - (_iter);                \
         const from_type *_unrolled_end =                \
             _iter + _Py_SIZE_ROUND_DOWN(n, 4);          \
@@ -210,8 +210,8 @@ extern "C" {
 #  define OVERALLOCATE_FACTOR 4
 #endif
 
-#define INTERNED_STRINGS 
- 
+#define INTERNED_STRINGS
+
 /* This dictionary holds all interned unicode strings.  Note that references
    to strings in this dictionary are *not* counted in the string's ob_refcnt.
    When the interned string reaches a refcnt of 0 the string deallocation
@@ -220,9 +220,9 @@ extern "C" {
    Another way to look at this is that to say that the actual reference
    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
 */
-#ifdef INTERNED_STRINGS 
+#ifdef INTERNED_STRINGS
 static PyObject *interned = NULL;
-#endif 
+#endif
 
 /* The empty Unicode object is shared to improve performance. */
 static PyObject *unicode_empty = NULL;
@@ -246,64 +246,64 @@ static PyObject *unicode_empty = NULL;
         return unicode_empty;                           \
     } while (0)
 
-static inline void 
-unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value, 
-             Py_ssize_t start, Py_ssize_t length) 
-{ 
-    assert(0 <= start); 
-    assert(kind != PyUnicode_WCHAR_KIND); 
-    switch (kind) { 
-    case PyUnicode_1BYTE_KIND: { 
-        assert(value <= 0xff); 
-        Py_UCS1 ch = (unsigned char)value; 
-        Py_UCS1 *to = (Py_UCS1 *)data + start; 
-        memset(to, ch, length); 
-        break; 
-    } 
-    case PyUnicode_2BYTE_KIND: { 
-        assert(value <= 0xffff); 
-        Py_UCS2 ch = (Py_UCS2)value; 
-        Py_UCS2 *to = (Py_UCS2 *)data + start; 
-        const Py_UCS2 *end = to + length; 
-        for (; to < end; ++to) *to = ch; 
-        break; 
-    } 
-    case PyUnicode_4BYTE_KIND: { 
-        assert(value <= MAX_UNICODE); 
-        Py_UCS4 ch = value; 
-        Py_UCS4 * to = (Py_UCS4 *)data + start; 
-        const Py_UCS4 *end = to + length; 
-        for (; to < end; ++to) *to = ch; 
-        break; 
-    } 
-    default: Py_UNREACHABLE(); 
-    } 
-} 
+static inline void
+unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value,
+             Py_ssize_t start, Py_ssize_t length)
+{
+    assert(0 <= start);
+    assert(kind != PyUnicode_WCHAR_KIND);
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND: {
+        assert(value <= 0xff);
+        Py_UCS1 ch = (unsigned char)value;
+        Py_UCS1 *to = (Py_UCS1 *)data + start;
+        memset(to, ch, length);
+        break;
+    }
+    case PyUnicode_2BYTE_KIND: {
+        assert(value <= 0xffff);
+        Py_UCS2 ch = (Py_UCS2)value;
+        Py_UCS2 *to = (Py_UCS2 *)data + start;
+        const Py_UCS2 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    case PyUnicode_4BYTE_KIND: {
+        assert(value <= MAX_UNICODE);
+        Py_UCS4 ch = value;
+        Py_UCS4 * to = (Py_UCS4 *)data + start;
+        const Py_UCS4 *end = to + length;
+        for (; to < end; ++to) *to = ch;
+        break;
+    }
+    default: Py_UNREACHABLE();
+    }
+}
 
 
 /* Forward declaration */
 static inline int
 _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
-static inline void 
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer); 
-static PyObject * 
-unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, 
-                    const char *errors); 
-static PyObject * 
-unicode_decode_utf8(const char *s, Py_ssize_t size, 
-                    _Py_error_handler error_handler, const char *errors, 
-                    Py_ssize_t *consumed); 
+static inline void
+_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+                    const char *errors);
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+                    _Py_error_handler error_handler, const char *errors,
+                    Py_ssize_t *consumed);
 
 /* List of static strings. */
 static _Py_Identifier *static_strings = NULL;
 
-#define LATIN1_SINGLETONS 
- 
-#ifdef LATIN1_SINGLETONS 
+#define LATIN1_SINGLETONS
+
+#ifdef LATIN1_SINGLETONS
 /* Single character Unicode strings in the Latin-1 range are being
    shared as well. */
 static PyObject *unicode_latin1[256] = {NULL};
-#endif 
+#endif
 
 /* Fast detection of the most frequent whitespace characters */
 const unsigned char _Py_ascii_whitespace[] = {
@@ -394,8 +394,8 @@ static int convert_uc(PyObject *obj, void *addr);
 
 #include "clinic/unicodeobject.c.h"
 
-_Py_error_handler 
-_Py_GetErrorHandler(const char *errors) 
+_Py_error_handler
+_Py_GetErrorHandler(const char *errors)
 {
     if (errors == NULL || strcmp(errors, "strict") == 0) {
         return _Py_ERROR_STRICT;
@@ -421,83 +421,83 @@ _Py_GetErrorHandler(const char *errors)
     return _Py_ERROR_OTHER;
 }
 
- 
-static _Py_error_handler 
-get_error_handler_wide(const wchar_t *errors) 
-{ 
-    if (errors == NULL || wcscmp(errors, L"strict") == 0) { 
-        return _Py_ERROR_STRICT; 
-    } 
-    if (wcscmp(errors, L"surrogateescape") == 0) { 
-        return _Py_ERROR_SURROGATEESCAPE; 
-    } 
-    if (wcscmp(errors, L"replace") == 0) { 
-        return _Py_ERROR_REPLACE; 
-    } 
-    if (wcscmp(errors, L"ignore") == 0) { 
-        return _Py_ERROR_IGNORE; 
-    } 
-    if (wcscmp(errors, L"backslashreplace") == 0) { 
-        return _Py_ERROR_BACKSLASHREPLACE; 
-    } 
-    if (wcscmp(errors, L"surrogatepass") == 0) { 
-        return _Py_ERROR_SURROGATEPASS; 
-    } 
-    if (wcscmp(errors, L"xmlcharrefreplace") == 0) { 
-        return _Py_ERROR_XMLCHARREFREPLACE; 
-    } 
-    return _Py_ERROR_OTHER; 
-} 
- 
- 
-static inline int 
-unicode_check_encoding_errors(const char *encoding, const char *errors) 
-{ 
-    if (encoding == NULL && errors == NULL) { 
-        return 0; 
-    } 
- 
-    PyInterpreterState *interp = _PyInterpreterState_GET(); 
-#ifndef Py_DEBUG 
-    /* In release mode, only check in development mode (-X dev) */ 
-    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { 
-        return 0; 
-    } 
-#else 
-    /* Always check in debug mode */ 
-#endif 
- 
-    /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the 
-       codec registry is ready: before_PyUnicode_InitEncodings() is called. */ 
-    if (!interp->unicode.fs_codec.encoding) { 
-        return 0; 
-    } 
- 
-    /* Disable checks during Python finalization. For example, it allows to 
-       call _PyObject_Dump() during finalization for debugging purpose. */ 
-    if (interp->finalizing) { 
-        return 0; 
-    } 
- 
-    if (encoding != NULL) { 
-        PyObject *handler = _PyCodec_Lookup(encoding); 
-        if (handler == NULL) { 
-            return -1; 
-        } 
-        Py_DECREF(handler); 
-    } 
- 
-    if (errors != NULL) { 
-        PyObject *handler = PyCodec_LookupError(errors); 
-        if (handler == NULL) { 
-            return -1; 
-        } 
-        Py_DECREF(handler); 
-    } 
-    return 0; 
-} 
- 
- 
+
+static _Py_error_handler
+get_error_handler_wide(const wchar_t *errors)
+{
+    if (errors == NULL || wcscmp(errors, L"strict") == 0) {
+        return _Py_ERROR_STRICT;
+    }
+    if (wcscmp(errors, L"surrogateescape") == 0) {
+        return _Py_ERROR_SURROGATEESCAPE;
+    }
+    if (wcscmp(errors, L"replace") == 0) {
+        return _Py_ERROR_REPLACE;
+    }
+    if (wcscmp(errors, L"ignore") == 0) {
+        return _Py_ERROR_IGNORE;
+    }
+    if (wcscmp(errors, L"backslashreplace") == 0) {
+        return _Py_ERROR_BACKSLASHREPLACE;
+    }
+    if (wcscmp(errors, L"surrogatepass") == 0) {
+        return _Py_ERROR_SURROGATEPASS;
+    }
+    if (wcscmp(errors, L"xmlcharrefreplace") == 0) {
+        return _Py_ERROR_XMLCHARREFREPLACE;
+    }
+    return _Py_ERROR_OTHER;
+}
+
+
+static inline int
+unicode_check_encoding_errors(const char *encoding, const char *errors)
+{
+    if (encoding == NULL && errors == NULL) {
+        return 0;
+    }
+
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+#ifndef Py_DEBUG
+    /* In release mode, only check in development mode (-X dev) */
+    if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
+        return 0;
+    }
+#else
+    /* Always check in debug mode */
+#endif
+
+    /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
+       codec registry is ready: before _PyUnicode_InitEncodings() is called. */
+    if (!interp->unicode.fs_codec.encoding) {
+        return 0;
+    }
+
+    /* Disable checks during Python finalization. For example, this allows
+       calling _PyObject_Dump() during finalization for debugging purposes. */
+    if (interp->finalizing) {
+        return 0;
+    }
+
+    if (encoding != NULL) {
+        PyObject *handler = _PyCodec_Lookup(encoding);
+        if (handler == NULL) {
+            return -1;
+        }
+        Py_DECREF(handler);
+    }
+
+    if (errors != NULL) {
+        PyObject *handler = PyCodec_LookupError(errors);
+        if (handler == NULL) {
+            return -1;
+        }
+        Py_DECREF(handler);
+    }
+    return 0;
+}
+
+
 /* The max unicode value is always 0x10FFFF while using the PEP-393 API.
    This function is kept for backward compatibility with the old API. */
 Py_UNICODE
@@ -515,21 +515,21 @@ PyUnicode_GetMax(void)
 int
 _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 {
-#define CHECK(expr) \ 
-    do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0) 
- 
+#define CHECK(expr) \
+    do { if (!(expr)) { _PyObject_ASSERT_FAILED_MSG(op, Py_STRINGIFY(expr)); } } while (0)
+
     PyASCIIObject *ascii;
     unsigned int kind;
 
-    assert(op != NULL); 
-    CHECK(PyUnicode_Check(op)); 
+    assert(op != NULL);
+    CHECK(PyUnicode_Check(op));
 
     ascii = (PyASCIIObject *)op;
     kind = ascii->state.kind;
 
     if (ascii->state.ascii == 1 && ascii->state.compact == 1) {
-        CHECK(kind == PyUnicode_1BYTE_KIND); 
-        CHECK(ascii->state.ready == 1); 
+        CHECK(kind == PyUnicode_1BYTE_KIND);
+        CHECK(ascii->state.ready == 1);
     }
     else {
         PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
@@ -537,41 +537,41 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 
         if (ascii->state.compact == 1) {
             data = compact + 1;
-            CHECK(kind == PyUnicode_1BYTE_KIND 
-                                 || kind == PyUnicode_2BYTE_KIND 
-                                 || kind == PyUnicode_4BYTE_KIND); 
-            CHECK(ascii->state.ascii == 0); 
-            CHECK(ascii->state.ready == 1); 
-            CHECK(compact->utf8 != data); 
+            CHECK(kind == PyUnicode_1BYTE_KIND
+                                 || kind == PyUnicode_2BYTE_KIND
+                                 || kind == PyUnicode_4BYTE_KIND);
+            CHECK(ascii->state.ascii == 0);
+            CHECK(ascii->state.ready == 1);
+            CHECK(compact->utf8 != data);
         }
         else {
             PyUnicodeObject *unicode = (PyUnicodeObject *)op;
 
             data = unicode->data.any;
             if (kind == PyUnicode_WCHAR_KIND) {
-                CHECK(ascii->length == 0); 
-                CHECK(ascii->hash == -1); 
-                CHECK(ascii->state.compact == 0); 
-                CHECK(ascii->state.ascii == 0); 
-                CHECK(ascii->state.ready == 0); 
-                CHECK(ascii->state.interned == SSTATE_NOT_INTERNED); 
-                CHECK(ascii->wstr != NULL); 
-                CHECK(data == NULL); 
-                CHECK(compact->utf8 == NULL); 
+                CHECK(ascii->length == 0);
+                CHECK(ascii->hash == -1);
+                CHECK(ascii->state.compact == 0);
+                CHECK(ascii->state.ascii == 0);
+                CHECK(ascii->state.ready == 0);
+                CHECK(ascii->state.interned == SSTATE_NOT_INTERNED);
+                CHECK(ascii->wstr != NULL);
+                CHECK(data == NULL);
+                CHECK(compact->utf8 == NULL);
             }
             else {
-                CHECK(kind == PyUnicode_1BYTE_KIND 
-                                     || kind == PyUnicode_2BYTE_KIND 
-                                     || kind == PyUnicode_4BYTE_KIND); 
-                CHECK(ascii->state.compact == 0); 
-                CHECK(ascii->state.ready == 1); 
-                CHECK(data != NULL); 
+                CHECK(kind == PyUnicode_1BYTE_KIND
+                                     || kind == PyUnicode_2BYTE_KIND
+                                     || kind == PyUnicode_4BYTE_KIND);
+                CHECK(ascii->state.compact == 0);
+                CHECK(ascii->state.ready == 1);
+                CHECK(data != NULL);
                 if (ascii->state.ascii) {
-                    CHECK(compact->utf8 == data); 
-                    CHECK(compact->utf8_length == ascii->length); 
+                    CHECK(compact->utf8 == data);
+                    CHECK(compact->utf8_length == ascii->length);
                 }
                 else
-                    CHECK(compact->utf8 != data); 
+                    CHECK(compact->utf8 != data);
             }
         }
         if (kind != PyUnicode_WCHAR_KIND) {
@@ -583,23 +583,23 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
 #endif
                )
             {
-                CHECK(ascii->wstr == data); 
-                CHECK(compact->wstr_length == ascii->length); 
+                CHECK(ascii->wstr == data);
+                CHECK(compact->wstr_length == ascii->length);
             } else
-                CHECK(ascii->wstr != data); 
+                CHECK(ascii->wstr != data);
         }
 
         if (compact->utf8 == NULL)
-            CHECK(compact->utf8_length == 0); 
+            CHECK(compact->utf8_length == 0);
         if (ascii->wstr == NULL)
-            CHECK(compact->wstr_length == 0); 
+            CHECK(compact->wstr_length == 0);
     }
- 
-    /* check that the best kind is used: O(n) operation */ 
-    if (check_content && kind != PyUnicode_WCHAR_KIND) { 
+
+    /* check that the best kind is used: O(n) operation */
+    if (check_content && kind != PyUnicode_WCHAR_KIND) {
         Py_ssize_t i;
         Py_UCS4 maxchar = 0;
-        const void *data; 
+        const void *data;
         Py_UCS4 ch;
 
         data = PyUnicode_DATA(ascii);
@@ -611,28 +611,28 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
         }
         if (kind == PyUnicode_1BYTE_KIND) {
             if (ascii->state.ascii == 0) {
-                CHECK(maxchar >= 128); 
-                CHECK(maxchar <= 255); 
+                CHECK(maxchar >= 128);
+                CHECK(maxchar <= 255);
             }
             else
-                CHECK(maxchar < 128); 
+                CHECK(maxchar < 128);
         }
         else if (kind == PyUnicode_2BYTE_KIND) {
-            CHECK(maxchar >= 0x100); 
-            CHECK(maxchar <= 0xFFFF); 
+            CHECK(maxchar >= 0x100);
+            CHECK(maxchar <= 0xFFFF);
         }
         else {
-            CHECK(maxchar >= 0x10000); 
-            CHECK(maxchar <= MAX_UNICODE); 
+            CHECK(maxchar >= 0x10000);
+            CHECK(maxchar <= MAX_UNICODE);
         }
-        CHECK(PyUnicode_READ(kind, data, ascii->length) == 0); 
+        CHECK(PyUnicode_READ(kind, data, ascii->length) == 0);
     }
     return 1;
- 
-#undef CHECK 
+
+#undef CHECK
 }
 
- 
+
 static PyObject*
 unicode_result_wchar(PyObject *unicode)
 {
@@ -682,9 +682,9 @@ unicode_result_ready(PyObject *unicode)
         return unicode_empty;
     }
 
-#ifdef LATIN1_SINGLETONS 
+#ifdef LATIN1_SINGLETONS
     if (length == 1) {
-        const void *data = PyUnicode_DATA(unicode); 
+        const void *data = PyUnicode_DATA(unicode);
         int kind = PyUnicode_KIND(unicode);
         Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
         if (ch < 256) {
@@ -704,7 +704,7 @@ unicode_result_ready(PyObject *unicode)
             }
         }
     }
-#endif 
+#endif
 
     assert(_PyUnicode_CheckConsistency(unicode, 1));
     return unicode;
@@ -743,7 +743,7 @@ backslashreplace(_PyBytesWriter *writer, char *str,
     Py_ssize_t size, i;
     Py_UCS4 ch;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
 
     assert(PyUnicode_IS_READY(unicode));
     kind = PyUnicode_KIND(unicode);
@@ -810,7 +810,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
     Py_ssize_t size, i;
     Py_UCS4 ch;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
 
     assert(PyUnicode_IS_READY(unicode));
     kind = PyUnicode_KIND(unicode);
@@ -852,11 +852,11 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str,
 
     /* generate replacement */
     for (i = collstart; i < collend; ++i) {
-        size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i)); 
-        if (size < 0) { 
-            return NULL; 
-        } 
-        str += size; 
+        size = sprintf(str, "&#%d;", PyUnicode_READ(kind, data, i));
+        if (size < 0) {
+            return NULL;
+        }
+        str += size;
     }
     return str;
 }
@@ -890,7 +890,7 @@ static BLOOM_MASK bloom_linebreak = ~(BLOOM_MASK)0;
      (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
 
 static inline BLOOM_MASK
-make_bloom_mask(int kind, const void* ptr, Py_ssize_t len) 
+make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
 {
 #define BLOOM_UPDATE(TYPE, MASK, PTR, LEN)             \
     do {                                               \
@@ -980,14 +980,14 @@ ensure_unicode(PyObject *obj)
 #include "stringlib/find_max_char.h"
 #include "stringlib/undef.h"
 
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
 #include "stringlib/unicodedefs.h"
 #include "stringlib/fastsearch.h"
 #include "stringlib/count.h"
 #include "stringlib/find.h"
 #include "stringlib/undef.h"
-_Py_COMP_DIAG_POP 
+_Py_COMP_DIAG_POP
 
 /* --- Unicode Object ----------------------------------------------------- */
 
@@ -1001,21 +1001,21 @@ findchar(const void *s, int kind,
         if ((Py_UCS1) ch != ch)
             return -1;
         if (direction > 0)
-            return ucs1lib_find_char((const Py_UCS1 *) s, size, (Py_UCS1) ch); 
+            return ucs1lib_find_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
         else
-            return ucs1lib_rfind_char((const Py_UCS1 *) s, size, (Py_UCS1) ch); 
+            return ucs1lib_rfind_char((const Py_UCS1 *) s, size, (Py_UCS1) ch);
     case PyUnicode_2BYTE_KIND:
         if ((Py_UCS2) ch != ch)
             return -1;
         if (direction > 0)
-            return ucs2lib_find_char((const Py_UCS2 *) s, size, (Py_UCS2) ch); 
+            return ucs2lib_find_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
         else
-            return ucs2lib_rfind_char((const Py_UCS2 *) s, size, (Py_UCS2) ch); 
+            return ucs2lib_rfind_char((const Py_UCS2 *) s, size, (Py_UCS2) ch);
     case PyUnicode_4BYTE_KIND:
         if (direction > 0)
-            return ucs4lib_find_char((const Py_UCS4 *) s, size, ch); 
+            return ucs4lib_find_char((const Py_UCS4 *) s, size, ch);
         else
-            return ucs4lib_rfind_char((const Py_UCS4 *) s, size, ch); 
+            return ucs4lib_rfind_char((const Py_UCS4 *) s, size, ch);
     default:
         Py_UNREACHABLE();
     }
@@ -1074,12 +1074,12 @@ resize_compact(PyObject *unicode, Py_ssize_t length)
         _PyUnicode_UTF8(unicode) = NULL;
         _PyUnicode_UTF8_LENGTH(unicode) = 0;
     }
-#ifdef Py_REF_DEBUG 
-    _Py_RefTotal--; 
-#endif 
-#ifdef Py_TRACE_REFS 
+#ifdef Py_REF_DEBUG
+    _Py_RefTotal--;
+#endif
+#ifdef Py_TRACE_REFS
     _Py_ForgetReference(unicode);
-#endif 
+#endif
 
     new_unicode = (PyObject *)PyObject_REALLOC(unicode, new_size);
     if (new_unicode == NULL) {
@@ -1332,18 +1332,18 @@ unicode_kind_name(PyObject *unicode)
 
 #ifdef Py_DEBUG
 /* Functions wrapping macros for use in debugger */
-const char *_PyUnicode_utf8(void *unicode_raw){ 
-    PyObject *unicode = _PyObject_CAST(unicode_raw); 
+const char *_PyUnicode_utf8(void *unicode_raw){
+    PyObject *unicode = _PyObject_CAST(unicode_raw);
     return PyUnicode_UTF8(unicode);
 }
 
-const void *_PyUnicode_compact_data(void *unicode_raw) { 
-    PyObject *unicode = _PyObject_CAST(unicode_raw); 
+const void *_PyUnicode_compact_data(void *unicode_raw) {
+    PyObject *unicode = _PyObject_CAST(unicode_raw);
     return _PyUnicode_COMPACT_DATA(unicode);
 }
-const void *_PyUnicode_data(void *unicode_raw) { 
-    PyObject *unicode = _PyObject_CAST(unicode_raw); 
-    printf("obj %p\n", (void*)unicode); 
+const void *_PyUnicode_data(void *unicode_raw) {
+    PyObject *unicode = _PyObject_CAST(unicode_raw);
+    printf("obj %p\n", (void*)unicode);
     printf("compact %d\n", PyUnicode_IS_COMPACT(unicode));
     printf("compact ascii %d\n", PyUnicode_IS_COMPACT_ASCII(unicode));
     printf("ascii op %p\n", ((void*)((PyASCIIObject*)(unicode) + 1)));
@@ -1358,7 +1358,7 @@ _PyUnicode_Dump(PyObject *op)
     PyASCIIObject *ascii = (PyASCIIObject *)op;
     PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
     PyUnicodeObject *unicode = (PyUnicodeObject *)op;
-    const void *data; 
+    const void *data;
 
     if (ascii->state.compact)
     {
@@ -1374,14 +1374,14 @@ _PyUnicode_Dump(PyObject *op)
 
     if (ascii->wstr == data)
         printf("shared ");
-    printf("wstr=%p", (void *)ascii->wstr); 
+    printf("wstr=%p", (void *)ascii->wstr);
 
     if (!(ascii->state.ascii == 1 && ascii->state.compact == 1)) {
         printf(" (%" PY_FORMAT_SIZE_T "u), ", compact->wstr_length);
         if (!ascii->state.compact && compact->utf8 == unicode->data.any)
             printf("shared ");
         printf("utf8=%p (%" PY_FORMAT_SIZE_T "u)",
-               (void *)compact->utf8, compact->utf8_length); 
+               (void *)compact->utf8, compact->utf8_length);
     }
     printf(", data=%p\n", data);
 }
@@ -1558,8 +1558,8 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
                  Py_ssize_t how_many, int check_maxchar)
 {
     unsigned int from_kind, to_kind;
-    const void *from_data; 
-    void *to_data; 
+    const void *from_data;
+    void *to_data;
 
     assert(0 <= how_many);
     assert(0 <= from_start);
@@ -1584,7 +1584,7 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
     if (!check_maxchar
         && PyUnicode_MAX_CHAR_VALUE(from) > PyUnicode_MAX_CHAR_VALUE(to))
     {
-        Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to); 
+        Py_UCS4 to_maxchar = PyUnicode_MAX_CHAR_VALUE(to);
         Py_UCS4 ch;
         Py_ssize_t i;
         for (i=0; i < how_many; i++) {
@@ -1602,12 +1602,12 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
                check that all written characters are pure ASCII */
             Py_UCS4 max_char;
             max_char = ucs1lib_find_max_char(from_data,
-                                             (const Py_UCS1*)from_data + how_many); 
+                                             (const Py_UCS1*)from_data + how_many);
             if (max_char >= 128)
                 return -1;
         }
         memcpy((char*)to_data + to_kind * to_start,
-                  (const char*)from_data + from_kind * from_start, 
+                  (const char*)from_data + from_kind * from_start,
                   to_kind * how_many);
     }
     else if (from_kind == PyUnicode_1BYTE_KIND
@@ -1794,8 +1794,8 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
             *maxchar = ch;
             if (*maxchar > MAX_UNICODE) {
                 PyErr_Format(PyExc_ValueError,
-                             "character U+%x is not in range [U+0000; U+%x]", 
-                             ch, MAX_UNICODE); 
+                             "character U+%x is not in range [U+0000; U+%x]",
+                             ch, MAX_UNICODE);
                 return -1;
             }
         }
@@ -1891,7 +1891,7 @@ _PyUnicode_Ready(PyObject *unicode)
         _PyUnicode_WSTR_LENGTH(unicode) = 0;
 #endif
     }
-    /* maxchar exceeds 16 bit, wee need 4 bytes for unicode characters */ 
+    /* maxchar exceeds 16 bits, so we need 4 bytes for unicode characters */
     else {
 #if SIZEOF_WCHAR_T == 2
         /* in case the native representation is 2-bytes, we need to allocate a
@@ -1941,32 +1941,32 @@ unicode_dealloc(PyObject *unicode)
 
     case SSTATE_INTERNED_MORTAL:
         /* revive dead object temporarily for DelItem */
-        Py_SET_REFCNT(unicode, 3); 
-#ifdef INTERNED_STRINGS 
-        if (PyDict_DelItem(interned, unicode) != 0) { 
-            _PyErr_WriteUnraisableMsg("deletion of interned string failed", 
-                                      NULL); 
-        } 
-#endif 
+        Py_SET_REFCNT(unicode, 3);
+#ifdef INTERNED_STRINGS
+        if (PyDict_DelItem(interned, unicode) != 0) {
+            _PyErr_WriteUnraisableMsg("deletion of interned string failed",
+                                      NULL);
+        }
+#endif
         break;
 
     case SSTATE_INTERNED_IMMORTAL:
-        _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died"); 
-        break; 
+        _PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
+        break;
 
     default:
-        Py_UNREACHABLE(); 
+        Py_UNREACHABLE();
     }
 
-    if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) { 
+    if (_PyUnicode_HAS_WSTR_MEMORY(unicode)) {
         PyObject_DEL(_PyUnicode_WSTR(unicode));
-    } 
-    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { 
+    }
+    if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) {
         PyObject_DEL(_PyUnicode_UTF8(unicode));
-    } 
-    if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) { 
+    }
+    if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) {
         PyObject_DEL(_PyUnicode_DATA_ANY(unicode));
-    } 
+    }
 
     Py_TYPE(unicode)->tp_free(unicode);
 }
@@ -1975,10 +1975,10 @@ unicode_dealloc(PyObject *unicode)
 static int
 unicode_is_singleton(PyObject *unicode)
 {
-    if (unicode == unicode_empty) { 
-        return 1; 
-    } 
-#ifdef LATIN1_SINGLETONS 
+    if (unicode == unicode_empty) {
+        return 1;
+    }
+#ifdef LATIN1_SINGLETONS
     PyASCIIObject *ascii = (PyASCIIObject *)unicode;
     if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
     {
@@ -1986,7 +1986,7 @@ unicode_is_singleton(PyObject *unicode)
         if (ch < 256 && unicode_latin1[ch] == unicode)
             return 1;
     }
-#endif 
+#endif
     return 0;
 }
 #endif
@@ -2083,10 +2083,10 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
                    const char *str, Py_ssize_t len)
 {
     enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
-    const void *data = PyUnicode_DATA(unicode); 
+    const void *data = PyUnicode_DATA(unicode);
     const char *end = str + len;
 
-    assert(index + len <= PyUnicode_GET_LENGTH(unicode)); 
+    assert(index + len <= PyUnicode_GET_LENGTH(unicode));
     switch (kind) {
     case PyUnicode_1BYTE_KIND: {
 #ifdef Py_DEBUG
@@ -2110,7 +2110,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
         assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
         break;
     }
-    case PyUnicode_4BYTE_KIND: { 
+    case PyUnicode_4BYTE_KIND: {
         Py_UCS4 *start = (Py_UCS4 *)data + index;
         Py_UCS4 *ucs4 = start;
 
@@ -2118,38 +2118,38 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
             *ucs4 = (Py_UCS4)*str;
 
         assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
-        break; 
+        break;
     }
-    default: 
-        Py_UNREACHABLE(); 
+    default:
+        Py_UNREACHABLE();
     }
 }
 
 static PyObject*
 get_latin1_char(unsigned char ch)
 {
-    PyObject *unicode; 
- 
-#ifdef LATIN1_SINGLETONS 
-    unicode = unicode_latin1[ch]; 
-    if (unicode) { 
-        Py_INCREF(unicode); 
-        return unicode; 
-    } 
-#endif 
- 
-    unicode = PyUnicode_New(1, ch); 
+    PyObject *unicode;
+
+#ifdef LATIN1_SINGLETONS
+    unicode = unicode_latin1[ch];
+    if (unicode) {
+        Py_INCREF(unicode);
+        return unicode;
+    }
+#endif
+
+    unicode = PyUnicode_New(1, ch);
     if (!unicode) {
-        return NULL; 
+        return NULL;
     }
- 
-    PyUnicode_1BYTE_DATA(unicode)[0] = ch; 
-    assert(_PyUnicode_CheckConsistency(unicode, 1)); 
- 
-#ifdef LATIN1_SINGLETONS 
+
+    PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+
+#ifdef LATIN1_SINGLETONS
     Py_INCREF(unicode);
-    unicode_latin1[ch] = unicode; 
-#endif 
+    unicode_latin1[ch] = unicode;
+#endif
     return unicode;
 }
 
@@ -2215,20 +2215,20 @@ PyUnicode_FromWideChar(const wchar_t *u, Py_ssize_t size)
     if (size == 0)
         _Py_RETURN_UNICODE_EMPTY();
 
-#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION 
-    /* Oracle Solaris uses non-Unicode internal wchar_t form for 
-       non-Unicode locales and hence needs conversion to UCS-4 first. */ 
-    if (_Py_LocaleUsesNonUnicodeWchar()) { 
-        wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size); 
-        if (!converted) { 
-            return NULL; 
-        } 
-        PyObject *unicode = _PyUnicode_FromUCS4(converted, size); 
-        PyMem_Free(converted); 
-        return unicode; 
-    } 
-#endif 
- 
+#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+    /* Oracle Solaris uses non-Unicode internal wchar_t form for
+       non-Unicode locales and hence needs conversion to UCS-4 first. */
+    if (_Py_LocaleUsesNonUnicodeWchar()) {
+        wchar_t* converted = _Py_DecodeNonUnicodeWchar(u, size);
+        if (!converted) {
+            return NULL;
+        }
+        PyObject *unicode = _PyUnicode_FromUCS4(converted, size);
+        PyMem_Free(converted);
+        return unicode;
+    }
+#endif
+
     /* Single character Unicode objects in the Latin-1 range are
        shared when using this constructor */
     if (size == 1 && (Py_UCS4)*u < 256)
@@ -2316,8 +2316,8 @@ _PyUnicode_FromId(_Py_Identifier *id)
     return id->object;
 }
 
-static void 
-unicode_clear_static_strings(void) 
+static void
+unicode_clear_static_strings(void)
 {
     _Py_Identifier *tmp, *s = static_strings;
     while (s) {
@@ -2464,7 +2464,7 @@ Py_UCS4
 _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
 {
     enum PyUnicode_Kind kind;
-    const void *startptr, *endptr; 
+    const void *startptr, *endptr;
 
     assert(PyUnicode_IS_READY(unicode));
     assert(0 <= start);
@@ -2527,15 +2527,15 @@ unicode_adjust_maxchar(PyObject **p_unicode)
         if (max_char >= 256)
             return;
     }
-    else if (kind == PyUnicode_4BYTE_KIND) { 
+    else if (kind == PyUnicode_4BYTE_KIND) {
         const Py_UCS4 *u = PyUnicode_4BYTE_DATA(unicode);
         max_char = ucs4lib_find_max_char(u, u + len);
         if (max_char >= 0x10000)
             return;
     }
-    else 
-        Py_UNREACHABLE(); 
- 
+    else
+        Py_UNREACHABLE();
+
     copy = PyUnicode_New(len, max_char);
     if (copy != NULL)
         _PyUnicode_FastCopyCharacters(copy, 0, unicode, 0, len);
@@ -2572,12 +2572,12 @@ _PyUnicode_Copy(PyObject *unicode)
 /* Widen Unicode objects to larger buffers. Don't write terminating null
    character. Return NULL on error. */
 
-static void* 
-unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind) 
+static void*
+unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind)
 {
     void *result;
 
-    assert(skind < kind); 
+    assert(skind < kind);
     switch (kind) {
     case PyUnicode_2BYTE_KIND:
         result = PyMem_New(Py_UCS2, len);
@@ -2586,8 +2586,8 @@ unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned in
         assert(skind == PyUnicode_1BYTE_KIND);
         _PyUnicode_CONVERT_BYTES(
             Py_UCS1, Py_UCS2,
-            (const Py_UCS1 *)data, 
-            ((const Py_UCS1 *)data) + len, 
+            (const Py_UCS1 *)data,
+            ((const Py_UCS1 *)data) + len,
             result);
         return result;
     case PyUnicode_4BYTE_KIND:
@@ -2597,22 +2597,22 @@ unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned in
         if (skind == PyUnicode_2BYTE_KIND) {
             _PyUnicode_CONVERT_BYTES(
                 Py_UCS2, Py_UCS4,
-                (const Py_UCS2 *)data, 
-                ((const Py_UCS2 *)data) + len, 
+                (const Py_UCS2 *)data,
+                ((const Py_UCS2 *)data) + len,
                 result);
         }
         else {
             assert(skind == PyUnicode_1BYTE_KIND);
             _PyUnicode_CONVERT_BYTES(
                 Py_UCS1, Py_UCS4,
-                (const Py_UCS1 *)data, 
-                ((const Py_UCS1 *)data) + len, 
+                (const Py_UCS1 *)data,
+                ((const Py_UCS1 *)data) + len,
                 result);
         }
         return result;
     default:
-        Py_UNREACHABLE(); 
-        return NULL; 
+        Py_UNREACHABLE();
+        return NULL;
     }
 }
 
@@ -2621,7 +2621,7 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
         int copy_null)
 {
     int kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t len, targetlen;
     if (PyUnicode_READY(string) == -1)
         return NULL;
@@ -2648,19 +2648,19 @@ as_ucs4(PyObject *string, Py_UCS4 *target, Py_ssize_t targetsize,
         }
     }
     if (kind == PyUnicode_1BYTE_KIND) {
-        const Py_UCS1 *start = (const Py_UCS1 *) data; 
+        const Py_UCS1 *start = (const Py_UCS1 *) data;
         _PyUnicode_CONVERT_BYTES(Py_UCS1, Py_UCS4, start, start + len, target);
     }
     else if (kind == PyUnicode_2BYTE_KIND) {
-        const Py_UCS2 *start = (const Py_UCS2 *) data; 
+        const Py_UCS2 *start = (const Py_UCS2 *) data;
         _PyUnicode_CONVERT_BYTES(Py_UCS2, Py_UCS4, start, start + len, target);
     }
-    else if (kind == PyUnicode_4BYTE_KIND) { 
+    else if (kind == PyUnicode_4BYTE_KIND) {
         memcpy(target, data, len * sizeof(Py_UCS4));
     }
-    else { 
-        Py_UNREACHABLE(); 
-    } 
+    else {
+        Py_UNREACHABLE();
+    }
     if (copy_null)
         target[len] = 0;
     return target;
@@ -3126,83 +3126,83 @@ PyUnicode_FromFormat(const char *format, ...)
     return ret;
 }
 
-static Py_ssize_t 
-unicode_get_widechar_size(PyObject *unicode) 
-{ 
-    Py_ssize_t res; 
- 
-    assert(unicode != NULL); 
-    assert(_PyUnicode_CHECK(unicode)); 
- 
-    if (_PyUnicode_WSTR(unicode) != NULL) { 
-        return PyUnicode_WSTR_LENGTH(unicode); 
-    } 
-    assert(PyUnicode_IS_READY(unicode)); 
- 
-    res = _PyUnicode_LENGTH(unicode); 
-#if SIZEOF_WCHAR_T == 2 
-    if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) { 
-        const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode); 
-        const Py_UCS4 *end = s + res; 
-        for (; s < end; ++s) { 
-            if (*s > 0xFFFF) { 
-                ++res; 
-            } 
-        } 
-    } 
-#endif 
-    return res; 
-} 
- 
-static void 
-unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size) 
-{ 
-    const wchar_t *wstr; 
- 
-    assert(unicode != NULL); 
-    assert(_PyUnicode_CHECK(unicode)); 
- 
-    wstr = _PyUnicode_WSTR(unicode); 
-    if (wstr != NULL) { 
-        memcpy(w, wstr, size * sizeof(wchar_t)); 
-        return; 
-    } 
-    assert(PyUnicode_IS_READY(unicode)); 
- 
-    if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) { 
-        const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode); 
-        for (; size--; ++s, ++w) { 
-            *w = *s; 
-        } 
-    } 
-    else { 
-#if SIZEOF_WCHAR_T == 4 
-        assert(PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND); 
-        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(unicode); 
-        for (; size--; ++s, ++w) { 
-            *w = *s; 
-        } 
-#else 
-        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND); 
-        const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode); 
-        for (; size--; ++s, ++w) { 
-            Py_UCS4 ch = *s; 
-            if (ch > 0xFFFF) { 
-                assert(ch <= MAX_UNICODE); 
-                /* encode surrogate pair in this case */ 
-                *w++ = Py_UNICODE_HIGH_SURROGATE(ch); 
-                if (!size--) 
-                    break; 
-                *w = Py_UNICODE_LOW_SURROGATE(ch); 
-            } 
-            else { 
-                *w = ch; 
-            } 
-        } 
-#endif 
-    } 
-} 
- 
+static Py_ssize_t
+unicode_get_widechar_size(PyObject *unicode)
+{
+    Py_ssize_t res;
+
+    assert(unicode != NULL);
+    assert(_PyUnicode_CHECK(unicode));
+
+    if (_PyUnicode_WSTR(unicode) != NULL) {
+        return PyUnicode_WSTR_LENGTH(unicode);
+    }
+    assert(PyUnicode_IS_READY(unicode));
+
+    res = _PyUnicode_LENGTH(unicode);
+#if SIZEOF_WCHAR_T == 2
+    if (PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND) {
+        const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
+        const Py_UCS4 *end = s + res;
+        for (; s < end; ++s) {
+            if (*s > 0xFFFF) {
+                ++res;
+            }
+        }
+    }
+#endif
+    return res;
+}
+
+static void
+unicode_copy_as_widechar(PyObject *unicode, wchar_t *w, Py_ssize_t size)
+{
+    const wchar_t *wstr;
+
+    assert(unicode != NULL);
+    assert(_PyUnicode_CHECK(unicode));
+
+    wstr = _PyUnicode_WSTR(unicode);
+    if (wstr != NULL) {
+        memcpy(w, wstr, size * sizeof(wchar_t));
+        return;
+    }
+    assert(PyUnicode_IS_READY(unicode));
+
+    if (PyUnicode_KIND(unicode) == PyUnicode_1BYTE_KIND) {
+        const Py_UCS1 *s = PyUnicode_1BYTE_DATA(unicode);
+        for (; size--; ++s, ++w) {
+            *w = *s;
+        }
+    }
+    else {
+#if SIZEOF_WCHAR_T == 4
+        assert(PyUnicode_KIND(unicode) == PyUnicode_2BYTE_KIND);
+        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(unicode);
+        for (; size--; ++s, ++w) {
+            *w = *s;
+        }
+#else
+        assert(PyUnicode_KIND(unicode) == PyUnicode_4BYTE_KIND);
+        const Py_UCS4 *s = PyUnicode_4BYTE_DATA(unicode);
+        for (; size--; ++s, ++w) {
+            Py_UCS4 ch = *s;
+            if (ch > 0xFFFF) {
+                assert(ch <= MAX_UNICODE);
+                /* encode surrogate pair in this case */
+                *w++ = Py_UNICODE_HIGH_SURROGATE(ch);
+                if (!size--)
+                    break;
+                *w = Py_UNICODE_LOW_SURROGATE(ch);
+            }
+            else {
+                *w = ch;
+            }
+        }
+#endif
+    }
+}
+
 #ifdef HAVE_WCHAR_H
 
 /* Convert a Unicode object to a wide character string.
@@ -3224,35 +3224,35 @@ PyUnicode_AsWideChar(PyObject *unicode,
         PyErr_BadInternalCall();
         return -1;
     }
-    if (!PyUnicode_Check(unicode)) { 
-        PyErr_BadArgument(); 
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
         return -1;
-    } 
+    }
+
+    res = unicode_get_widechar_size(unicode);
+    if (w == NULL) {
+        return res + 1;
+    }
 
-    res = unicode_get_widechar_size(unicode); 
-    if (w == NULL) { 
-        return res + 1; 
+    if (size > res) {
+        size = res + 1;
     }
- 
-    if (size > res) { 
-        size = res + 1; 
-    } 
-    else { 
-        res = size; 
-    } 
-    unicode_copy_as_widechar(unicode, w, size); 
- 
-#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION 
-    /* Oracle Solaris uses non-Unicode internal wchar_t form for 
-       non-Unicode locales and hence needs conversion first. */ 
-    if (_Py_LocaleUsesNonUnicodeWchar()) { 
-        if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) { 
-            return -1; 
-        } 
-    } 
-#endif 
- 
-    return res; 
+    else {
+        res = size;
+    }
+    unicode_copy_as_widechar(unicode, w, size);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+    /* Oracle Solaris uses non-Unicode internal wchar_t form for
+       non-Unicode locales and hence needs conversion first. */
+    if (_Py_LocaleUsesNonUnicodeWchar()) {
+        if (_Py_EncodeNonUnicodeWchar_InPlace(w, size) < 0) {
+            return -1;
+        }
+    }
+#endif
+
+    return res;
 }
 
 wchar_t*
@@ -3266,38 +3266,38 @@ PyUnicode_AsWideCharString(PyObject *unicode,
         PyErr_BadInternalCall();
         return NULL;
     }
-    if (!PyUnicode_Check(unicode)) { 
-        PyErr_BadArgument(); 
+    if (!PyUnicode_Check(unicode)) {
+        PyErr_BadArgument();
         return NULL;
     }
 
-    buflen = unicode_get_widechar_size(unicode); 
-    buffer = (wchar_t *) PyMem_NEW(wchar_t, (buflen + 1)); 
+    buflen = unicode_get_widechar_size(unicode);
+    buffer = (wchar_t *) PyMem_NEW(wchar_t, (buflen + 1));
     if (buffer == NULL) {
         PyErr_NoMemory();
         return NULL;
     }
-    unicode_copy_as_widechar(unicode, buffer, buflen + 1); 
- 
-#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION 
-    /* Oracle Solaris uses non-Unicode internal wchar_t form for 
-       non-Unicode locales and hence needs conversion first. */ 
-    if (_Py_LocaleUsesNonUnicodeWchar()) { 
-        if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) { 
-            return NULL; 
-        } 
-    } 
-#endif 
- 
-    if (size != NULL) { 
+    unicode_copy_as_widechar(unicode, buffer, buflen + 1);
+
+#if HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
+    /* Oracle Solaris uses non-Unicode internal wchar_t form for
+       non-Unicode locales and hence needs conversion first. */
+    if (_Py_LocaleUsesNonUnicodeWchar()) {
+        if (_Py_EncodeNonUnicodeWchar_InPlace(buffer, (buflen + 1)) < 0) {
+            return NULL;
+        }
+    }
+#endif
+
+    if (size != NULL) {
         *size = buflen;
-    } 
-    else if (wcslen(buffer) != (size_t)buflen) { 
-        PyMem_FREE(buffer); 
-        PyErr_SetString(PyExc_ValueError, 
-                        "embedded null character"); 
-        return NULL; 
-    } 
+    }
+    else if (wcslen(buffer) != (size_t)buflen) {
+        PyMem_FREE(buffer);
+        PyErr_SetString(PyExc_ValueError,
+                        "embedded null character");
+        return NULL;
+    }
     return buffer;
 }
 
@@ -3352,13 +3352,13 @@ PyUnicode_FromEncodedObject(PyObject *obj,
 
     /* Decoding bytes objects is the most common case and should be fast */
     if (PyBytes_Check(obj)) {
-        if (PyBytes_GET_SIZE(obj) == 0) { 
-            if (unicode_check_encoding_errors(encoding, errors) < 0) { 
-                return NULL; 
-            } 
+        if (PyBytes_GET_SIZE(obj) == 0) {
+            if (unicode_check_encoding_errors(encoding, errors) < 0) {
+                return NULL;
+            }
             _Py_RETURN_UNICODE_EMPTY();
-        } 
-        return PyUnicode_Decode( 
+        }
+        return PyUnicode_Decode(
                 PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj),
                 encoding, errors);
     }
@@ -3379,9 +3379,9 @@ PyUnicode_FromEncodedObject(PyObject *obj,
 
     if (buffer.len == 0) {
         PyBuffer_Release(&buffer);
-        if (unicode_check_encoding_errors(encoding, errors) < 0) { 
-            return NULL; 
-        } 
+        if (unicode_check_encoding_errors(encoding, errors) < 0) {
+            return NULL;
+        }
         _Py_RETURN_UNICODE_EMPTY();
     }
 
@@ -3449,14 +3449,14 @@ PyUnicode_Decode(const char *s,
     Py_buffer info;
     char buflower[11];   /* strlen("iso-8859-1\0") == 11, longest shortcut */
 
-    if (unicode_check_encoding_errors(encoding, errors) < 0) { 
-        return NULL; 
-    } 
- 
-    if (size == 0) { 
-        _Py_RETURN_UNICODE_EMPTY(); 
-    } 
- 
+    if (unicode_check_encoding_errors(encoding, errors) < 0) {
+        return NULL;
+    }
+
+    if (size == 0) {
+        _Py_RETURN_UNICODE_EMPTY();
+    }
+
     if (encoding == NULL) {
         return PyUnicode_DecodeUTF8Stateful(s, size, errors, NULL);
     }
@@ -3639,7 +3639,7 @@ PyUnicode_AsEncodedObject(PyObject *unicode,
 
 
 static PyObject *
-unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler, 
+unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
                       int current_locale)
 {
     Py_ssize_t wlen;
@@ -3658,7 +3658,7 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
     size_t error_pos;
     const char *reason;
     int res = _Py_EncodeLocaleEx(wstr, &str, &error_pos, &reason,
-                                 current_locale, error_handler); 
+                                 current_locale, error_handler);
     PyMem_Free(wstr);
 
     if (res != 0) {
@@ -3674,9 +3674,9 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
                 Py_DECREF(exc);
             }
         }
-        else if (res == -3) { 
-            PyErr_SetString(PyExc_ValueError, "unsupported error handler"); 
-        } 
+        else if (res == -3) {
+            PyErr_SetString(PyExc_ValueError, "unsupported error handler");
+        }
         else {
             PyErr_NoMemory();
         }
@@ -3691,41 +3691,41 @@ unicode_encode_locale(PyObject *unicode, _Py_error_handler error_handler,
 PyObject *
 PyUnicode_EncodeLocale(PyObject *unicode, const char *errors)
 {
-    _Py_error_handler error_handler = _Py_GetErrorHandler(errors); 
-    return unicode_encode_locale(unicode, error_handler, 1); 
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_encode_locale(unicode, error_handler, 1);
 }
 
 PyObject *
 PyUnicode_EncodeFSDefault(PyObject *unicode)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET(); 
-    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec; 
-    if (fs_codec->utf8) { 
-        return unicode_encode_utf8(unicode, 
-                                   fs_codec->error_handler, 
-                                   fs_codec->errors); 
-    } 
-#ifndef _Py_FORCE_UTF8_FS_ENCODING 
-    else if (fs_codec->encoding) { 
-        return PyUnicode_AsEncodedString(unicode, 
-                                         fs_codec->encoding, 
-                                         fs_codec->errors); 
-    } 
-#endif 
-    else { 
-        /* Before _PyUnicode_InitEncodings() is called, the Python codec 
-           machinery is not ready and so cannot be used: 
-           use wcstombs() in this case. */ 
-        const PyConfig *config = _PyInterpreterState_GetConfig(interp); 
-        const wchar_t *filesystem_errors = config->filesystem_errors; 
-        assert(filesystem_errors != NULL); 
-        _Py_error_handler errors = get_error_handler_wide(filesystem_errors); 
-        assert(errors != _Py_ERROR_UNKNOWN); 
-#ifdef _Py_FORCE_UTF8_FS_ENCODING 
-        return unicode_encode_utf8(unicode, errors, NULL); 
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+    if (fs_codec->utf8) {
+        return unicode_encode_utf8(unicode,
+                                   fs_codec->error_handler,
+                                   fs_codec->errors);
+    }
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+    else if (fs_codec->encoding) {
+        return PyUnicode_AsEncodedString(unicode,
+                                         fs_codec->encoding,
+                                         fs_codec->errors);
+    }
+#endif
+    else {
+        /* Before _PyUnicode_InitEncodings() is called, the Python codec
+           machinery is not ready and so cannot be used:
+           use wcstombs() in this case. */
+        const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+        const wchar_t *filesystem_errors = config->filesystem_errors;
+        assert(filesystem_errors != NULL);
+        _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        return unicode_encode_utf8(unicode, errors, NULL);
 #else
-        return unicode_encode_locale(unicode, errors, 0); 
-#endif 
+        return unicode_encode_locale(unicode, errors, 0);
+#endif
     }
 }
 
@@ -3742,10 +3742,10 @@ PyUnicode_AsEncodedString(PyObject *unicode,
         return NULL;
     }
 
-    if (unicode_check_encoding_errors(encoding, errors) < 0) { 
-        return NULL; 
-    } 
- 
+    if (unicode_check_encoding_errors(encoding, errors) < 0) {
+        return NULL;
+    }
+
     if (encoding == NULL) {
         return _PyUnicode_AsUTF8String(unicode, errors);
     }
@@ -3869,8 +3869,8 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
 }
 
 static PyObject*
-unicode_decode_locale(const char *str, Py_ssize_t len, 
-                      _Py_error_handler errors, int current_locale) 
+unicode_decode_locale(const char *str, Py_ssize_t len,
+                      _Py_error_handler errors, int current_locale)
 {
     if (str[len] != '\0' || (size_t)len != strlen(str))  {
         PyErr_SetString(PyExc_ValueError, "embedded null byte");
@@ -3881,7 +3881,7 @@ unicode_decode_locale(const char *str, Py_ssize_t len,
     size_t wlen;
     const char *reason;
     int res = _Py_DecodeLocaleEx(str, &wstr, &wlen, &reason,
-                                 current_locale, errors); 
+                                 current_locale, errors);
     if (res != 0) {
         if (res == -2) {
             PyObject *exc;
@@ -3895,9 +3895,9 @@ unicode_decode_locale(const char *str, Py_ssize_t len,
                 Py_DECREF(exc);
             }
         }
-        else if (res == -3) { 
-            PyErr_SetString(PyExc_ValueError, "unsupported error handler"); 
-        } 
+        else if (res == -3) {
+            PyErr_SetString(PyExc_ValueError, "unsupported error handler");
+        }
         else {
             PyErr_NoMemory();
         }
@@ -3913,16 +3913,16 @@ PyObject*
 PyUnicode_DecodeLocaleAndSize(const char *str, Py_ssize_t len,
                               const char *errors)
 {
-    _Py_error_handler error_handler = _Py_GetErrorHandler(errors); 
-    return unicode_decode_locale(str, len, error_handler, 1); 
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_decode_locale(str, len, error_handler, 1);
 }
 
 PyObject*
 PyUnicode_DecodeLocale(const char *str, const char *errors)
 {
     Py_ssize_t size = (Py_ssize_t)strlen(str);
-    _Py_error_handler error_handler = _Py_GetErrorHandler(errors); 
-    return unicode_decode_locale(str, size, error_handler, 1); 
+    _Py_error_handler error_handler = _Py_GetErrorHandler(errors);
+    return unicode_decode_locale(str, size, error_handler, 1);
 }
 
 
@@ -3935,35 +3935,35 @@ PyUnicode_DecodeFSDefault(const char *s) {
 PyObject*
 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
 {
-    PyInterpreterState *interp = _PyInterpreterState_GET(); 
-    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec; 
-    if (fs_codec->utf8) { 
-        return unicode_decode_utf8(s, size, 
-                                   fs_codec->error_handler, 
-                                   fs_codec->errors, 
-                                   NULL); 
-    } 
-#ifndef _Py_FORCE_UTF8_FS_ENCODING 
-    else if (fs_codec->encoding) { 
-        return PyUnicode_Decode(s, size, 
-                                fs_codec->encoding, 
-                                fs_codec->errors); 
-    } 
-#endif 
-    else { 
-        /* Before _PyUnicode_InitEncodings() is called, the Python codec 
-           machinery is not ready and so cannot be used: 
-           use mbstowcs() in this case. */ 
-        const PyConfig *config = _PyInterpreterState_GetConfig(interp); 
-        const wchar_t *filesystem_errors = config->filesystem_errors; 
-        assert(filesystem_errors != NULL); 
-        _Py_error_handler errors = get_error_handler_wide(filesystem_errors); 
-        assert(errors != _Py_ERROR_UNKNOWN); 
-#ifdef _Py_FORCE_UTF8_FS_ENCODING 
-        return unicode_decode_utf8(s, size, errors, NULL, NULL); 
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+    if (fs_codec->utf8) {
+        return unicode_decode_utf8(s, size,
+                                   fs_codec->error_handler,
+                                   fs_codec->errors,
+                                   NULL);
+    }
+#ifndef _Py_FORCE_UTF8_FS_ENCODING
+    else if (fs_codec->encoding) {
+        return PyUnicode_Decode(s, size,
+                                fs_codec->encoding,
+                                fs_codec->errors);
+    }
+#endif
+    else {
+        /* Before _PyUnicode_InitEncodings() is called, the Python codec
+           machinery is not ready and so cannot be used:
+           use mbstowcs() in this case. */
+        const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+        const wchar_t *filesystem_errors = config->filesystem_errors;
+        assert(filesystem_errors != NULL);
+        _Py_error_handler errors = get_error_handler_wide(filesystem_errors);
+        assert(errors != _Py_ERROR_UNKNOWN);
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+        return unicode_decode_utf8(s, size, errors, NULL, NULL);
 #else
-        return unicode_decode_locale(s, size, errors, 0); 
-#endif 
+        return unicode_decode_locale(s, size, errors, 0);
+#endif
     }
 }
 
@@ -3974,7 +3974,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr)
     PyObject *path = NULL;
     PyObject *output = NULL;
     Py_ssize_t size;
-    const char *data; 
+    const char *data;
     if (arg == NULL) {
         Py_DECREF(*(PyObject**)addr);
         *(PyObject**)addr = NULL;
@@ -4079,8 +4079,8 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
 }
 
 
-static int unicode_fill_utf8(PyObject *unicode); 
- 
+static int unicode_fill_utf8(PyObject *unicode);
+
 const char *
 PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
 {
@@ -4092,7 +4092,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize)
         return NULL;
 
     if (PyUnicode_UTF8(unicode) == NULL) {
-        if (unicode_fill_utf8(unicode) == -1) { 
+        if (unicode_fill_utf8(unicode) == -1) {
             return NULL;
         }
     }
@@ -4115,38 +4115,38 @@ PyUnicode_AsUnicodeAndSize(PyObject *unicode, Py_ssize_t *size)
         PyErr_BadArgument();
         return NULL;
     }
-    Py_UNICODE *w = _PyUnicode_WSTR(unicode); 
-    if (w == NULL) { 
+    Py_UNICODE *w = _PyUnicode_WSTR(unicode);
+    if (w == NULL) {
         /* Non-ASCII compact unicode object */
-        assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND); 
+        assert(_PyUnicode_KIND(unicode) != PyUnicode_WCHAR_KIND);
         assert(PyUnicode_IS_READY(unicode));
 
-        Py_ssize_t wlen = unicode_get_widechar_size(unicode); 
-        if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { 
-            PyErr_NoMemory(); 
+        Py_ssize_t wlen = unicode_get_widechar_size(unicode);
+        if ((size_t)wlen > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
+            PyErr_NoMemory();
             return NULL;
         }
-        w = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * (wlen + 1)); 
-        if (w == NULL) { 
-            PyErr_NoMemory(); 
-            return NULL; 
+        w = (wchar_t *) PyObject_MALLOC(sizeof(wchar_t) * (wlen + 1));
+        if (w == NULL) {
+            PyErr_NoMemory();
+            return NULL;
+        }
+        unicode_copy_as_widechar(unicode, w, wlen + 1);
+        _PyUnicode_WSTR(unicode) = w;
+        if (!PyUnicode_IS_COMPACT_ASCII(unicode)) {
+            _PyUnicode_WSTR_LENGTH(unicode) = wlen;
         }
-        unicode_copy_as_widechar(unicode, w, wlen + 1); 
-        _PyUnicode_WSTR(unicode) = w; 
-        if (!PyUnicode_IS_COMPACT_ASCII(unicode)) { 
-            _PyUnicode_WSTR_LENGTH(unicode) = wlen; 
-        } 
     }
     if (size != NULL)
         *size = PyUnicode_WSTR_LENGTH(unicode);
-    return w; 
+    return w;
 }
 
-/* Deprecated APIs */ 
- 
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
- 
+/* Deprecated APIs */
+
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+
 Py_UNICODE *
 PyUnicode_AsUnicode(PyObject *unicode)
 {
@@ -4185,8 +4185,8 @@ PyUnicode_GetSize(PyObject *unicode)
     return -1;
 }
 
-_Py_COMP_DIAG_POP 
- 
+_Py_COMP_DIAG_POP
+
 Py_ssize_t
 PyUnicode_GetLength(PyObject *unicode)
 {
@@ -4202,7 +4202,7 @@ PyUnicode_GetLength(PyObject *unicode)
 Py_UCS4
 PyUnicode_ReadChar(PyObject *unicode, Py_ssize_t index)
 {
-    const void *data; 
+    const void *data;
     int kind;
 
     if (!PyUnicode_Check(unicode)) {
@@ -4277,21 +4277,21 @@ onError:
 }
 
 #ifdef MS_WINDOWS
-static int 
-widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize) 
-{ 
-    if (newsize > *size) { 
-        wchar_t *newbuf = *buf; 
-        if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) { 
-            PyErr_NoMemory(); 
-            return -1; 
-        } 
-        *buf = newbuf; 
-    } 
-    *size = newsize; 
-    return 0; 
-} 
- 
+static int
+widechar_resize(wchar_t **buf, Py_ssize_t *size, Py_ssize_t newsize)
+{
+    if (newsize > *size) {
+        wchar_t *newbuf = *buf;
+        if (PyMem_Resize(newbuf, wchar_t, newsize) == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        *buf = newbuf;
+    }
+    *size = newsize;
+    return 0;
+}
+
 /* error handling callback helper:
    build arguments, call the callback and check the arguments,
    if no exception occurred, copy the replacement to the output
@@ -4305,7 +4305,7 @@ unicode_decode_call_errorhandler_wchar(
     const char *encoding, const char *reason,
     const char **input, const char **inend, Py_ssize_t *startinpos,
     Py_ssize_t *endinpos, PyObject **exceptionObject, const char **inptr,
-    wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos) 
+    wchar_t **buf, Py_ssize_t *bufsize, Py_ssize_t *outpos)
 {
     static const char *argparse = "Un;decoding error handler must return (str, int) tuple";
 
@@ -4333,7 +4333,7 @@ unicode_decode_call_errorhandler_wchar(
     if (*exceptionObject == NULL)
         goto onError;
 
-    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject); 
+    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
     if (restuple == NULL)
         goto onError;
     if (!PyTuple_Check(restuple)) {
@@ -4362,10 +4362,10 @@ unicode_decode_call_errorhandler_wchar(
         goto onError;
     }
 
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
     repwstr = PyUnicode_AsUnicodeAndSize(repunicode, &repwlen);
-_Py_COMP_DIAG_POP 
+_Py_COMP_DIAG_POP
     if (repwstr == NULL)
         goto onError;
     /* need more space? (at least enough for what we
@@ -4379,15 +4379,15 @@ _Py_COMP_DIAG_POP
     if (requiredsize > PY_SSIZE_T_MAX - (insize - newpos))
         goto overflow;
     requiredsize += insize - newpos;
-    outsize = *bufsize; 
+    outsize = *bufsize;
     if (requiredsize > outsize) {
         if (outsize <= PY_SSIZE_T_MAX/2 && requiredsize < 2*outsize)
             requiredsize = 2*outsize;
-        if (widechar_resize(buf, bufsize, requiredsize) < 0) { 
+        if (widechar_resize(buf, bufsize, requiredsize) < 0) {
             goto onError;
-        } 
+        }
     }
-    wcsncpy(*buf + *outpos, repwstr, repwlen); 
+    wcsncpy(*buf + *outpos, repwstr, repwlen);
     *outpos += repwlen;
     *endinpos = newpos;
     *inptr = *input + newpos;
@@ -4440,7 +4440,7 @@ unicode_decode_call_errorhandler_writer(
     if (*exceptionObject == NULL)
         goto onError;
 
-    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject); 
+    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
     if (restuple == NULL)
         goto onError;
     if (!PyTuple_Check(restuple)) {
@@ -4714,11 +4714,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                     goto onError;
             }
-            else if (s < e && !IS_BASE64(*s)) { 
-                s++; 
-                errmsg = "ill-formed sequence"; 
-                goto utf7Error; 
-            } 
+            else if (s < e && !IS_BASE64(*s)) {
+                s++;
+                errmsg = "ill-formed sequence";
+                goto utf7Error;
+            }
             else { /* begin base64-encoded section */
                 inShift = 1;
                 surrogate = 0;
@@ -4807,7 +4807,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
                       const char *errors)
 {
     int kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t len;
     PyObject *v;
     int inShift = 0;
@@ -4815,7 +4815,7 @@ _PyUnicode_EncodeUTF7(PyObject *str,
     unsigned int base64bits = 0;
     unsigned long base64buffer = 0;
     char * out;
-    const char * start; 
+    const char * start;
 
     if (PyUnicode_READY(str) == -1)
         return NULL;
@@ -5007,7 +5007,7 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
             /* Help allocation */
             const char *_p = p;
             while (_p < aligned_end) {
-                unsigned long value = *(const unsigned long *) _p; 
+                unsigned long value = *(const unsigned long *) _p;
                 if (value & ASCII_CHAR_MASK)
                     break;
                 _p += SIZEOF_LONG;
@@ -5024,10 +5024,10 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
     return p - start;
 }
 
-static PyObject * 
-unicode_decode_utf8(const char *s, Py_ssize_t size, 
-                    _Py_error_handler error_handler, const char *errors, 
-                    Py_ssize_t *consumed) 
+static PyObject *
+unicode_decode_utf8(const char *s, Py_ssize_t size,
+                    _Py_error_handler error_handler, const char *errors,
+                    Py_ssize_t *consumed)
 {
     if (size == 0) {
         if (consumed)
@@ -5042,29 +5042,29 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
         return get_latin1_char((unsigned char)s[0]);
     }
 
-    const char *starts = s; 
-    const char *end = s + size; 
+    const char *starts = s;
+    const char *end = s + size;
+
+    // fast path: try ASCII string.
+    PyObject *u = PyUnicode_New(size, 127);
+    if (u == NULL) {
+        return NULL;
+    }
+    s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u));
+    if (s == end) {
+        return u;
+    }
+
+    // Use _PyUnicodeWriter after fast path is failed.
+    _PyUnicodeWriter writer;
+    _PyUnicodeWriter_InitWithBuffer(&writer, u);
+    writer.pos = s - starts;
+
+    Py_ssize_t startinpos, endinpos;
+    const char *errmsg = "";
+    PyObject *error_handler_obj = NULL;
+    PyObject *exc = NULL;
 
-    // fast path: try ASCII string. 
-    PyObject *u = PyUnicode_New(size, 127); 
-    if (u == NULL) { 
-        return NULL; 
-    } 
-    s += ascii_decode(s, end, PyUnicode_1BYTE_DATA(u)); 
-    if (s == end) { 
-        return u; 
-    } 
- 
-    // Use _PyUnicodeWriter after fast path is failed. 
-    _PyUnicodeWriter writer; 
-    _PyUnicodeWriter_InitWithBuffer(&writer, u); 
-    writer.pos = s - starts; 
- 
-    Py_ssize_t startinpos, endinpos; 
-    const char *errmsg = ""; 
-    PyObject *error_handler_obj = NULL; 
-    PyObject *exc = NULL; 
- 
     while (s < end) {
         Py_UCS4 ch;
         int kind = writer.kind;
@@ -5095,13 +5095,13 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
             endinpos = startinpos + 1;
             break;
         case 2:
-            if (consumed && (unsigned char)s[0] == 0xED && end - s == 2 
-                && (unsigned char)s[1] >= 0xA0 && (unsigned char)s[1] <= 0xBF) 
-            { 
-                /* Truncated surrogate code in range D800-DFFF */ 
-                goto End; 
-            } 
-            /* fall through */ 
+            if (consumed && (unsigned char)s[0] == 0xED && end - s == 2
+                && (unsigned char)s[1] >= 0xA0 && (unsigned char)s[1] <= 0xBF)
+            {
+                /* Truncated surrogate code in range D800-DFFF */
+                goto End;
+            }
+            /* fall through */
         case 3:
         case 4:
             errmsg = "invalid continuation byte";
@@ -5115,7 +5115,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
         }
 
         if (error_handler == _Py_ERROR_UNKNOWN)
-            error_handler = _Py_GetErrorHandler(errors); 
+            error_handler = _Py_GetErrorHandler(errors);
 
         switch (error_handler) {
         case _Py_ERROR_IGNORE:
@@ -5170,16 +5170,16 @@ onError:
 }
 
 
-PyObject * 
-PyUnicode_DecodeUTF8Stateful(const char *s, 
-                             Py_ssize_t size, 
-                             const char *errors, 
-                             Py_ssize_t *consumed) 
-{ 
-    return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed); 
-} 
- 
- 
+PyObject *
+PyUnicode_DecodeUTF8Stateful(const char *s,
+                             Py_ssize_t size,
+                             const char *errors,
+                             Py_ssize_t *consumed)
+{
+    return unicode_decode_utf8(s, size, _Py_ERROR_UNKNOWN, errors, consumed);
+}
+
+
 /* UTF-8 decoder: use surrogateescape error handler if 'surrogateescape' is
    non-zero, use strict error handler otherwise.
 
@@ -5194,29 +5194,29 @@ PyUnicode_DecodeUTF8Stateful(const char *s,
    is not NULL, write the decoding error message into *reason. */
 int
 _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
-                 const char **reason, _Py_error_handler errors) 
+                 const char **reason, _Py_error_handler errors)
 {
     const char *orig_s = s;
     const char *e;
     wchar_t *unicode;
     Py_ssize_t outpos;
 
-    int surrogateescape = 0; 
-    int surrogatepass = 0; 
-    switch (errors) 
-    { 
-    case _Py_ERROR_STRICT: 
-        break; 
-    case _Py_ERROR_SURROGATEESCAPE: 
-        surrogateescape = 1; 
-        break; 
-    case _Py_ERROR_SURROGATEPASS: 
-        surrogatepass = 1; 
-        break; 
-    default: 
-        return -3; 
-    } 
- 
+    int surrogateescape = 0;
+    int surrogatepass = 0;
+    switch (errors)
+    {
+    case _Py_ERROR_STRICT:
+        break;
+    case _Py_ERROR_SURROGATEESCAPE:
+        surrogateescape = 1;
+        break;
+    case _Py_ERROR_SURROGATEPASS:
+        surrogatepass = 1;
+        break;
+    default:
+        return -3;
+    }
+
     /* Note: size will always be longer than the resulting Unicode
        character count */
     if (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t) < (size + 1)) {
@@ -5249,45 +5249,45 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
 #endif
         }
         else {
-            if (!ch && s == e) { 
+            if (!ch && s == e) {
                 break;
-            } 
- 
-            if (surrogateescape) { 
-                unicode[outpos++] = 0xDC00 + (unsigned char)*s++; 
-            } 
-            else { 
-                /* Is it a valid three-byte code? */ 
-                if (surrogatepass 
-                    && (e - s) >= 3 
-                    && (s[0] & 0xf0) == 0xe0 
-                    && (s[1] & 0xc0) == 0x80 
-                    && (s[2] & 0xc0) == 0x80) 
-                { 
-                    ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f); 
-                    s += 3; 
-                    unicode[outpos++] = ch; 
-                } 
-                else { 
-                    PyMem_RawFree(unicode ); 
-                    if (reason != NULL) { 
-                        switch (ch) { 
-                        case 0: 
-                            *reason = "unexpected end of data"; 
-                            break; 
-                        case 1: 
-                            *reason = "invalid start byte"; 
-                            break; 
-                        /* 2, 3, 4 */ 
-                        default: 
-                            *reason = "invalid continuation byte"; 
-                            break; 
-                        } 
+            }
+
+            if (surrogateescape) {
+                unicode[outpos++] = 0xDC00 + (unsigned char)*s++;
+            }
+            else {
+                /* Is it a valid three-byte code? */
+                if (surrogatepass
+                    && (e - s) >= 3
+                    && (s[0] & 0xf0) == 0xe0
+                    && (s[1] & 0xc0) == 0x80
+                    && (s[2] & 0xc0) == 0x80)
+                {
+                    ch = ((s[0] & 0x0f) << 12) + ((s[1] & 0x3f) << 6) + (s[2] & 0x3f);
+                    s += 3;
+                    unicode[outpos++] = ch;
+                }
+                else {
+                    PyMem_RawFree(unicode );
+                    if (reason != NULL) {
+                        switch (ch) {
+                        case 0:
+                            *reason = "unexpected end of data";
+                            break;
+                        case 1:
+                            *reason = "invalid start byte";
+                            break;
+                        /* 2, 3, 4 */
+                        default:
+                            *reason = "invalid continuation byte";
+                            break;
+                        }
                     }
-                    if (wlen != NULL) { 
-                        *wlen = s - orig_s; 
-                    } 
-                    return -2; 
+                    if (wlen != NULL) {
+                        *wlen = s - orig_s;
+                    }
+                    return -2;
                 }
             }
         }
@@ -5300,21 +5300,21 @@ _Py_DecodeUTF8Ex(const char *s, Py_ssize_t size, wchar_t **wstr, size_t *wlen,
     return 0;
 }
 
- 
+
 wchar_t*
-_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen, 
-                               size_t *wlen) 
+_Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
+                               size_t *wlen)
 {
     wchar_t *wstr;
-    int res = _Py_DecodeUTF8Ex(arg, arglen, 
-                               &wstr, wlen, 
-                               NULL, _Py_ERROR_SURROGATEESCAPE); 
+    int res = _Py_DecodeUTF8Ex(arg, arglen,
+                               &wstr, wlen,
+                               NULL, _Py_ERROR_SURROGATEESCAPE);
     if (res != 0) {
-        /* _Py_DecodeUTF8Ex() must support _Py_ERROR_SURROGATEESCAPE */ 
-        assert(res != -3); 
-        if (wlen) { 
-            *wlen = (size_t)res; 
-        } 
+        /* _Py_DecodeUTF8Ex() must support _Py_ERROR_SURROGATEESCAPE */
+        assert(res != -3);
+        if (wlen) {
+            *wlen = (size_t)res;
+        }
         return NULL;
     }
     return wstr;
@@ -5333,29 +5333,29 @@ _Py_DecodeUTF8_surrogateescape(const char *arg, Py_ssize_t arglen,
    On memory allocation failure, return -1. */
 int
 _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
-                 const char **reason, int raw_malloc, _Py_error_handler errors) 
+                 const char **reason, int raw_malloc, _Py_error_handler errors)
 {
     const Py_ssize_t max_char_size = 4;
     Py_ssize_t len = wcslen(text);
 
     assert(len >= 0);
 
-    int surrogateescape = 0; 
-    int surrogatepass = 0; 
-    switch (errors) 
-    { 
-    case _Py_ERROR_STRICT: 
-        break; 
-    case _Py_ERROR_SURROGATEESCAPE: 
-        surrogateescape = 1; 
-        break; 
-    case _Py_ERROR_SURROGATEPASS: 
-        surrogatepass = 1; 
-        break; 
-    default: 
-        return -3; 
-    } 
- 
+    int surrogateescape = 0;
+    int surrogatepass = 0;
+    switch (errors)
+    {
+    case _Py_ERROR_STRICT:
+        break;
+    case _Py_ERROR_SURROGATEESCAPE:
+        surrogateescape = 1;
+        break;
+    case _Py_ERROR_SURROGATEPASS:
+        surrogatepass = 1;
+        break;
+    default:
+        return -3;
+    }
+
     if (len > PY_SSIZE_T_MAX / max_char_size - 1) {
         return -1;
     }
@@ -5372,19 +5372,19 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
 
     char *p = bytes;
     Py_ssize_t i;
-    for (i = 0; i < len; ) { 
-        Py_ssize_t ch_pos = i; 
+    for (i = 0; i < len; ) {
+        Py_ssize_t ch_pos = i;
         Py_UCS4 ch = text[i];
-        i++; 
-#if Py_UNICODE_SIZE == 2 
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) 
-            && i < len 
-            && Py_UNICODE_IS_LOW_SURROGATE(text[i])) 
-        { 
-            ch = Py_UNICODE_JOIN_SURROGATES(ch, text[i]); 
-            i++; 
-        } 
-#endif 
+        i++;
+#if Py_UNICODE_SIZE == 2
+        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+            && i < len
+            && Py_UNICODE_IS_LOW_SURROGATE(text[i]))
+        {
+            ch = Py_UNICODE_JOIN_SURROGATES(ch, text[i]);
+            i++;
+        }
+#endif
 
         if (ch < 0x80) {
             /* Encode ASCII */
@@ -5396,11 +5396,11 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
             *p++ = (char)(0xc0 | (ch >> 6));
             *p++ = (char)(0x80 | (ch & 0x3f));
         }
-        else if (Py_UNICODE_IS_SURROGATE(ch) && !surrogatepass) { 
+        else if (Py_UNICODE_IS_SURROGATE(ch) && !surrogatepass) {
             /* surrogateescape error handler */
             if (!surrogateescape || !(0xDC80 <= ch && ch <= 0xDCFF)) {
                 if (error_pos != NULL) {
-                    *error_pos = (size_t)ch_pos; 
+                    *error_pos = (size_t)ch_pos;
                 }
                 if (reason != NULL) {
                     *reason = "encoding error";
@@ -5463,9 +5463,9 @@ _Py_EncodeUTF8Ex(const wchar_t *text, char **str, size_t *error_pos,
    maximum possible needed (4 result bytes per Unicode character), and return
    the excess memory at the end.
 */
-static PyObject * 
-unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, 
-                    const char *errors) 
+static PyObject *
+unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
+                    const char *errors)
 {
     if (!PyUnicode_Check(unicode)) {
         PyErr_BadArgument();
@@ -5479,96 +5479,96 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
         return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode),
                                          PyUnicode_UTF8_LENGTH(unicode));
 
-    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); 
-    const void *data = PyUnicode_DATA(unicode); 
-    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); 
+    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+    _PyBytesWriter writer;
+    char *end;
 
-    _PyBytesWriter writer; 
-    char *end; 
- 
     switch (kind) {
     default:
         Py_UNREACHABLE();
     case PyUnicode_1BYTE_KIND:
         /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
         assert(!PyUnicode_IS_ASCII(unicode));
-        end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors); 
-        break; 
+        end = ucs1lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
     case PyUnicode_2BYTE_KIND:
-        end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors); 
-        break; 
+        end = ucs2lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
     case PyUnicode_4BYTE_KIND:
-        end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors); 
-        break; 
+        end = ucs4lib_utf8_encoder(&writer, unicode, data, size, error_handler, errors);
+        break;
+    }
+
+    if (end == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        return NULL;
     }
- 
-    if (end == NULL) { 
-        _PyBytesWriter_Dealloc(&writer); 
-        return NULL; 
-    } 
-    return _PyBytesWriter_Finish(&writer, end); 
+    return _PyBytesWriter_Finish(&writer, end);
+}
+
+static int
+unicode_fill_utf8(PyObject *unicode)
+{
+    /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */
+    assert(!PyUnicode_IS_ASCII(unicode));
+
+    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode);
+    const void *data = PyUnicode_DATA(unicode);
+    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode);
+
+    _PyBytesWriter writer;
+    char *end;
+
+    switch (kind) {
+    default:
+        Py_UNREACHABLE();
+    case PyUnicode_1BYTE_KIND:
+        end = ucs1lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
+    case PyUnicode_2BYTE_KIND:
+        end = ucs2lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
+    case PyUnicode_4BYTE_KIND:
+        end = ucs4lib_utf8_encoder(&writer, unicode, data, size,
+                                   _Py_ERROR_STRICT, NULL);
+        break;
+    }
+    if (end == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        return -1;
+    }
+
+    const char *start = writer.use_small_buffer ? writer.small_buffer :
+                    PyBytes_AS_STRING(writer.buffer);
+    Py_ssize_t len = end - start;
+
+    char *cache = PyObject_MALLOC(len + 1);
+    if (cache == NULL) {
+        _PyBytesWriter_Dealloc(&writer);
+        PyErr_NoMemory();
+        return -1;
+    }
+    _PyUnicode_UTF8(unicode) = cache;
+    _PyUnicode_UTF8_LENGTH(unicode) = len;
+    memcpy(cache, start, len);
+    cache[len] = '\0';
+    _PyBytesWriter_Dealloc(&writer);
+    return 0;
 }
 
-static int 
-unicode_fill_utf8(PyObject *unicode) 
-{ 
-    /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */ 
-    assert(!PyUnicode_IS_ASCII(unicode)); 
- 
-    enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); 
-    const void *data = PyUnicode_DATA(unicode); 
-    Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); 
- 
-    _PyBytesWriter writer; 
-    char *end; 
- 
-    switch (kind) { 
-    default: 
-        Py_UNREACHABLE(); 
-    case PyUnicode_1BYTE_KIND: 
-        end = ucs1lib_utf8_encoder(&writer, unicode, data, size, 
-                                   _Py_ERROR_STRICT, NULL); 
-        break; 
-    case PyUnicode_2BYTE_KIND: 
-        end = ucs2lib_utf8_encoder(&writer, unicode, data, size, 
-                                   _Py_ERROR_STRICT, NULL); 
-        break; 
-    case PyUnicode_4BYTE_KIND: 
-        end = ucs4lib_utf8_encoder(&writer, unicode, data, size, 
-                                   _Py_ERROR_STRICT, NULL); 
-        break; 
-    } 
-    if (end == NULL) { 
-        _PyBytesWriter_Dealloc(&writer); 
-        return -1; 
-    } 
- 
-    const char *start = writer.use_small_buffer ? writer.small_buffer : 
-                    PyBytes_AS_STRING(writer.buffer); 
-    Py_ssize_t len = end - start; 
- 
-    char *cache = PyObject_MALLOC(len + 1); 
-    if (cache == NULL) { 
-        _PyBytesWriter_Dealloc(&writer); 
-        PyErr_NoMemory(); 
-        return -1; 
-    } 
-    _PyUnicode_UTF8(unicode) = cache; 
-    _PyUnicode_UTF8_LENGTH(unicode) = len; 
-    memcpy(cache, start, len); 
-    cache[len] = '\0'; 
-    _PyBytesWriter_Dealloc(&writer); 
-    return 0; 
-} 
- 
 PyObject *
-_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors) 
-{ 
-    return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors); 
-} 
- 
- 
-PyObject * 
+_PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
+{
+    return unicode_encode_utf8(unicode, _Py_ERROR_UNKNOWN, errors);
+}
+
+
+PyObject *
 PyUnicode_EncodeUTF8(const Py_UNICODE *s,
                      Py_ssize_t size,
                      const char *errors)
@@ -5618,7 +5618,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s,
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    q = (const unsigned char *)s; 
+    q = (const unsigned char *)s;
     e = q + size;
 
     if (byteorder)
@@ -5943,7 +5943,7 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
     PyObject *exc = NULL;
     const char *encoding;
 
-    q = (const unsigned char *)s; 
+    q = (const unsigned char *)s;
     e = q + size;
 
     if (byteorder)
@@ -6271,10 +6271,10 @@ PyUnicode_AsUTF16String(PyObject *unicode)
 static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL;
 
 PyObject *
-_PyUnicode_DecodeUnicodeEscapeInternal(const char *s, 
+_PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
                                Py_ssize_t size,
                                const char *errors,
-                               Py_ssize_t *consumed, 
+                               Py_ssize_t *consumed,
                                const char **first_invalid_escape)
 {
     const char *starts = s;
@@ -6287,9 +6287,9 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
     *first_invalid_escape = NULL;
 
     if (size == 0) {
-        if (consumed) { 
-            *consumed = 0; 
-        } 
+        if (consumed) {
+            *consumed = 0;
+        }
         _Py_RETURN_UNICODE_EMPTY();
     }
     /* Escaped strings will always be longer than the resulting
@@ -6334,11 +6334,11 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
             continue;
         }
 
-        Py_ssize_t startinpos = s - starts - 1; 
+        Py_ssize_t startinpos = s - starts - 1;
         /* \ - Escapes */
         if (s >= end) {
             message = "\\ at end of string";
-            goto incomplete; 
+            goto incomplete;
         }
         c = (unsigned char) *s++;
 
@@ -6392,10 +6392,10 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
             count = 8;
             message = "truncated \\UXXXXXXXX escape";
         hexescape:
-            for (ch = 0; count; ++s, --count) { 
-                if (s >= end) { 
-                    goto incomplete; 
-                } 
+            for (ch = 0; count; ++s, --count) {
+                if (s >= end) {
+                    goto incomplete;
+                }
                 c = (unsigned char)*s;
                 ch <<= 4;
                 if (c >= '0' && c <= '9') {
@@ -6408,7 +6408,7 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
                     ch += c - ('A' - 10);
                 }
                 else {
-                    goto error; 
+                    goto error;
                 }
             }
 
@@ -6437,20 +6437,20 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
             }
 
             message = "malformed \\N character escape";
-            if (s >= end) { 
-                goto incomplete; 
-            } 
-            if (*s == '{') { 
+            if (s >= end) {
+                goto incomplete;
+            }
+            if (*s == '{') {
                 const char *start = ++s;
                 size_t namelen;
                 /* look for the closing brace */
                 while (s < end && *s != '}')
                     s++;
-                if (s >= end) { 
-                    goto incomplete; 
-                } 
+                if (s >= end) {
+                    goto incomplete;
+                }
                 namelen = s - start;
-                if (namelen) { 
+                if (namelen) {
                     /* found a name.  look it up in the unicode database */
                     s++;
                     ch = 0xffffffff; /* in case 'getcode' messes up */
@@ -6476,13 +6476,13 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
             continue;
         }
 
-      incomplete: 
-        if (consumed) { 
-            *consumed = startinpos; 
-            break; 
-        } 
-      error:; 
-        Py_ssize_t endinpos = s-starts; 
+      incomplete:
+        if (consumed) {
+            *consumed = startinpos;
+            break;
+        }
+      error:;
+        Py_ssize_t endinpos = s-starts;
         writer.min_length = end - s + writer.pos;
         if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
@@ -6509,14 +6509,14 @@ _PyUnicode_DecodeUnicodeEscapeInternal(const char *s,
 }
 
 PyObject *
-_PyUnicode_DecodeUnicodeEscapeStateful(const char *s, 
+_PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
                               Py_ssize_t size,
-                              const char *errors, 
-                              Py_ssize_t *consumed) 
+                              const char *errors,
+                              Py_ssize_t *consumed)
 {
     const char *first_invalid_escape;
-    PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors, 
-                                                      consumed, 
+    PyObject *result = _PyUnicode_DecodeUnicodeEscapeInternal(s, size, errors,
+                                                      consumed,
                                                       &first_invalid_escape);
     if (result == NULL)
         return NULL;
@@ -6531,14 +6531,14 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
     return result;
 }
 
-PyObject * 
-PyUnicode_DecodeUnicodeEscape(const char *s, 
-                              Py_ssize_t size, 
-                              const char *errors) 
-{ 
-    return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL); 
-} 
- 
+PyObject *
+PyUnicode_DecodeUnicodeEscape(const char *s,
+                              Py_ssize_t size,
+                              const char *errors)
+{
+    return _PyUnicode_DecodeUnicodeEscapeStateful(s, size, errors, NULL);
+}
+
 /* Return a Unicode-Escape string version of the Unicode object. */
 
 PyObject *
@@ -6548,7 +6548,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
     PyObject *repr;
     char *p;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t expandsize;
 
     /* Initial allocation is based on the longest-possible character
@@ -6677,10 +6677,10 @@ PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 /* --- Raw Unicode Escape Codec ------------------------------------------- */
 
 PyObject *
-_PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s, 
-                                          Py_ssize_t size, 
-                                          const char *errors, 
-                                          Py_ssize_t *consumed) 
+_PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
+                                          Py_ssize_t size,
+                                          const char *errors,
+                                          Py_ssize_t *consumed)
 {
     const char *starts = s;
     _PyUnicodeWriter writer;
@@ -6689,9 +6689,9 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
     PyObject *exc = NULL;
 
     if (size == 0) {
-        if (consumed) { 
-            *consumed = 0; 
-        } 
+        if (consumed) {
+            *consumed = 0;
+        }
         _Py_RETURN_UNICODE_EMPTY();
     }
 
@@ -6700,7 +6700,7 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
        length after conversion to the true value. (But decoding error
        handler might have to resize the string) */
     _PyUnicodeWriter_Init(&writer);
-    writer.min_length = size; 
+    writer.min_length = size;
     if (_PyUnicodeWriter_Prepare(&writer, size, 127) < 0) {
         goto onError;
     }
@@ -6724,21 +6724,21 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
             } while(0)
 
         /* Non-escape characters are interpreted as Unicode ordinals */
-        if (c != '\\' || (s >= end && !consumed)) { 
+        if (c != '\\' || (s >= end && !consumed)) {
             WRITE_CHAR(c);
             continue;
         }
 
-        Py_ssize_t startinpos = s - starts - 1; 
-        /* \ - Escapes */ 
-        if (s >= end) { 
-            assert(consumed); 
-            // Set message to silent compiler warning. 
-            // Actually it is never used. 
-            message = "\\ at end of string"; 
-            goto incomplete; 
-        } 
- 
+        Py_ssize_t startinpos = s - starts - 1;
+        /* \ - Escapes */
+        if (s >= end) {
+            assert(consumed);
+            // Set message to silent compiler warning.
+            // Actually it is never used.
+            message = "\\ at end of string";
+            goto incomplete;
+        }
+
         c = (unsigned char) *s++;
         if (c == 'u') {
             count = 4;
@@ -6756,10 +6756,10 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
         }
 
         /* \uHHHH with 4 hex digits, \U00HHHHHH with 8 */
-        for (ch = 0; count; ++s, --count) { 
-            if (s >= end) { 
-                goto incomplete; 
-            } 
+        for (ch = 0; count; ++s, --count) {
+            if (s >= end) {
+                goto incomplete;
+            }
             c = (unsigned char)*s;
             ch <<= 4;
             if (c >= '0' && c <= '9') {
@@ -6772,23 +6772,23 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
                 ch += c - ('A' - 10);
             }
             else {
-                goto error; 
+                goto error;
             }
         }
-        if (ch > MAX_UNICODE) { 
+        if (ch > MAX_UNICODE) {
             message = "\\Uxxxxxxxx out of range";
-            goto error; 
+            goto error;
         }
-        WRITE_CHAR(ch); 
-        continue; 
+        WRITE_CHAR(ch);
+        continue;
 
-      incomplete: 
-        if (consumed) { 
-            *consumed = startinpos; 
-            break; 
-        } 
-      error:; 
-        Py_ssize_t endinpos = s-starts; 
+      incomplete:
+        if (consumed) {
+            *consumed = startinpos;
+            break;
+        }
+      error:;
+        Py_ssize_t endinpos = s-starts;
         writer.min_length = end - s + writer.pos;
         if (unicode_decode_call_errorhandler_writer(
                 errors, &errorHandler,
@@ -6810,14 +6810,14 @@ _PyUnicode_DecodeRawUnicodeEscapeStateful(const char *s,
     Py_XDECREF(errorHandler);
     Py_XDECREF(exc);
     return NULL;
-} 
+}
 
-PyObject * 
-PyUnicode_DecodeRawUnicodeEscape(const char *s, 
-                                 Py_ssize_t size, 
-                                 const char *errors) 
-{ 
-    return _PyUnicode_DecodeRawUnicodeEscapeStateful(s, size, errors, NULL); 
+PyObject *
+PyUnicode_DecodeRawUnicodeEscape(const char *s,
+                                 Py_ssize_t size,
+                                 const char *errors)
+{
+    return _PyUnicode_DecodeRawUnicodeEscapeStateful(s, size, errors, NULL);
 }
 
 
@@ -6828,7 +6828,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
     char *p;
     Py_ssize_t expandsize, pos;
     int kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t len;
 
     if (!PyUnicode_Check(unicode)) {
@@ -6868,7 +6868,7 @@ PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
         if (ch < 0x100) {
             *p++ = (char) ch;
         }
-        /* U+0100-U+ffff range: Map 16-bit characters to '\uHHHH' */ 
+        /* U+0100-U+ffff range: Map 16-bit characters to '\uHHHH' */
         else if (ch < 0x10000) {
             *p++ = '\\';
             *p++ = 'u';
@@ -6921,7 +6921,7 @@ PyUnicode_DecodeLatin1(const char *s,
                        const char *errors)
 {
     /* Latin-1 is equivalent to the first 256 ordinals in Unicode. */
-    return _PyUnicode_FromUCS1((const unsigned char*)s, size); 
+    return _PyUnicode_FromUCS1((const unsigned char*)s, size);
 }
 
 /* create or adjust a UnicodeEncodeError */
@@ -6996,7 +6996,7 @@ unicode_encode_call_errorhandler(const char *errors,
     if (*exceptionObject == NULL)
         return NULL;
 
-    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject); 
+    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
     if (restuple == NULL)
         return NULL;
     if (!PyTuple_Check(restuple)) {
@@ -7034,7 +7034,7 @@ unicode_encode_ucs1(PyObject *unicode,
     /* input state */
     Py_ssize_t pos=0, size;
     int kind;
-    const void *data; 
+    const void *data;
     /* pointer into the output */
     char *str;
     const char *encoding = (limit == 256) ? "latin-1" : "ascii";
@@ -7085,7 +7085,7 @@ unicode_encode_ucs1(PyObject *unicode,
 
             /* cache callback name lookup (if not done yet, i.e. it's the first error) */
             if (error_handler == _Py_ERROR_UNKNOWN)
-                error_handler = _Py_GetErrorHandler(errors); 
+                error_handler = _Py_GetErrorHandler(errors);
 
             switch (error_handler) {
             case _Py_ERROR_STRICT:
@@ -7245,7 +7245,7 @@ PyUnicode_DecodeASCII(const char *s,
                       const char *errors)
 {
     const char *starts = s;
-    const char *e = s + size; 
+    const char *e = s + size;
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
     _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
@@ -7257,25 +7257,25 @@ PyUnicode_DecodeASCII(const char *s,
     if (size == 1 && (unsigned char)s[0] < 128)
         return get_latin1_char((unsigned char)s[0]);
 
-    // Shortcut for simple case 
-    PyObject *u = PyUnicode_New(size, 127); 
-    if (u == NULL) { 
+    // Shortcut for simple case
+    PyObject *u = PyUnicode_New(size, 127);
+    if (u == NULL) {
         return NULL;
-    } 
-    Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_1BYTE_DATA(u)); 
-    if (outpos == size) { 
-        return u; 
-    } 
+    }
+    Py_ssize_t outpos = ascii_decode(s, e, PyUnicode_1BYTE_DATA(u));
+    if (outpos == size) {
+        return u;
+    }
 
-    _PyUnicodeWriter writer; 
-    _PyUnicodeWriter_InitWithBuffer(&writer, u); 
+    _PyUnicodeWriter writer;
+    _PyUnicodeWriter_InitWithBuffer(&writer, u);
     writer.pos = outpos;
 
-    s += outpos; 
-    int kind = writer.kind; 
-    void *data = writer.data; 
-    Py_ssize_t startinpos, endinpos; 
- 
+    s += outpos;
+    int kind = writer.kind;
+    void *data = writer.data;
+    Py_ssize_t startinpos, endinpos;
+
     while (s < e) {
         unsigned char c = (unsigned char)*s;
         if (c < 128) {
@@ -7288,7 +7288,7 @@ PyUnicode_DecodeASCII(const char *s,
         /* byte outsize range 0x00..0x7f: call the error handler */
 
         if (error_handler == _Py_ERROR_UNKNOWN)
-            error_handler = _Py_GetErrorHandler(errors); 
+            error_handler = _Py_GetErrorHandler(errors);
 
         switch (error_handler)
         {
@@ -7383,12 +7383,12 @@ PyUnicode_AsASCIIString(PyObject *unicode)
 #define NEED_RETRY
 #endif
 
-/* INT_MAX is the theoretical largest chunk (or INT_MAX / 2 when 
-   transcoding from UTF-16), but INT_MAX / 4 performs better in 
-   both cases also and avoids partial characters overrunning the 
-   length limit in MultiByteToWideChar on Windows */ 
-#define DECODING_CHUNK_SIZE (INT_MAX/4) 
- 
+/* INT_MAX is the theoretical largest chunk (or INT_MAX / 2 when
+   transcoding from UTF-16), but INT_MAX / 4 performs better in
+   both cases also and avoids partial characters overrunning the
+   length limit in MultiByteToWideChar on Windows */
+#define DECODING_CHUNK_SIZE (INT_MAX/4)
+
 #ifndef WC_ERR_INVALID_CHARS
 #  define WC_ERR_INVALID_CHARS 0x0080
 #endif
@@ -7430,33 +7430,33 @@ decode_code_page_flags(UINT code_page)
  */
 static int
 decode_code_page_strict(UINT code_page,
-                        wchar_t **buf, 
-                        Py_ssize_t *bufsize, 
+                        wchar_t **buf,
+                        Py_ssize_t *bufsize,
                         const char *in,
                         int insize)
 {
-    DWORD flags = MB_ERR_INVALID_CHARS; 
+    DWORD flags = MB_ERR_INVALID_CHARS;
     wchar_t *out;
     DWORD outsize;
 
     /* First get the size of the result */
     assert(insize > 0);
-    while ((outsize = MultiByteToWideChar(code_page, flags, 
-                                          in, insize, NULL, 0)) <= 0) 
-    { 
-        if (!flags || GetLastError() != ERROR_INVALID_FLAGS) { 
-            goto error; 
-        } 
-        /* For some code pages (e.g. UTF-7) flags must be set to 0. */ 
-        flags = 0; 
-    } 
+    while ((outsize = MultiByteToWideChar(code_page, flags,
+                                          in, insize, NULL, 0)) <= 0)
+    {
+        if (!flags || GetLastError() != ERROR_INVALID_FLAGS) {
+            goto error;
+        }
+        /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+        flags = 0;
+    }
 
-    /* Extend a wchar_t* buffer */ 
-    Py_ssize_t n = *bufsize;   /* Get the current length */ 
-    if (widechar_resize(buf, bufsize, n + outsize) < 0) { 
-        return -1; 
+    /* Extend a wchar_t* buffer */
+    Py_ssize_t n = *bufsize;   /* Get the current length */
+    if (widechar_resize(buf, bufsize, n + outsize) < 0) {
+        return -1;
     }
-    out = *buf + n; 
+    out = *buf + n;
 
     /* Do the conversion */
     outsize = MultiByteToWideChar(code_page, flags, in, insize, out, outsize);
@@ -7480,14 +7480,14 @@ error:
  */
 static int
 decode_code_page_errors(UINT code_page,
-                        wchar_t **buf, 
-                        Py_ssize_t *bufsize, 
+                        wchar_t **buf,
+                        Py_ssize_t *bufsize,
                         const char *in, const int size,
                         const char *errors, int final)
 {
     const char *startin = in;
     const char *endin = in + size;
-    DWORD flags = MB_ERR_INVALID_CHARS; 
+    DWORD flags = MB_ERR_INVALID_CHARS;
     /* Ideally, we should get reason from FormatMessage. This is the Windows
        2000 English version of the message. */
     const char *reason = "No mapping for the Unicode character exists "
@@ -7521,16 +7521,16 @@ decode_code_page_errors(UINT code_page,
         goto error;
     }
 
-    /* Extend a wchar_t* buffer */ 
-    Py_ssize_t n = *bufsize;   /* Get the current length */ 
-    if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) { 
-        PyErr_NoMemory(); 
-        goto error; 
+    /* Extend a wchar_t* buffer */
+    Py_ssize_t n = *bufsize;   /* Get the current length */
+    if (size > (PY_SSIZE_T_MAX - n) / (Py_ssize_t)Py_ARRAY_LENGTH(buffer)) {
+        PyErr_NoMemory();
+        goto error;
     }
-    if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) { 
-        goto error; 
+    if (widechar_resize(buf, bufsize, n + size * Py_ARRAY_LENGTH(buffer)) < 0) {
+        goto error;
     }
-    out = *buf + n; 
+    out = *buf + n;
 
     /* Decode the byte string character per character */
     while (in < endin)
@@ -7545,11 +7545,11 @@ decode_code_page_errors(UINT code_page,
             if (outsize > 0)
                 break;
             err = GetLastError();
-            if (err == ERROR_INVALID_FLAGS && flags) { 
-                /* For some code pages (e.g. UTF-7) flags must be set to 0. */ 
-                flags = 0; 
-                continue; 
-            } 
+            if (err == ERROR_INVALID_FLAGS && flags) {
+                /* For some code pages (e.g. UTF-7) flags must be set to 0. */
+                flags = 0;
+                continue;
+            }
             if (err != ERROR_NO_UNICODE_TRANSLATION
                 && err != ERROR_INSUFFICIENT_BUFFER)
             {
@@ -7570,16 +7570,16 @@ decode_code_page_errors(UINT code_page,
 
             startinpos = in - startin;
             endinpos = startinpos + 1;
-            outpos = out - *buf; 
+            outpos = out - *buf;
             if (unicode_decode_call_errorhandler_wchar(
                     errors, &errorHandler,
                     encoding, reason,
                     &startin, &endin, &startinpos, &endinpos, &exc, &in,
-                    buf, bufsize, &outpos)) 
+                    buf, bufsize, &outpos))
             {
                 goto error;
             }
-            out = *buf + outpos; 
+            out = *buf + outpos;
         }
         else {
             in += insize;
@@ -7588,9 +7588,9 @@ decode_code_page_errors(UINT code_page,
         }
     }
 
-    /* Shrink the buffer */ 
-    assert(out - *buf <= *bufsize); 
-    *bufsize = out - *buf; 
+    /* Shrink the buffer */
+    assert(out - *buf <= *bufsize);
+    *bufsize = out - *buf;
     /* (in - startin) <= size and size is an int */
     ret = Py_SAFE_DOWNCAST(in - startin, Py_ssize_t, int);
 
@@ -7606,8 +7606,8 @@ decode_code_page_stateful(int code_page,
                           const char *s, Py_ssize_t size,
                           const char *errors, Py_ssize_t *consumed)
 {
-    wchar_t *buf = NULL; 
-    Py_ssize_t bufsize = 0; 
+    wchar_t *buf = NULL;
+    Py_ssize_t bufsize = 0;
     int chunk_size, final, converted, done;
 
     if (code_page < 0) {
@@ -7625,8 +7625,8 @@ decode_code_page_stateful(int code_page,
     do
     {
 #ifdef NEED_RETRY
-        if (size > DECODING_CHUNK_SIZE) { 
-            chunk_size = DECODING_CHUNK_SIZE; 
+        if (size > DECODING_CHUNK_SIZE) {
+            chunk_size = DECODING_CHUNK_SIZE;
             final = 0;
             done = 0;
         }
@@ -7639,21 +7639,21 @@ decode_code_page_stateful(int code_page,
         }
 
         if (chunk_size == 0 && done) {
-            if (buf != NULL) 
+            if (buf != NULL)
                 break;
             _Py_RETURN_UNICODE_EMPTY();
         }
 
-        converted = decode_code_page_strict(code_page, &buf, &bufsize, 
+        converted = decode_code_page_strict(code_page, &buf, &bufsize,
                                             s, chunk_size);
         if (converted == -2)
-            converted = decode_code_page_errors(code_page, &buf, &bufsize, 
+            converted = decode_code_page_errors(code_page, &buf, &bufsize,
                                                 s, chunk_size,
                                                 errors, final);
         assert(converted != 0 || done);
 
         if (converted < 0) {
-            PyMem_Free(buf); 
+            PyMem_Free(buf);
             return NULL;
         }
 
@@ -7664,9 +7664,9 @@ decode_code_page_stateful(int code_page,
         size -= converted;
     } while (!done);
 
-    PyObject *v = PyUnicode_FromWideChar(buf, bufsize); 
-    PyMem_Free(buf); 
-    return v; 
+    PyObject *v = PyUnicode_FromWideChar(buf, bufsize);
+    PyMem_Free(buf);
+    return v;
 }
 
 PyObject *
@@ -7747,10 +7747,10 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
     substring = PyUnicode_Substring(unicode, offset, offset+len);
     if (substring == NULL)
         return -1;
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
     p = PyUnicode_AsUnicodeAndSize(substring, &size);
-_Py_COMP_DIAG_POP 
+_Py_COMP_DIAG_POP
     if (p == NULL) {
         Py_DECREF(substring);
         return -1;
@@ -7952,7 +7952,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
         else {
             Py_ssize_t i;
             enum PyUnicode_Kind kind;
-            const void *data; 
+            const void *data;
 
             if (PyUnicode_READY(rep) == -1) {
                 Py_DECREF(rep);
@@ -8033,8 +8033,8 @@ encode_code_page(int code_page,
     do
     {
 #ifdef NEED_RETRY
-        if (len > DECODING_CHUNK_SIZE) { 
-            chunk_len = DECODING_CHUNK_SIZE; 
+        if (len > DECODING_CHUNK_SIZE) {
+            chunk_len = DECODING_CHUNK_SIZE;
             done = 0;
         }
         else
@@ -8110,7 +8110,7 @@ charmap_decode_string(const char *s,
     PyObject *errorHandler = NULL, *exc = NULL;
     Py_ssize_t maplen;
     enum PyUnicode_Kind mapkind;
-    const void *mapdata; 
+    const void *mapdata;
     Py_UCS4 x;
     unsigned char ch;
 
@@ -8127,7 +8127,7 @@ charmap_decode_string(const char *s,
         /* fast-path for cp037, cp500 and iso8859_1 encodings. iso8859_1
          * is disabled in encoding aliases, latin1 is preferred because
          * its implementation is faster. */
-        const Py_UCS1 *mapdata_ucs1 = (const Py_UCS1 *)mapdata; 
+        const Py_UCS1 *mapdata_ucs1 = (const Py_UCS1 *)mapdata;
         Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
         Py_UCS4 maxchar = writer->maxchar;
 
@@ -8151,7 +8151,7 @@ charmap_decode_string(const char *s,
     while (s < e) {
         if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) {
             enum PyUnicode_Kind outkind = writer->kind;
-            const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata; 
+            const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata;
             if (outkind == PyUnicode_1BYTE_KIND) {
                 Py_UCS1 *outdata = (Py_UCS1 *)writer->data;
                 Py_UCS4 maxchar = writer->maxchar;
@@ -8260,7 +8260,7 @@ charmap_decode_mapping(const char *s,
                 goto Undefined;
             if (value < 0 || value > MAX_UNICODE) {
                 PyErr_Format(PyExc_TypeError,
-                             "character mapping must be in range(0x%x)", 
+                             "character mapping must be in range(0x%x)",
                              (unsigned long)MAX_UNICODE + 1);
                 goto onError;
             }
@@ -8381,11 +8381,11 @@ static PyTypeObject EncodingMapType = {
     sizeof(struct encoding_map),   /*tp_basicsize*/
     0,                      /*tp_itemsize*/
     /* methods */
-    0,                      /*tp_dealloc*/ 
-    0,                      /*tp_vectorcall_offset*/ 
+    0,                      /*tp_dealloc*/
+    0,                      /*tp_vectorcall_offset*/
     0,                      /*tp_getattr*/
     0,                      /*tp_setattr*/
-    0,                      /*tp_as_async*/ 
+    0,                      /*tp_as_async*/
     0,                      /*tp_repr*/
     0,                      /*tp_as_number*/
     0,                      /*tp_as_sequence*/
@@ -8431,7 +8431,7 @@ PyUnicode_BuildEncodingMap(PyObject* string)
     unsigned char *mlevel1, *mlevel2, *mlevel3;
     int count2 = 0, count3 = 0;
     int kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t length;
     Py_UCS4 ch;
 
@@ -8599,7 +8599,7 @@ charmapencode_lookup(Py_UCS4 c, PyObject *mapping)
         /* wrong return value */
         PyErr_Format(PyExc_TypeError,
                      "character mapping must return integer, bytes or None, not %.400s",
-                     Py_TYPE(x)->tp_name); 
+                     Py_TYPE(x)->tp_name);
         Py_DECREF(x);
         return NULL;
     }
@@ -8634,7 +8634,7 @@ charmapencode_output(Py_UCS4 c, PyObject *mapping,
     char *outstart;
     Py_ssize_t outsize = PyBytes_GET_SIZE(*outobj);
 
-    if (Py_IS_TYPE(mapping, &EncodingMapType)) { 
+    if (Py_IS_TYPE(mapping, &EncodingMapType)) {
         int res = encoding_map_lookup(c, mapping);
         Py_ssize_t requiredsize = *outpos+1;
         if (res == -1)
@@ -8695,7 +8695,7 @@ charmap_encoding_error(
     Py_ssize_t size, repsize;
     Py_ssize_t newpos;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t index;
     /* startpos for collecting unencodable chars */
     Py_ssize_t collstartpos = *inpos;
@@ -8713,7 +8713,7 @@ charmap_encoding_error(
     /* find all unencodable characters */
     while (collendpos < size) {
         PyObject *rep;
-        if (Py_IS_TYPE(mapping, &EncodingMapType)) { 
+        if (Py_IS_TYPE(mapping, &EncodingMapType)) {
             ch = PyUnicode_READ_CHAR(unicode, collendpos);
             val = encoding_map_lookup(ch, mapping);
             if (val != -1)
@@ -8736,7 +8736,7 @@ charmap_encoding_error(
     /* cache callback name lookup
      * (if not done yet, i.e. it's the first error) */
     if (*error_handler == _Py_ERROR_UNKNOWN)
-        *error_handler = _Py_GetErrorHandler(errors); 
+        *error_handler = _Py_GetErrorHandler(errors);
 
     switch (*error_handler) {
     case _Py_ERROR_STRICT:
@@ -8845,7 +8845,7 @@ _PyUnicode_EncodeCharmap(PyObject *unicode,
     PyObject *error_handler_obj = NULL;
     PyObject *exc = NULL;
     _Py_error_handler error_handler = _Py_ERROR_UNKNOWN;
-    const void *data; 
+    const void *data;
     int kind;
 
     if (PyUnicode_READY(unicode) == -1)
@@ -8981,7 +8981,7 @@ unicode_translate_call_errorhandler(const char *errors,
     if (*exceptionObject == NULL)
         return NULL;
 
-    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject); 
+    restuple = PyObject_CallOneArg(*errorHandler, *exceptionObject);
     if (restuple == NULL)
         return NULL;
     if (!PyTuple_Check(restuple)) {
@@ -9177,8 +9177,8 @@ unicode_fast_translate(PyObject *input, PyObject *mapping,
 {
     Py_UCS1 ascii_table[128], ch, ch2;
     Py_ssize_t len;
-    const Py_UCS1 *in, *end; 
-    Py_UCS1 *out; 
+    const Py_UCS1 *in, *end;
+    Py_UCS1 *out;
     int res = 0;
 
     len = PyUnicode_GET_LENGTH(input);
@@ -9227,7 +9227,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
                             const char *errors)
 {
     /* input object */
-    const void *data; 
+    const void *data;
     Py_ssize_t size, i;
     int kind;
     /* output buffer */
@@ -9246,7 +9246,7 @@ _PyUnicode_TranslateCharmap(PyObject *input,
 
     if (PyUnicode_READY(input) == -1)
         return NULL;
-    data = PyUnicode_DATA(input); 
+    data = PyUnicode_DATA(input);
     kind = PyUnicode_KIND(input);
     size = PyUnicode_GET_LENGTH(input);
 
@@ -9424,7 +9424,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
     Py_ssize_t i;
     Py_UCS4 maxchar;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
 
     maxchar = 127;
     for (i = 0; i < length; i++) {
@@ -9466,7 +9466,7 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
     PyObject *unicode;
     Py_ssize_t i;
     enum PyUnicode_Kind kind;
-    const void *data; 
+    const void *data;
 
     if (output == NULL) {
         PyErr_BadArgument();
@@ -9544,7 +9544,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
                int direction)
 {
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2, result;
 
     kind1 = PyUnicode_KIND(s1);
@@ -9571,7 +9571,7 @@ any_find_slice(PyObject* s1, PyObject* s2,
     }
 
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return -2;
     }
@@ -9613,9 +9613,9 @@ any_find_slice(PyObject* s1, PyObject* s2,
         }
     }
 
-    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(s2))); 
+    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(s2)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return result;
 }
@@ -9774,7 +9774,7 @@ PyUnicode_Count(PyObject *str,
 {
     Py_ssize_t result;
     int kind1, kind2;
-    const void *buf1 = NULL, *buf2 = NULL; 
+    const void *buf1 = NULL, *buf2 = NULL;
     Py_ssize_t len1, len2;
 
     if (ensure_unicode(str) < 0 || ensure_unicode(substr) < 0)
@@ -9794,7 +9794,7 @@ PyUnicode_Count(PyObject *str,
     buf1 = PyUnicode_DATA(str);
     buf2 = PyUnicode_DATA(substr);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             goto onError;
     }
@@ -9803,24 +9803,24 @@ PyUnicode_Count(PyObject *str,
     case PyUnicode_1BYTE_KIND:
         if (PyUnicode_IS_ASCII(str) && PyUnicode_IS_ASCII(substr))
             result = asciilib_count(
-                ((const Py_UCS1*)buf1) + start, end - start, 
+                ((const Py_UCS1*)buf1) + start, end - start,
                 buf2, len2, PY_SSIZE_T_MAX
                 );
         else
             result = ucs1lib_count(
-                ((const Py_UCS1*)buf1) + start, end - start, 
+                ((const Py_UCS1*)buf1) + start, end - start,
                 buf2, len2, PY_SSIZE_T_MAX
                 );
         break;
     case PyUnicode_2BYTE_KIND:
         result = ucs2lib_count(
-            ((const Py_UCS2*)buf1) + start, end - start, 
+            ((const Py_UCS2*)buf1) + start, end - start,
             buf2, len2, PY_SSIZE_T_MAX
             );
         break;
     case PyUnicode_4BYTE_KIND:
         result = ucs4lib_count(
-            ((const Py_UCS4*)buf1) + start, end - start, 
+            ((const Py_UCS4*)buf1) + start, end - start,
             buf2, len2, PY_SSIZE_T_MAX
             );
         break;
@@ -9828,15 +9828,15 @@ PyUnicode_Count(PyObject *str,
         Py_UNREACHABLE();
     }
 
-    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); 
+    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return result;
   onError:
-    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr))); 
-    if (kind2 != kind1) 
-        PyMem_Free((void *)buf2); 
+    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substr)));
+    if (kind2 != kind1)
+        PyMem_Free((void *)buf2);
     return -1;
 }
 
@@ -9884,8 +9884,8 @@ tailmatch(PyObject *self,
 {
     int kind_self;
     int kind_sub;
-    const void *data_self; 
-    const void *data_sub; 
+    const void *data_self;
+    const void *data_sub;
     Py_ssize_t offset;
     Py_ssize_t i;
     Py_ssize_t end_sub;
@@ -9959,8 +9959,8 @@ static PyObject *
 ascii_upper_or_lower(PyObject *self, int lower)
 {
     Py_ssize_t len = PyUnicode_GET_LENGTH(self);
-    const char *data = PyUnicode_DATA(self); 
-    char *resdata; 
+    const char *data = PyUnicode_DATA(self);
+    char *resdata;
     PyObject *res;
 
     res = PyUnicode_New(len, 127);
@@ -9975,7 +9975,7 @@ ascii_upper_or_lower(PyObject *self, int lower)
 }
 
 static Py_UCS4
-handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i) 
+handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i)
 {
     Py_ssize_t j;
     int final_sigma;
@@ -10004,7 +10004,7 @@ handle_capital_sigma(int kind, const void *data, Py_ssize_t length, Py_ssize_t i
 }
 
 static int
-lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i, 
+lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
            Py_UCS4 c, Py_UCS4 *mapped)
 {
     /* Obscure special case. */
@@ -10016,14 +10016,14 @@ lower_ucs4(int kind, const void *data, Py_ssize_t length, Py_ssize_t i,
 }
 
 static Py_ssize_t
-do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) 
+do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
 {
     Py_ssize_t i, k = 0;
     int n_res, j;
     Py_UCS4 c, mapped[3];
 
     c = PyUnicode_READ(kind, data, 0);
-    n_res = _PyUnicode_ToTitleFull(c, mapped); 
+    n_res = _PyUnicode_ToTitleFull(c, mapped);
     for (j = 0; j < n_res; j++) {
         *maxchar = Py_MAX(*maxchar, mapped[j]);
         res[k++] = mapped[j];
@@ -10040,7 +10040,7 @@ do_capitalize(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UC
 }
 
 static Py_ssize_t
-do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) { 
+do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) {
     Py_ssize_t i, k = 0;
 
     for (i = 0; i < length; i++) {
@@ -10065,7 +10065,7 @@ do_swapcase(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
 }
 
 static Py_ssize_t
-do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, 
+do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
                   Py_UCS4 *maxchar, int lower)
 {
     Py_ssize_t i, k = 0;
@@ -10086,19 +10086,19 @@ do_upper_or_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res,
 }
 
 static Py_ssize_t
-do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) 
+do_upper(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
 {
     return do_upper_or_lower(kind, data, length, res, maxchar, 0);
 }
 
 static Py_ssize_t
-do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) 
+do_lower(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
 {
     return do_upper_or_lower(kind, data, length, res, maxchar, 1);
 }
 
 static Py_ssize_t
-do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) 
+do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
 {
     Py_ssize_t i, k = 0;
 
@@ -10115,7 +10115,7 @@ do_casefold(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4
 }
 
 static Py_ssize_t
-do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar) 
+do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar)
 {
     Py_ssize_t i, k = 0;
     int previous_is_cased;
@@ -10143,13 +10143,13 @@ do_title(int kind, const void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *m
 
 static PyObject *
 case_operation(PyObject *self,
-               Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *)) 
+               Py_ssize_t (*perform)(int, const void *, Py_ssize_t, Py_UCS4 *, Py_UCS4 *))
 {
     PyObject *res = NULL;
     Py_ssize_t length, newlength = 0;
     int kind, outkind;
-    const void *data; 
-    void *outdata; 
+    const void *data;
+    void *outdata;
     Py_UCS4 maxchar = 0, *tmp, *tmpend;
 
     assert(PyUnicode_IS_READY(self));
@@ -10396,7 +10396,7 @@ _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
     assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode));
     assert(start >= 0);
     assert(start + length <= PyUnicode_GET_LENGTH(unicode));
-    unicode_fill(kind, data, fill_char, start, length); 
+    unicode_fill(kind, data, fill_char, start, length);
 }
 
 Py_ssize_t
@@ -10467,9 +10467,9 @@ pad(PyObject *self,
     kind = PyUnicode_KIND(u);
     data = PyUnicode_DATA(u);
     if (left)
-        unicode_fill(kind, data, fill, 0, left); 
+        unicode_fill(kind, data, fill, 0, left);
     if (right)
-        unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right); 
+        unicode_fill(kind, data, fill, left + _PyUnicode_LENGTH(self), right);
     _PyUnicode_FastCopyCharacters(u, left, self, 0, _PyUnicode_LENGTH(self));
     assert(_PyUnicode_CheckConsistency(u, 1));
     return u;
@@ -10516,7 +10516,7 @@ split(PyObject *self,
       Py_ssize_t maxcount)
 {
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2;
     PyObject* out;
 
@@ -10571,7 +10571,7 @@ split(PyObject *self,
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return NULL;
     }
@@ -10596,9 +10596,9 @@ split(PyObject *self,
     default:
         out = NULL;
     }
-    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring))); 
+    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
     return out;
 }
 
@@ -10608,7 +10608,7 @@ rsplit(PyObject *self,
        Py_ssize_t maxcount)
 {
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2;
     PyObject* out;
 
@@ -10663,7 +10663,7 @@ rsplit(PyObject *self,
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return NULL;
     }
@@ -10688,15 +10688,15 @@ rsplit(PyObject *self,
     default:
         out = NULL;
     }
-    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring))); 
+    assert((kind2 != kind1) == (buf2 != PyUnicode_DATA(substring)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
     return out;
 }
 
 static Py_ssize_t
-anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1, 
-            PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset) 
+anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
+            PyObject *str2, const void *buf2, Py_ssize_t len2, Py_ssize_t offset)
 {
     switch (kind) {
     case PyUnicode_1BYTE_KIND:
@@ -10713,8 +10713,8 @@ anylib_find(int kind, PyObject *str1, const void *buf1, Py_ssize_t len1,
 }
 
 static Py_ssize_t
-anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen, 
-             PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount) 
+anylib_count(int kind, PyObject *sstr, const void* sbuf, Py_ssize_t slen,
+             PyObject *str1, const void *buf1, Py_ssize_t len1, Py_ssize_t maxcount)
 {
     switch (kind) {
     case PyUnicode_1BYTE_KIND:
@@ -10760,9 +10760,9 @@ replace(PyObject *self, PyObject *str1,
         PyObject *str2, Py_ssize_t maxcount)
 {
     PyObject *u;
-    const char *sbuf = PyUnicode_DATA(self); 
-    const void *buf1 = PyUnicode_DATA(str1); 
-    const void *buf2 = PyUnicode_DATA(str2); 
+    const char *sbuf = PyUnicode_DATA(self);
+    const void *buf1 = PyUnicode_DATA(str1);
+    const void *buf2 = PyUnicode_DATA(str2);
     int srelease = 0, release1 = 0, release2 = 0;
     int skind = PyUnicode_KIND(self);
     int kind1 = PyUnicode_KIND(str1);
@@ -10773,12 +10773,12 @@ replace(PyObject *self, PyObject *str1,
     int mayshrink;
     Py_UCS4 maxchar, maxchar_str1, maxchar_str2;
 
-    if (slen < len1) 
-        goto nothing; 
- 
+    if (slen < len1)
+        goto nothing;
+
     if (maxcount < 0)
         maxcount = PY_SSIZE_T_MAX;
-    else if (maxcount == 0) 
+    else if (maxcount == 0)
         goto nothing;
 
     if (str1 == str2)
@@ -10823,7 +10823,7 @@ replace(PyObject *self, PyObject *str1,
 
             if (kind1 < rkind) {
                 /* widen substring */
-                buf1 = unicode_askind(kind1, buf1, len1, rkind); 
+                buf1 = unicode_askind(kind1, buf1, len1, rkind);
                 if (!buf1) goto error;
                 release1 = 1;
             }
@@ -10832,23 +10832,23 @@ replace(PyObject *self, PyObject *str1,
                 goto nothing;
             if (rkind > kind2) {
                 /* widen replacement */
-                buf2 = unicode_askind(kind2, buf2, len2, rkind); 
+                buf2 = unicode_askind(kind2, buf2, len2, rkind);
                 if (!buf2) goto error;
                 release2 = 1;
             }
             else if (rkind < kind2) {
                 /* widen self and buf1 */
                 rkind = kind2;
-                if (release1) { 
-                    assert(buf1 != PyUnicode_DATA(str1)); 
-                    PyMem_Free((void *)buf1); 
-                    buf1 = PyUnicode_DATA(str1); 
-                    release1 = 0; 
-                } 
-                sbuf = unicode_askind(skind, sbuf, slen, rkind); 
+                if (release1) {
+                    assert(buf1 != PyUnicode_DATA(str1));
+                    PyMem_Free((void *)buf1);
+                    buf1 = PyUnicode_DATA(str1);
+                    release1 = 0;
+                }
+                sbuf = unicode_askind(skind, sbuf, slen, rkind);
                 if (!sbuf) goto error;
                 srelease = 1;
-                buf1 = unicode_askind(kind1, buf1, len1, rkind); 
+                buf1 = unicode_askind(kind1, buf1, len1, rkind);
                 if (!buf1) goto error;
                 release1 = 1;
             }
@@ -10886,7 +10886,7 @@ replace(PyObject *self, PyObject *str1,
 
         if (kind1 < rkind) {
             /* widen substring */
-            buf1 = unicode_askind(kind1, buf1, len1, rkind); 
+            buf1 = unicode_askind(kind1, buf1, len1, rkind);
             if (!buf1) goto error;
             release1 = 1;
         }
@@ -10895,28 +10895,28 @@ replace(PyObject *self, PyObject *str1,
             goto nothing;
         if (kind2 < rkind) {
             /* widen replacement */
-            buf2 = unicode_askind(kind2, buf2, len2, rkind); 
+            buf2 = unicode_askind(kind2, buf2, len2, rkind);
             if (!buf2) goto error;
             release2 = 1;
         }
         else if (kind2 > rkind) {
             /* widen self and buf1 */
             rkind = kind2;
-            sbuf = unicode_askind(skind, sbuf, slen, rkind); 
+            sbuf = unicode_askind(skind, sbuf, slen, rkind);
             if (!sbuf) goto error;
             srelease = 1;
-            if (release1) { 
-                assert(buf1 != PyUnicode_DATA(str1)); 
-                PyMem_Free((void *)buf1); 
-                buf1 = PyUnicode_DATA(str1); 
-                release1 = 0; 
-            } 
-            buf1 = unicode_askind(kind1, buf1, len1, rkind); 
+            if (release1) {
+                assert(buf1 != PyUnicode_DATA(str1));
+                PyMem_Free((void *)buf1);
+                buf1 = PyUnicode_DATA(str1);
+                release1 = 0;
+            }
+            buf1 = unicode_askind(kind1, buf1, len1, rkind);
             if (!buf1) goto error;
             release1 = 1;
         }
         /* new_size = PyUnicode_GET_LENGTH(self) + n * (PyUnicode_GET_LENGTH(str2) -
-           PyUnicode_GET_LENGTH(str1)); */ 
+           PyUnicode_GET_LENGTH(str1)); */
         if (len1 < len2 && len2 - len1 > (PY_SSIZE_T_MAX - slen) / n) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "replace string is too long");
@@ -10999,41 +10999,41 @@ replace(PyObject *self, PyObject *str1,
     }
 
   done:
-    assert(srelease == (sbuf != PyUnicode_DATA(self))); 
-    assert(release1 == (buf1 != PyUnicode_DATA(str1))); 
-    assert(release2 == (buf2 != PyUnicode_DATA(str2))); 
+    assert(srelease == (sbuf != PyUnicode_DATA(self)));
+    assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+    assert(release2 == (buf2 != PyUnicode_DATA(str2)));
     if (srelease)
-        PyMem_FREE((void *)sbuf); 
+        PyMem_FREE((void *)sbuf);
     if (release1)
-        PyMem_FREE((void *)buf1); 
+        PyMem_FREE((void *)buf1);
     if (release2)
-        PyMem_FREE((void *)buf2); 
+        PyMem_FREE((void *)buf2);
     assert(_PyUnicode_CheckConsistency(u, 1));
     return u;
 
   nothing:
     /* nothing to replace; return original string (when possible) */
-    assert(srelease == (sbuf != PyUnicode_DATA(self))); 
-    assert(release1 == (buf1 != PyUnicode_DATA(str1))); 
-    assert(release2 == (buf2 != PyUnicode_DATA(str2))); 
+    assert(srelease == (sbuf != PyUnicode_DATA(self)));
+    assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+    assert(release2 == (buf2 != PyUnicode_DATA(str2)));
     if (srelease)
-        PyMem_FREE((void *)sbuf); 
+        PyMem_FREE((void *)sbuf);
     if (release1)
-        PyMem_FREE((void *)buf1); 
+        PyMem_FREE((void *)buf1);
     if (release2)
-        PyMem_FREE((void *)buf2); 
+        PyMem_FREE((void *)buf2);
     return unicode_result_unchanged(self);
 
   error:
-    assert(srelease == (sbuf != PyUnicode_DATA(self))); 
-    assert(release1 == (buf1 != PyUnicode_DATA(str1))); 
-    assert(release2 == (buf2 != PyUnicode_DATA(str2))); 
-    if (srelease) 
-        PyMem_FREE((void *)sbuf); 
-    if (release1) 
-        PyMem_FREE((void *)buf1); 
-    if (release2) 
-        PyMem_FREE((void *)buf2); 
+    assert(srelease == (sbuf != PyUnicode_DATA(self)));
+    assert(release1 == (buf1 != PyUnicode_DATA(str1)));
+    assert(release2 == (buf2 != PyUnicode_DATA(str2)));
+    if (srelease)
+        PyMem_FREE((void *)sbuf);
+    if (release1)
+        PyMem_FREE((void *)buf1);
+    if (release2)
+        PyMem_FREE((void *)buf2);
     return NULL;
 }
 
@@ -11170,7 +11170,7 @@ unicode_compare(PyObject *str1, PyObject *str2)
     while (0)
 
     int kind1, kind2;
-    const void *data1, *data2; 
+    const void *data1, *data2;
     Py_ssize_t len1, len2, len;
 
     kind1 = PyUnicode_KIND(str1);
@@ -11271,7 +11271,7 @@ static int
 unicode_compare_eq(PyObject *str1, PyObject *str2)
 {
     int kind;
-    const void *data1, *data2; 
+    const void *data1, *data2;
     Py_ssize_t len;
     int cmp;
 
@@ -11305,8 +11305,8 @@ PyUnicode_Compare(PyObject *left, PyObject *right)
     }
     PyErr_Format(PyExc_TypeError,
                  "Can't compare %.100s and %.100s",
-                 Py_TYPE(left)->tp_name, 
-                 Py_TYPE(right)->tp_name); 
+                 Py_TYPE(left)->tp_name,
+                 Py_TYPE(right)->tp_name);
     return -1;
 }
 
@@ -11356,7 +11356,7 @@ PyUnicode_CompareWithASCIIString(PyObject* uni, const char* str)
         return 0;
     }
     else {
-        const void *data = PyUnicode_DATA(uni); 
+        const void *data = PyUnicode_DATA(uni);
         /* Compare Unicode string and source character set string */
         for (i = 0; (chr = PyUnicode_READ(kind, data, i)) && str[i]; i++)
             if (chr != (unsigned char)str[i])
@@ -11447,12 +11447,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
     if (PyUnicode_CHECK_INTERNED(left))
         return 0;
 
-#ifdef INTERNED_STRINGS 
+#ifdef INTERNED_STRINGS
     assert(_PyUnicode_HASH(right_uni) != -1);
-    Py_hash_t hash = _PyUnicode_HASH(left); 
+    Py_hash_t hash = _PyUnicode_HASH(left);
     if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
         return 0;
-#endif 
+#endif
 
     return unicode_compare_eq(left, right_uni);
 }
@@ -11506,7 +11506,7 @@ int
 PyUnicode_Contains(PyObject *str, PyObject *substr)
 {
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2;
     int result;
 
@@ -11537,7 +11537,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
         return result;
     }
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return -1;
     }
@@ -11556,9 +11556,9 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)
         Py_UNREACHABLE();
     }
 
-    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substr))); 
+    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substr)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return result;
 }
@@ -11578,7 +11578,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
     if (!PyUnicode_Check(right)) {
         PyErr_Format(PyExc_TypeError,
                      "can only concatenate str (not \"%.200s\") to str",
-                     Py_TYPE(right)->tp_name); 
+                     Py_TYPE(right)->tp_name);
         return NULL;
     }
     if (PyUnicode_READY(right) < 0)
@@ -11735,7 +11735,7 @@ unicode_count(PyObject *self, PyObject *args)
     Py_ssize_t end = PY_SSIZE_T_MAX;
     PyObject *result;
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2, iresult;
 
     if (!parse_args_finds_unicode("count", args, &substring, &start, &end))
@@ -11755,26 +11755,26 @@ unicode_count(PyObject *self, PyObject *args)
     buf1 = PyUnicode_DATA(self);
     buf2 = PyUnicode_DATA(substring);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return NULL;
     }
     switch (kind1) {
     case PyUnicode_1BYTE_KIND:
         iresult = ucs1lib_count(
-            ((const Py_UCS1*)buf1) + start, end - start, 
+            ((const Py_UCS1*)buf1) + start, end - start,
             buf2, len2, PY_SSIZE_T_MAX
             );
         break;
     case PyUnicode_2BYTE_KIND:
         iresult = ucs2lib_count(
-            ((const Py_UCS2*)buf1) + start, end - start, 
+            ((const Py_UCS2*)buf1) + start, end - start,
             buf2, len2, PY_SSIZE_T_MAX
             );
         break;
     case PyUnicode_4BYTE_KIND:
         iresult = ucs4lib_count(
-            ((const Py_UCS4*)buf1) + start, end - start, 
+            ((const Py_UCS4*)buf1) + start, end - start,
             buf2, len2, PY_SSIZE_T_MAX
             );
         break;
@@ -11784,9 +11784,9 @@ unicode_count(PyObject *self, PyObject *args)
 
     result = PyLong_FromSsize_t(iresult);
 
-    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring))); 
+    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(substring)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return result;
 }
@@ -11830,8 +11830,8 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
     Py_ssize_t i, j, line_pos, src_len, incr;
     Py_UCS4 ch;
     PyObject *u;
-    const void *src_data; 
-    void *dest_data; 
+    const void *src_data;
+    void *dest_data;
     int kind;
     int found;
 
@@ -11882,7 +11882,7 @@ unicode_expandtabs_impl(PyObject *self, int tabsize)
             if (tabsize > 0) {
                 incr = tabsize - (line_pos % tabsize);
                 line_pos += incr;
-                unicode_fill(kind, dest_data, ' ', j, incr); 
+                unicode_fill(kind, dest_data, ' ', j, incr);
                 j += incr;
             }
         }
@@ -11937,7 +11937,7 @@ unicode_find(PyObject *self, PyObject *args)
 static PyObject *
 unicode_getitem(PyObject *self, Py_ssize_t index)
 {
-    const void *data; 
+    const void *data;
     enum PyUnicode_Kind kind;
     Py_UCS4 ch;
 
@@ -11972,7 +11972,7 @@ unicode_hash(PyObject *self)
         return _PyUnicode_HASH(self);
     if (PyUnicode_READY(self) == -1)
         return -1;
- 
+
     x = _Py_HashBytes(PyUnicode_DATA(self),
                       PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self));
     _PyUnicode_HASH(self) = x;
@@ -11982,7 +11982,7 @@ unicode_hash(PyObject *self)
 PyDoc_STRVAR(index__doc__,
              "S.index(sub[, start[, end]]) -> int\n\
 \n\
-Return the lowest index in S where substring sub is found,\n\ 
+Return the lowest index in S where substring sub is found,\n\
 such that sub is contained within S[start:end].  Optional\n\
 arguments start and end are interpreted as in slice notation.\n\
 \n\
@@ -12050,7 +12050,7 @@ unicode_islower_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
     int cased;
 
     if (PyUnicode_READY(self) == -1)
@@ -12095,7 +12095,7 @@ unicode_isupper_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
     int cased;
 
     if (PyUnicode_READY(self) == -1)
@@ -12140,7 +12140,7 @@ unicode_istitle_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
     int cased, previous_is_cased;
 
     if (PyUnicode_READY(self) == -1)
@@ -12198,7 +12198,7 @@ unicode_isspace_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12238,7 +12238,7 @@ unicode_isalpha_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12276,7 +12276,7 @@ unicode_isalnum_impl(PyObject *self)
 /*[clinic end generated code: output=a5a23490ffc3660c input=5c6579bf2e04758c]*/
 {
     int kind;
-    const void *data; 
+    const void *data;
     Py_ssize_t len, i;
 
     if (PyUnicode_READY(self) == -1)
@@ -12319,7 +12319,7 @@ unicode_isdecimal_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12358,7 +12358,7 @@ unicode_isdigit_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12398,7 +12398,7 @@ unicode_isnumeric_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12422,22 +12422,22 @@ unicode_isnumeric_impl(PyObject *self)
     Py_RETURN_TRUE;
 }
 
-Py_ssize_t 
-_PyUnicode_ScanIdentifier(PyObject *self) 
+Py_ssize_t
+_PyUnicode_ScanIdentifier(PyObject *self)
 {
     Py_ssize_t i;
-    if (PyUnicode_READY(self) == -1) 
-        return -1; 
+    if (PyUnicode_READY(self) == -1)
+        return -1;
 
-    Py_ssize_t len = PyUnicode_GET_LENGTH(self); 
-    if (len == 0) { 
-        /* an empty string is not a valid identifier */ 
+    Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+    if (len == 0) {
+        /* an empty string is not a valid identifier */
         return 0;
     }
 
-    int kind = PyUnicode_KIND(self); 
-    const void *data = PyUnicode_DATA(self); 
-    Py_UCS4 ch = PyUnicode_READ(kind, data, 0); 
+    int kind = PyUnicode_KIND(self);
+    const void *data = PyUnicode_DATA(self);
+    Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
     /* PEP 3131 says that the first character must be in
        XID_Start and subsequent characters in XID_Continue,
        and for the ASCII range, the 2.x rules apply (i.e
@@ -12446,70 +12446,70 @@ _PyUnicode_ScanIdentifier(PyObject *self)
        definition of XID_Start and XID_Continue, it is sufficient
        to check just for these, except that _ must be allowed
        as starting an identifier.  */
-    if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) { 
+    if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
         return 0;
-    } 
+    }
+
+    for (i = 1; i < len; i++) {
+        ch = PyUnicode_READ(kind, data, i);
+        if (!_PyUnicode_IsXidContinue(ch)) {
+            return i;
+        }
+    }
+    return i;
+}
 
-    for (i = 1; i < len; i++) { 
-        ch = PyUnicode_READ(kind, data, i); 
-        if (!_PyUnicode_IsXidContinue(ch)) { 
-            return i; 
-        } 
-    } 
-    return i; 
-} 
- 
-int 
-PyUnicode_IsIdentifier(PyObject *self) 
-{ 
-    if (PyUnicode_IS_READY(self)) { 
-        Py_ssize_t i = _PyUnicode_ScanIdentifier(self); 
-        Py_ssize_t len = PyUnicode_GET_LENGTH(self); 
-        /* an empty string is not a valid identifier */ 
-        return len && i == len; 
-    } 
-    else { 
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
-        Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self); 
-        if (len == 0) { 
-            /* an empty string is not a valid identifier */ 
+int
+PyUnicode_IsIdentifier(PyObject *self)
+{
+    if (PyUnicode_IS_READY(self)) {
+        Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
+        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
+        /* an empty string is not a valid identifier */
+        return len && i == len;
+    }
+    else {
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
+        Py_ssize_t i = 0, len = PyUnicode_GET_SIZE(self);
+        if (len == 0) {
+            /* an empty string is not a valid identifier */
+            return 0;
+        }
+
+        const wchar_t *wstr = _PyUnicode_WSTR(self);
+        Py_UCS4 ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+            && i < len
+            && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+        {
+            ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+            i++;
+        }
+#endif
+        if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
             return 0;
-        } 
- 
-        const wchar_t *wstr = _PyUnicode_WSTR(self); 
-        Py_UCS4 ch = wstr[i++]; 
-#if SIZEOF_WCHAR_T == 2 
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch) 
-            && i < len 
-            && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) 
-        { 
-            ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); 
-            i++; 
-        } 
-#endif 
-        if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) { 
-            return 0; 
-        } 
- 
-        while (i < len) { 
-            ch = wstr[i++]; 
-#if SIZEOF_WCHAR_T == 2 
-            if (Py_UNICODE_IS_HIGH_SURROGATE(ch) 
-                && i < len 
-                && Py_UNICODE_IS_LOW_SURROGATE(wstr[i])) 
-            { 
-                ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]); 
-                i++; 
-            } 
-#endif 
-            if (!_PyUnicode_IsXidContinue(ch)) { 
-                return 0; 
-            } 
-        } 
-        return 1; 
-_Py_COMP_DIAG_POP 
-    } 
+        }
+
+        while (i < len) {
+            ch = wstr[i++];
+#if SIZEOF_WCHAR_T == 2
+            if (Py_UNICODE_IS_HIGH_SURROGATE(ch)
+                && i < len
+                && Py_UNICODE_IS_LOW_SURROGATE(wstr[i]))
+            {
+                ch = Py_UNICODE_JOIN_SURROGATES(ch, wstr[i]);
+                i++;
+            }
+#endif
+            if (!_PyUnicode_IsXidContinue(ch)) {
+                return 0;
+            }
+        }
+        return 1;
+_Py_COMP_DIAG_POP
+    }
 }
 
 /*[clinic input]
@@ -12517,13 +12517,13 @@ str.isidentifier as unicode_isidentifier
 
 Return True if the string is a valid Python identifier, False otherwise.
 
-Call keyword.iskeyword(s) to test whether string s is a reserved identifier, 
-such as "def" or "class". 
+Call keyword.iskeyword(s) to test whether string s is a reserved identifier,
+such as "def" or "class".
 [clinic start generated code]*/
 
 static PyObject *
 unicode_isidentifier_impl(PyObject *self)
-/*[clinic end generated code: output=fe585a9666572905 input=2d807a104f21c0c5]*/ 
+/*[clinic end generated code: output=fe585a9666572905 input=2d807a104f21c0c5]*/
 {
     return PyBool_FromLong(PyUnicode_IsIdentifier(self));
 }
@@ -12543,7 +12543,7 @@ unicode_isprintable_impl(PyObject *self)
 {
     Py_ssize_t i, length;
     int kind;
-    const void *data; 
+    const void *data;
 
     if (PyUnicode_READY(self) == -1)
         return NULL;
@@ -12648,7 +12648,7 @@ static const char *stripfuncnames[] = {"lstrip", "rstrip", "strip"};
 PyObject *
 _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
 {
-    const void *data; 
+    const void *data;
     int kind;
     Py_ssize_t i, j, len;
     BLOOM_MASK sepmask;
@@ -12698,7 +12698,7 @@ _PyUnicode_XStrip(PyObject *self, int striptype, PyObject *sepobj)
 PyObject*
 PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
 {
-    const unsigned char *data; 
+    const unsigned char *data;
     int kind;
     Py_ssize_t length;
 
@@ -12721,7 +12721,7 @@ PyUnicode_Substring(PyObject *self, Py_ssize_t start, Py_ssize_t end)
     length = end - start;
     if (PyUnicode_IS_ASCII(self)) {
         data = PyUnicode_1BYTE_DATA(self);
-        return _PyUnicode_FromASCII((const char*)(data + start), length); 
+        return _PyUnicode_FromASCII((const char*)(data + start), length);
     }
     else {
         kind = PyUnicode_KIND(self);
@@ -12743,7 +12743,7 @@ do_strip(PyObject *self, int striptype)
     len = PyUnicode_GET_LENGTH(self);
 
     if (PyUnicode_IS_ASCII(self)) {
-        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self); 
+        const Py_UCS1 *data = PyUnicode_1BYTE_DATA(self);
 
         i = 0;
         if (striptype != RIGHTSTRIP) {
@@ -12769,7 +12769,7 @@ do_strip(PyObject *self, int striptype)
     }
     else {
         int kind = PyUnicode_KIND(self);
-        const void *data = PyUnicode_DATA(self); 
+        const void *data = PyUnicode_DATA(self);
 
         i = 0;
         if (striptype != RIGHTSTRIP) {
@@ -12801,7 +12801,7 @@ do_strip(PyObject *self, int striptype)
 static PyObject *
 do_argstrip(PyObject *self, int striptype, PyObject *sep)
 {
-    if (sep != Py_None) { 
+    if (sep != Py_None) {
         if (PyUnicode_Check(sep))
             return _PyUnicode_XStrip(self, striptype, sep);
         else {
@@ -12822,14 +12822,14 @@ str.strip as unicode_strip
     chars: object = None
     /
 
-Return a copy of the string with leading and trailing whitespace removed. 
+Return a copy of the string with leading and trailing whitespace removed.
 
 If chars is given and not None, remove characters in chars instead.
 [clinic start generated code]*/
 
 static PyObject *
 unicode_strip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/ 
+/*[clinic end generated code: output=ca19018454345d57 input=385289c6f423b954]*/
 {
     return do_argstrip(self, BOTHSTRIP, chars);
 }
@@ -12838,7 +12838,7 @@ unicode_strip_impl(PyObject *self, PyObject *chars)
 /*[clinic input]
 str.lstrip as unicode_lstrip
 
-    chars: object = None 
+    chars: object = None
     /
 
 Return a copy of the string with leading whitespace removed.
@@ -12848,7 +12848,7 @@ If chars is given and not None, remove characters in chars instead.
 
 static PyObject *
 unicode_lstrip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=3b43683251f79ca7 input=529f9f3834448671]*/ 
+/*[clinic end generated code: output=3b43683251f79ca7 input=529f9f3834448671]*/
 {
     return do_argstrip(self, LEFTSTRIP, chars);
 }
@@ -12857,7 +12857,7 @@ unicode_lstrip_impl(PyObject *self, PyObject *chars)
 /*[clinic input]
 str.rstrip as unicode_rstrip
 
-    chars: object = None 
+    chars: object = None
     /
 
 Return a copy of the string with trailing whitespace removed.
@@ -12867,7 +12867,7 @@ If chars is given and not None, remove characters in chars instead.
 
 static PyObject *
 unicode_rstrip_impl(PyObject *self, PyObject *chars)
-/*[clinic end generated code: output=4a59230017cc3b7a input=62566c627916557f]*/ 
+/*[clinic end generated code: output=4a59230017cc3b7a input=62566c627916557f]*/
 {
     return do_argstrip(self, RIGHTSTRIP, chars);
 }
@@ -12902,8 +12902,8 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
     assert(PyUnicode_KIND(u) == PyUnicode_KIND(str));
 
     if (PyUnicode_GET_LENGTH(str) == 1) {
-        int kind = PyUnicode_KIND(str); 
-        Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0); 
+        int kind = PyUnicode_KIND(str);
+        Py_UCS4 fill_char = PyUnicode_READ(kind, PyUnicode_DATA(str), 0);
         if (kind == PyUnicode_1BYTE_KIND) {
             void *to = PyUnicode_DATA(u);
             memset(to, (unsigned char)fill_char, len);
@@ -12922,7 +12922,7 @@ unicode_repeat(PyObject *str, Py_ssize_t len)
     else {
         /* number of characters copied this far */
         Py_ssize_t done = PyUnicode_GET_LENGTH(str);
-        Py_ssize_t char_size = PyUnicode_KIND(str); 
+        Py_ssize_t char_size = PyUnicode_KIND(str);
         char *to = (char *) PyUnicode_DATA(u);
         memcpy(to, PyUnicode_DATA(str),
                   PyUnicode_GET_LENGTH(str) * char_size);
@@ -12975,62 +12975,62 @@ unicode_replace_impl(PyObject *self, PyObject *old, PyObject *new,
     return replace(self, old, new, count);
 }
 
-/*[clinic input] 
-str.removeprefix as unicode_removeprefix 
- 
-    prefix: unicode 
-    / 
- 
-Return a str with the given prefix string removed if present. 
- 
-If the string starts with the prefix string, return string[len(prefix):]. 
-Otherwise, return a copy of the original string. 
-[clinic start generated code]*/ 
- 
+/*[clinic input]
+str.removeprefix as unicode_removeprefix
+
+    prefix: unicode
+    /
+
+Return a str with the given prefix string removed if present.
+
+If the string starts with the prefix string, return string[len(prefix):].
+Otherwise, return a copy of the original string.
+[clinic start generated code]*/
+
+static PyObject *
+unicode_removeprefix_impl(PyObject *self, PyObject *prefix)
+/*[clinic end generated code: output=f1e5945e9763bcb9 input=27ec40b99a37eb88]*/
+{
+    int match = tailmatch(self, prefix, 0, PY_SSIZE_T_MAX, -1);
+    if (match == -1) {
+        return NULL;
+    }
+    if (match) {
+        return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix),
+                                   PyUnicode_GET_LENGTH(self));
+    }
+    return unicode_result_unchanged(self);
+}
+
+/*[clinic input]
+str.removesuffix as unicode_removesuffix
+
+    suffix: unicode
+    /
+
+Return a str with the given suffix string removed if present.
+
+If the string ends with the suffix string and that suffix is not empty,
+return string[:-len(suffix)]. Otherwise, return a copy of the original
+string.
+[clinic start generated code]*/
+
+static PyObject *
+unicode_removesuffix_impl(PyObject *self, PyObject *suffix)
+/*[clinic end generated code: output=d36629e227636822 input=12cc32561e769be4]*/
+{
+    int match = tailmatch(self, suffix, 0, PY_SSIZE_T_MAX, +1);
+    if (match == -1) {
+        return NULL;
+    }
+    if (match) {
+        return PyUnicode_Substring(self, 0, PyUnicode_GET_LENGTH(self)
+                                            - PyUnicode_GET_LENGTH(suffix));
+    }
+    return unicode_result_unchanged(self);
+}
+
 static PyObject *
-unicode_removeprefix_impl(PyObject *self, PyObject *prefix) 
-/*[clinic end generated code: output=f1e5945e9763bcb9 input=27ec40b99a37eb88]*/ 
-{ 
-    int match = tailmatch(self, prefix, 0, PY_SSIZE_T_MAX, -1); 
-    if (match == -1) { 
-        return NULL; 
-    } 
-    if (match) { 
-        return PyUnicode_Substring(self, PyUnicode_GET_LENGTH(prefix), 
-                                   PyUnicode_GET_LENGTH(self)); 
-    } 
-    return unicode_result_unchanged(self); 
-} 
- 
-/*[clinic input] 
-str.removesuffix as unicode_removesuffix 
- 
-    suffix: unicode 
-    / 
- 
-Return a str with the given suffix string removed if present. 
- 
-If the string ends with the suffix string and that suffix is not empty, 
-return string[:-len(suffix)]. Otherwise, return a copy of the original 
-string. 
-[clinic start generated code]*/ 
- 
-static PyObject * 
-unicode_removesuffix_impl(PyObject *self, PyObject *suffix) 
-/*[clinic end generated code: output=d36629e227636822 input=12cc32561e769be4]*/ 
-{ 
-    int match = tailmatch(self, suffix, 0, PY_SSIZE_T_MAX, +1); 
-    if (match == -1) { 
-        return NULL; 
-    } 
-    if (match) { 
-        return PyUnicode_Substring(self, 0, PyUnicode_GET_LENGTH(self) 
-                                            - PyUnicode_GET_LENGTH(suffix)); 
-    } 
-    return unicode_result_unchanged(self); 
-} 
- 
-static PyObject * 
 unicode_repr(PyObject *unicode)
 {
     PyObject *repr;
@@ -13038,8 +13038,8 @@ unicode_repr(PyObject *unicode)
     Py_ssize_t osize, squote, dquote, i, o;
     Py_UCS4 max, quote;
     int ikind, okind, unchanged;
-    const void *idata; 
-    void *odata; 
+    const void *idata;
+    void *odata;
 
     if (PyUnicode_READY(unicode) == -1)
         return NULL;
@@ -13332,7 +13332,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
 {
     PyObject* out;
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2;
 
     if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
@@ -13355,7 +13355,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
     buf1 = PyUnicode_DATA(str_obj);
     buf2 = PyUnicode_DATA(sep_obj);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return NULL;
     }
@@ -13377,9 +13377,9 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
         Py_UNREACHABLE();
     }
 
-    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj))); 
+    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return out;
 }
@@ -13390,7 +13390,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
 {
     PyObject* out;
     int kind1, kind2;
-    const void *buf1, *buf2; 
+    const void *buf1, *buf2;
     Py_ssize_t len1, len2;
 
     if (ensure_unicode(str_obj) < 0 || ensure_unicode(sep_obj) < 0)
@@ -13413,7 +13413,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
     buf1 = PyUnicode_DATA(str_obj);
     buf2 = PyUnicode_DATA(sep_obj);
     if (kind2 != kind1) {
-        buf2 = unicode_askind(kind2, buf2, len2, kind1); 
+        buf2 = unicode_askind(kind2, buf2, len2, kind1);
         if (!buf2)
             return NULL;
     }
@@ -13435,9 +13435,9 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
         Py_UNREACHABLE();
     }
 
-    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj))); 
+    assert((kind2 == kind1) == (buf2 == PyUnicode_DATA(sep_obj)));
     if (kind2 != kind1)
-        PyMem_Free((void *)buf2); 
+        PyMem_Free((void *)buf2);
 
     return out;
 }
@@ -13593,7 +13593,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
         return NULL;
     if (y != NULL) {
         int x_kind, y_kind, z_kind;
-        const void *x_data, *y_data, *z_data; 
+        const void *x_data, *y_data, *z_data;
 
         /* x must be a string too, of equal length */
         if (!PyUnicode_Check(x)) {
@@ -13642,7 +13642,7 @@ unicode_maketrans_impl(PyObject *x, PyObject *y, PyObject *z)
         }
     } else {
         int kind;
-        const void *data; 
+        const void *data;
 
         /* x must be a dict */
         if (!PyDict_CheckExact(x)) {
@@ -13743,7 +13743,7 @@ unicode_zfill_impl(PyObject *self, Py_ssize_t width)
     Py_ssize_t fill;
     PyObject *u;
     int kind;
-    const void *data; 
+    const void *data;
     Py_UCS4 chr;
 
     if (PyUnicode_READY(self) == -1)
@@ -13924,16 +13924,16 @@ _PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
     assert(writer->kind <= PyUnicode_1BYTE_KIND);
 }
 
-// Initialize _PyUnicodeWriter with initial buffer 
-static inline void 
-_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer) 
-{ 
-    memset(writer, 0, sizeof(*writer)); 
-    writer->buffer = buffer; 
-    _PyUnicodeWriter_Update(writer); 
-    writer->min_length = writer->size; 
-} 
- 
+// Initialize _PyUnicodeWriter with initial buffer
+static inline void
+_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
+{
+    memset(writer, 0, sizeof(*writer));
+    writer->buffer = buffer;
+    _PyUnicodeWriter_Update(writer);
+    writer->min_length = writer->size;
+}
+
 int
 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
                                  Py_ssize_t length, Py_UCS4 maxchar)
@@ -14024,7 +14024,7 @@ _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
     {
     case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
     case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
-    case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break; 
+    case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
     default:
         Py_UNREACHABLE();
     }
@@ -14122,7 +14122,7 @@ _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
     if (len == -1)
         len = strlen(ascii);
 
-    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128); 
+    assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
 
     if (writer->buffer == NULL && !writer->overallocate) {
         PyObject *str;
@@ -14181,7 +14181,7 @@ _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
 {
     Py_UCS4 maxchar;
 
-    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len); 
+    maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
     if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
         return -1;
     unicode_write_cstr(writer->buffer, writer->pos, str, len);
@@ -14308,7 +14308,7 @@ unicode_sizeof_impl(PyObject *self)
 }
 
 static PyObject *
-unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored)) 
+unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
 {
     PyObject *copy = _PyUnicode_Copy(v);
     if (!copy)
@@ -14346,8 +14346,8 @@ static PyMethodDef unicode_methods[] = {
     UNICODE_UPPER_METHODDEF
     {"startswith", (PyCFunction) unicode_startswith, METH_VARARGS, startswith__doc__},
     {"endswith", (PyCFunction) unicode_endswith, METH_VARARGS, endswith__doc__},
-    UNICODE_REMOVEPREFIX_METHODDEF 
-    UNICODE_REMOVESUFFIX_METHODDEF 
+    UNICODE_REMOVEPREFIX_METHODDEF
+    UNICODE_REMOVESUFFIX_METHODDEF
     UNICODE_ISASCII_METHODDEF
     UNICODE_ISLOWER_METHODDEF
     UNICODE_ISUPPER_METHODDEF
@@ -14361,7 +14361,7 @@ static PyMethodDef unicode_methods[] = {
     UNICODE_ISIDENTIFIER_METHODDEF
     UNICODE_ISPRINTABLE_METHODDEF
     UNICODE_ZFILL_METHODDEF
-    {"format", (PyCFunction)(void(*)(void)) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, 
+    {"format", (PyCFunction)(void(*)(void)) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
     {"format_map", (PyCFunction) do_string_format_map, METH_O, format_map__doc__},
     UNICODE___FORMAT___METHODDEF
     UNICODE_MAKETRANS_METHODDEF
@@ -14371,7 +14371,7 @@ static PyMethodDef unicode_methods[] = {
     {"_decimal2ascii", (PyCFunction) unicode__decimal2ascii, METH_NOARGS},
 #endif
 
-    {"__getnewargs__",  unicode_getnewargs, METH_NOARGS}, 
+    {"__getnewargs__",  unicode_getnewargs, METH_NOARGS},
     {NULL, NULL}
 };
 
@@ -14407,7 +14407,7 @@ unicode_subscript(PyObject* self, PyObject* item)
     if (PyUnicode_READY(self) == -1)
         return NULL;
 
-    if (_PyIndex_Check(item)) { 
+    if (_PyIndex_Check(item)) {
         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
         if (i == -1 && PyErr_Occurred())
             return NULL;
@@ -14415,11 +14415,11 @@ unicode_subscript(PyObject* self, PyObject* item)
             i += PyUnicode_GET_LENGTH(self);
         return unicode_getitem(self, i);
     } else if (PySlice_Check(item)) {
-        Py_ssize_t start, stop, step, slicelength, i; 
-        size_t cur; 
+        Py_ssize_t start, stop, step, slicelength, i;
+        size_t cur;
         PyObject *result;
-        const void *src_data; 
-        void *dest_data; 
+        const void *src_data;
+        void *dest_data;
         int src_kind, dest_kind;
         Py_UCS4 ch, max_char, kind_limit;
 
@@ -14490,7 +14490,7 @@ struct unicode_formatter_t {
 
     enum PyUnicode_Kind fmtkind;
     Py_ssize_t fmtcnt, fmtpos;
-    const void *fmtdata; 
+    const void *fmtdata;
     PyObject *fmtstr;
 
     _PyUnicodeWriter writer;
@@ -15164,7 +15164,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
 {
     Py_ssize_t len;
     enum PyUnicode_Kind kind;
-    const void *pbuf; 
+    const void *pbuf;
     Py_ssize_t pindex;
     Py_UCS4 signchar;
     Py_ssize_t buflen;
@@ -15274,7 +15274,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
     /* Pad left with the fill character if needed */
     if (arg->width > len && !(arg->flags & F_LJUST)) {
         sublen = arg->width - len;
-        unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen); 
+        unicode_fill(writer->kind, writer->data, fill, writer->pos, sublen);
         writer->pos += sublen;
         arg->width = len;
     }
@@ -15306,7 +15306,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx,
     /* Pad right with the fill character if needed */
     if (arg->width > len) {
         sublen = arg->width - len;
-        unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen); 
+        unicode_fill(writer->kind, writer->data, ' ', writer->pos, sublen);
         writer->pos += sublen;
     }
     return 0;
@@ -15581,52 +15581,52 @@ static PyObject *unicode_iter(PyObject *seq);
 
 PyTypeObject PyUnicode_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
-    "str",                        /* tp_name */ 
-    sizeof(PyUnicodeObject),      /* tp_basicsize */ 
-    0,                            /* tp_itemsize */ 
+    "str",                        /* tp_name */
+    sizeof(PyUnicodeObject),      /* tp_basicsize */
+    0,                            /* tp_itemsize */
     /* Slots */
-    (destructor)unicode_dealloc,  /* tp_dealloc */ 
-    0,                            /* tp_vectorcall_offset */ 
-    0,                            /* tp_getattr */ 
-    0,                            /* tp_setattr */ 
-    0,                            /* tp_as_async */ 
-    unicode_repr,                 /* tp_repr */ 
-    &unicode_as_number,           /* tp_as_number */ 
-    &unicode_as_sequence,         /* tp_as_sequence */ 
-    &unicode_as_mapping,          /* tp_as_mapping */ 
-    (hashfunc) unicode_hash,      /* tp_hash*/ 
-    0,                            /* tp_call*/ 
-    (reprfunc) unicode_str,       /* tp_str */ 
-    PyObject_GenericGetAttr,      /* tp_getattro */ 
-    0,                            /* tp_setattro */ 
-    0,                            /* tp_as_buffer */ 
+    (destructor)unicode_dealloc,  /* tp_dealloc */
+    0,                            /* tp_vectorcall_offset */
+    0,                            /* tp_getattr */
+    0,                            /* tp_setattr */
+    0,                            /* tp_as_async */
+    unicode_repr,                 /* tp_repr */
+    &unicode_as_number,           /* tp_as_number */
+    &unicode_as_sequence,         /* tp_as_sequence */
+    &unicode_as_mapping,          /* tp_as_mapping */
+    (hashfunc) unicode_hash,      /* tp_hash*/
+    0,                            /* tp_call*/
+    (reprfunc) unicode_str,       /* tp_str */
+    PyObject_GenericGetAttr,      /* tp_getattro */
+    0,                            /* tp_setattro */
+    0,                            /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
-    Py_TPFLAGS_UNICODE_SUBCLASS,   /* tp_flags */ 
-    unicode_doc,                  /* tp_doc */ 
-    0,                            /* tp_traverse */ 
-    0,                            /* tp_clear */ 
-    PyUnicode_RichCompare,        /* tp_richcompare */ 
-    0,                            /* tp_weaklistoffset */ 
-    unicode_iter,                 /* tp_iter */ 
-    0,                            /* tp_iternext */ 
-    unicode_methods,              /* tp_methods */ 
-    0,                            /* tp_members */ 
-    0,                            /* tp_getset */ 
-    &PyBaseObject_Type,           /* tp_base */ 
-    0,                            /* tp_dict */ 
-    0,                            /* tp_descr_get */ 
-    0,                            /* tp_descr_set */ 
-    0,                            /* tp_dictoffset */ 
-    0,                            /* tp_init */ 
-    0,                            /* tp_alloc */ 
-    unicode_new,                  /* tp_new */ 
-    PyObject_Del,                 /* tp_free */ 
+    Py_TPFLAGS_UNICODE_SUBCLASS,   /* tp_flags */
+    unicode_doc,                  /* tp_doc */
+    0,                            /* tp_traverse */
+    0,                            /* tp_clear */
+    PyUnicode_RichCompare,        /* tp_richcompare */
+    0,                            /* tp_weaklistoffset */
+    unicode_iter,                 /* tp_iter */
+    0,                            /* tp_iternext */
+    unicode_methods,              /* tp_methods */
+    0,                            /* tp_members */
+    0,                            /* tp_getset */
+    &PyBaseObject_Type,           /* tp_base */
+    0,                            /* tp_dict */
+    0,                            /* tp_descr_get */
+    0,                            /* tp_descr_set */
+    0,                            /* tp_dictoffset */
+    0,                            /* tp_init */
+    0,                            /* tp_alloc */
+    unicode_new,                  /* tp_new */
+    PyObject_Del,                 /* tp_free */
 };
 
 /* Initialize the Unicode implementation */
 
-PyStatus 
-_PyUnicode_Init(void) 
+PyStatus
+_PyUnicode_Init(void)
 {
     /* XXX - move this array to unicodectype.c ? */
     Py_UCS2 linebreak[] = {
@@ -15642,30 +15642,30 @@ _PyUnicode_Init(void)
 
     /* Init the implementation */
     _Py_INCREF_UNICODE_EMPTY();
-    if (!unicode_empty) { 
-        return _PyStatus_ERR("Can't create empty string"); 
-    } 
+    if (!unicode_empty) {
+        return _PyStatus_ERR("Can't create empty string");
+    }
     Py_DECREF(unicode_empty);
 
-    if (PyType_Ready(&PyUnicode_Type) < 0) { 
-        return _PyStatus_ERR("Can't initialize unicode type"); 
-    } 
+    if (PyType_Ready(&PyUnicode_Type) < 0) {
+        return _PyStatus_ERR("Can't initialize unicode type");
+    }
 
     /* initialize the linebreak bloom filter */
     bloom_linebreak = make_bloom_mask(
         PyUnicode_2BYTE_KIND, linebreak,
         Py_ARRAY_LENGTH(linebreak));
 
-    if (PyType_Ready(&EncodingMapType) < 0) { 
-         return _PyStatus_ERR("Can't initialize encoding map type"); 
-    } 
-    if (PyType_Ready(&PyFieldNameIter_Type) < 0) { 
-        return _PyStatus_ERR("Can't initialize field name iterator type"); 
-    } 
-    if (PyType_Ready(&PyFormatterIter_Type) < 0) { 
-        return _PyStatus_ERR("Can't initialize formatter iter type"); 
-    } 
-    return _PyStatus_OK(); 
+    if (PyType_Ready(&EncodingMapType) < 0) {
+         return _PyStatus_ERR("Can't initialize encoding map type");
+    }
+    if (PyType_Ready(&PyFieldNameIter_Type) < 0) {
+        return _PyStatus_ERR("Can't initialize field name iterator type");
+    }
+    if (PyType_Ready(&PyFormatterIter_Type) < 0) {
+        return _PyStatus_ERR("Can't initialize formatter iter type");
+    }
+    return _PyStatus_OK();
 }
 
 
@@ -15677,22 +15677,22 @@ PyUnicode_InternInPlace(PyObject **p)
     assert(s != NULL);
     assert(_PyUnicode_CHECK(s));
 #else
-    if (s == NULL || !PyUnicode_Check(s)) { 
+    if (s == NULL || !PyUnicode_Check(s)) {
         return;
-    } 
+    }
 #endif
- 
+
     /* If it's a subclass, we don't really know what putting
        it in the interned dict might do. */
-    if (!PyUnicode_CheckExact(s)) { 
+    if (!PyUnicode_CheckExact(s)) {
         return;
-    } 
- 
-    if (PyUnicode_CHECK_INTERNED(s)) { 
+    }
+
+    if (PyUnicode_CHECK_INTERNED(s)) {
         return;
-    } 
- 
-#ifdef INTERNED_STRINGS 
+    }
+
+#ifdef INTERNED_STRINGS
     if (interned == NULL) {
         interned = PyDict_New();
         if (interned == NULL) {
@@ -15700,26 +15700,26 @@ PyUnicode_InternInPlace(PyObject **p)
             return;
         }
     }
- 
-    PyObject *t; 
+
+    PyObject *t;
     t = PyDict_SetDefault(interned, s, s);
- 
+
     if (t == NULL) {
         PyErr_Clear();
         return;
     }
- 
+
     if (t != s) {
         Py_INCREF(t);
         Py_SETREF(*p, t);
         return;
     }
- 
+
     /* The two references in interned are not counted by refcnt.
        The deallocator will take care of this */
-    Py_SET_REFCNT(s, Py_REFCNT(s) - 2); 
+    Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
     _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
-#endif 
+#endif
 }
 
 void
@@ -15742,67 +15742,67 @@ PyUnicode_InternFromString(const char *cp)
     return s;
 }
 
- 
-#if defined(WITH_VALGRIND) || defined(__INSURE__) 
-static void 
-unicode_release_interned(void) 
+
+#if defined(WITH_VALGRIND) || defined(__INSURE__)
+static void
+unicode_release_interned(void)
 {
-    if (interned == NULL || !PyDict_Check(interned)) { 
+    if (interned == NULL || !PyDict_Check(interned)) {
         return;
-    } 
-    PyObject *keys = PyDict_Keys(interned); 
+    }
+    PyObject *keys = PyDict_Keys(interned);
     if (keys == NULL || !PyList_Check(keys)) {
         PyErr_Clear();
         return;
     }
 
-    /* Since unicode_release_interned() is intended to help a leak 
+    /* Since unicode_release_interned() is intended to help a leak
        detector, interned unicode strings are not forcibly deallocated;
        rather, we give them their stolen references back, and then clear
        and DECREF the interned dict. */
 
-    Py_ssize_t n = PyList_GET_SIZE(keys); 
-#ifdef INTERNED_STATS 
+    Py_ssize_t n = PyList_GET_SIZE(keys);
+#ifdef INTERNED_STATS
     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
             n);
- 
-    Py_ssize_t immortal_size = 0, mortal_size = 0; 
-#endif 
-    for (Py_ssize_t i = 0; i < n; i++) { 
-        PyObject *s = PyList_GET_ITEM(keys, i); 
+
+    Py_ssize_t immortal_size = 0, mortal_size = 0;
+#endif
+    for (Py_ssize_t i = 0; i < n; i++) {
+        PyObject *s = PyList_GET_ITEM(keys, i);
         if (PyUnicode_READY(s) == -1) {
             Py_UNREACHABLE();
         }
         switch (PyUnicode_CHECK_INTERNED(s)) {
         case SSTATE_INTERNED_IMMORTAL:
             Py_REFCNT(s) += 1;
-#ifdef INTERNED_STATS 
+#ifdef INTERNED_STATS
             immortal_size += PyUnicode_GET_LENGTH(s);
-#endif 
+#endif
             break;
         case SSTATE_INTERNED_MORTAL:
             Py_REFCNT(s) += 2;
-#ifdef INTERNED_STATS 
+#ifdef INTERNED_STATS
             mortal_size += PyUnicode_GET_LENGTH(s);
-#endif 
+#endif
             break;
-        case SSTATE_NOT_INTERNED: 
-            /* fall through */ 
+        case SSTATE_NOT_INTERNED:
+            /* fall through */
         default:
-            Py_UNREACHABLE(); 
+            Py_UNREACHABLE();
         }
         _PyUnicode_STATE(s).interned = SSTATE_NOT_INTERNED;
     }
-#ifdef INTERNED_STATS 
+#ifdef INTERNED_STATS
     fprintf(stderr, "total size of all interned strings: "
             "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
             "mortal/immortal\n", mortal_size, immortal_size);
-#endif 
+#endif
     Py_DECREF(keys);
     PyDict_Clear(interned);
     Py_CLEAR(interned);
 }
-#endif 
+#endif
 
 
 /********************* Unicode Iterator **************************/
@@ -15841,7 +15841,7 @@ unicodeiter_next(unicodeiterobject *it)
 
     if (it->it_index < PyUnicode_GET_LENGTH(seq)) {
         int kind = PyUnicode_KIND(seq);
-        const void *data = PyUnicode_DATA(seq); 
+        const void *data = PyUnicode_DATA(seq);
         Py_UCS4 chr = PyUnicode_READ(kind, data, it->it_index);
         item = PyUnicode_FromOrdinal(chr);
         if (item != NULL)
@@ -15855,7 +15855,7 @@ unicodeiter_next(unicodeiterobject *it)
 }
 
 static PyObject *
-unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) 
+unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
 {
     Py_ssize_t len = 0;
     if (it->it_seq)
@@ -15866,17 +15866,17 @@ unicodeiter_len(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
 PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
 
 static PyObject *
-unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) 
+unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
 {
-    _Py_IDENTIFIER(iter); 
+    _Py_IDENTIFIER(iter);
     if (it->it_seq != NULL) {
-        return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), 
+        return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
                              it->it_seq, it->it_index);
     } else {
         PyObject *u = (PyObject *)_PyUnicode_New(0);
         if (u == NULL)
             return NULL;
-        return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); 
+        return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
     }
 }
 
@@ -15917,10 +15917,10 @@ PyTypeObject PyUnicodeIter_Type = {
     0,                  /* tp_itemsize */
     /* methods */
     (destructor)unicodeiter_dealloc,    /* tp_dealloc */
-    0,                  /* tp_vectorcall_offset */ 
+    0,                  /* tp_vectorcall_offset */
     0,                  /* tp_getattr */
     0,                  /* tp_setattr */
-    0,                  /* tp_as_async */ 
+    0,                  /* tp_as_async */
     0,                  /* tp_repr */
     0,                  /* tp_as_number */
     0,                  /* tp_as_sequence */
@@ -16062,10 +16062,10 @@ PyUnicode_AsUnicodeCopy(PyObject *unicode)
         PyErr_BadArgument();
         return NULL;
     }
-_Py_COMP_DIAG_PUSH 
-_Py_COMP_DIAG_IGNORE_DEPR_DECLS 
+_Py_COMP_DIAG_PUSH
+_Py_COMP_DIAG_IGNORE_DEPR_DECLS
     u = PyUnicode_AsUnicodeAndSize(unicode, &len);
-_Py_COMP_DIAG_POP 
+_Py_COMP_DIAG_POP
     if (u == NULL)
         return NULL;
     /* Ensure we won't overflow the size. */
@@ -16084,242 +16084,242 @@ _Py_COMP_DIAG_POP
     return copy;
 }
 
- 
-static int 
-encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) 
-{ 
-    int res; 
-    res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); 
-    if (res == -2) { 
-        PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name); 
-        return -1; 
-    } 
-    if (res < 0) { 
-        PyErr_NoMemory(); 
-        return -1; 
-    } 
-    return 0; 
-} 
- 
- 
-static int 
-config_get_codec_name(wchar_t **config_encoding) 
-{ 
-    char *encoding; 
-    if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) { 
-        return -1; 
-    } 
- 
-    PyObject *name_obj = NULL; 
-    PyObject *codec = _PyCodec_Lookup(encoding); 
-    PyMem_RawFree(encoding); 
- 
-    if (!codec) 
-        goto error; 
- 
-    name_obj = PyObject_GetAttrString(codec, "name"); 
-    Py_CLEAR(codec); 
-    if (!name_obj) { 
-        goto error; 
-    } 
- 
-    wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL); 
-    Py_DECREF(name_obj); 
-    if (wname == NULL) { 
-        goto error; 
-    } 
- 
-    wchar_t *raw_wname = _PyMem_RawWcsdup(wname); 
-    if (raw_wname == NULL) { 
-        PyMem_Free(wname); 
-        PyErr_NoMemory(); 
-        goto error; 
-    } 
- 
-    PyMem_RawFree(*config_encoding); 
-    *config_encoding = raw_wname; 
- 
-    PyMem_Free(wname); 
-    return 0; 
- 
-error: 
-    Py_XDECREF(codec); 
-    Py_XDECREF(name_obj); 
-    return -1; 
-} 
- 
- 
-static PyStatus 
-init_stdio_encoding(PyThreadState *tstate) 
-{ 
-    /* Update the stdio encoding to the normalized Python codec name. */ 
-    PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(tstate->interp); 
-    if (config_get_codec_name(&config->stdio_encoding) < 0) { 
-        return _PyStatus_ERR("failed to get the Python codec name " 
-                             "of the stdio encoding"); 
-    } 
-    return _PyStatus_OK(); 
-} 
- 
- 
-static int 
-init_fs_codec(PyInterpreterState *interp) 
-{ 
-    const PyConfig *config = _PyInterpreterState_GetConfig(interp); 
- 
-    _Py_error_handler error_handler; 
-    error_handler = get_error_handler_wide(config->filesystem_errors); 
-    if (error_handler == _Py_ERROR_UNKNOWN) { 
-        PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler"); 
-        return -1; 
-    } 
- 
-    char *encoding, *errors; 
-    if (encode_wstr_utf8(config->filesystem_encoding, 
-                         &encoding, 
-                         "filesystem_encoding") < 0) { 
-        return -1; 
-    } 
- 
-    if (encode_wstr_utf8(config->filesystem_errors, 
-                         &errors, 
-                         "filesystem_errors") < 0) { 
-        PyMem_RawFree(encoding); 
-        return -1; 
-    } 
- 
-    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec; 
-    PyMem_RawFree(fs_codec->encoding); 
-    fs_codec->encoding = encoding; 
-    /* encoding has been normalized by init_fs_encoding() */ 
-    fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0); 
-    PyMem_RawFree(fs_codec->errors); 
-    fs_codec->errors = errors; 
-    fs_codec->error_handler = error_handler; 
- 
-#ifdef _Py_FORCE_UTF8_FS_ENCODING 
-    assert(fs_codec->utf8 == 1); 
-#endif 
- 
-    /* At this point, PyUnicode_EncodeFSDefault() and 
-       PyUnicode_DecodeFSDefault() can now use the Python codec rather than 
-       the C implementation of the filesystem encoding. */ 
- 
-    /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors 
-       global configuration variables. */ 
-    if (_Py_SetFileSystemEncoding(fs_codec->encoding, 
-                                  fs_codec->errors) < 0) { 
-        PyErr_NoMemory(); 
-        return -1; 
-    } 
-    return 0; 
-} 
- 
- 
-static PyStatus 
-init_fs_encoding(PyThreadState *tstate) 
-{ 
-    PyInterpreterState *interp = tstate->interp; 
- 
-    /* Update the filesystem encoding to the normalized Python codec name. 
-       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii" 
-       (Python codec name). */ 
-    PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(interp); 
-    if (config_get_codec_name(&config->filesystem_encoding) < 0) { 
-        _Py_DumpPathConfig(tstate); 
-        return _PyStatus_ERR("failed to get the Python codec " 
-                             "of the filesystem encoding"); 
-    } 
- 
-    if (init_fs_codec(interp) < 0) { 
-        return _PyStatus_ERR("cannot initialize filesystem codec"); 
-    } 
-    return _PyStatus_OK(); 
-} 
- 
- 
-PyStatus 
-_PyUnicode_InitEncodings(PyThreadState *tstate) 
-{ 
-    PyStatus status = init_fs_encoding(tstate); 
-    if (_PyStatus_EXCEPTION(status)) { 
-        return status; 
-    } 
- 
-    return init_stdio_encoding(tstate); 
-} 
- 
- 
-static void 
-_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec) 
-{ 
-    PyMem_RawFree(fs_codec->encoding); 
-    fs_codec->encoding = NULL; 
-    fs_codec->utf8 = 0; 
-    PyMem_RawFree(fs_codec->errors); 
-    fs_codec->errors = NULL; 
-    fs_codec->error_handler = _Py_ERROR_UNKNOWN; 
-} 
- 
- 
-#ifdef MS_WINDOWS 
-int 
-_PyUnicode_EnableLegacyWindowsFSEncoding(void) 
-{ 
-    PyInterpreterState *interp = _PyInterpreterState_GET(); 
-    PyConfig *config = (PyConfig *)_PyInterpreterState_GetConfig(interp); 
- 
-    /* Set the filesystem encoding to mbcs/replace (PEP 529) */ 
-    wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs"); 
-    wchar_t *errors = _PyMem_RawWcsdup(L"replace"); 
-    if (encoding == NULL || errors == NULL) { 
-        PyMem_RawFree(encoding); 
-        PyMem_RawFree(errors); 
-        PyErr_NoMemory(); 
-        return -1; 
-    } 
- 
-    PyMem_RawFree(config->filesystem_encoding); 
-    config->filesystem_encoding = encoding; 
-    PyMem_RawFree(config->filesystem_errors); 
-    config->filesystem_errors = errors; 
- 
-    return init_fs_codec(interp); 
-} 
-#endif 
- 
- 
-void 
-_PyUnicode_Fini(PyThreadState *tstate) 
-{ 
-    if (_Py_IsMainInterpreter(tstate)) { 
-#if defined(WITH_VALGRIND) || defined(__INSURE__) 
-        /* Insure++ is a memory analysis tool that aids in discovering 
-         * memory leaks and other memory problems.  On Python exit, the 
-         * interned string dictionaries are flagged as being in use at exit 
-         * (which it is).  Under normal circumstances, this is fine because 
-         * the memory will be automatically reclaimed by the system.  Under 
-         * memory debugging, it's a huge source of useless noise, so we 
-         * trade off slower shutdown for less distraction in the memory 
-         * reports.  -baw 
-         */ 
-        unicode_release_interned(); 
-#endif /* __INSURE__ */ 
- 
-        Py_CLEAR(unicode_empty); 
- 
-#ifdef LATIN1_SINGLETONS 
-        for (Py_ssize_t i = 0; i < 256; i++) { 
-            Py_CLEAR(unicode_latin1[i]); 
-        } 
-#endif 
-        unicode_clear_static_strings(); 
-    } 
- 
-    _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec); 
-} 
- 
- 
+
+static int
+encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
+{
+    int res;
+    res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
+    if (res == -2) {
+        PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
+        return -1;
+    }
+    if (res < 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    return 0;
+}
+
+
+static int
+config_get_codec_name(wchar_t **config_encoding)
+{
+    char *encoding;
+    if (encode_wstr_utf8(*config_encoding, &encoding, "stdio_encoding") < 0) {
+        return -1;
+    }
+
+    PyObject *name_obj = NULL;
+    PyObject *codec = _PyCodec_Lookup(encoding);
+    PyMem_RawFree(encoding);
+
+    if (!codec)
+        goto error;
+
+    name_obj = PyObject_GetAttrString(codec, "name");
+    Py_CLEAR(codec);
+    if (!name_obj) {
+        goto error;
+    }
+
+    wchar_t *wname = PyUnicode_AsWideCharString(name_obj, NULL);
+    Py_DECREF(name_obj);
+    if (wname == NULL) {
+        goto error;
+    }
+
+    wchar_t *raw_wname = _PyMem_RawWcsdup(wname);
+    if (raw_wname == NULL) {
+        PyMem_Free(wname);
+        PyErr_NoMemory();
+        goto error;
+    }
+
+    PyMem_RawFree(*config_encoding);
+    *config_encoding = raw_wname;
+
+    PyMem_Free(wname);
+    return 0;
+
+error:
+    Py_XDECREF(codec);
+    Py_XDECREF(name_obj);
+    return -1;
+}
+
+
+static PyStatus
+init_stdio_encoding(PyThreadState *tstate)
+{
+    /* Update the stdio encoding to the normalized Python codec name. */
+    PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(tstate->interp);
+    if (config_get_codec_name(&config->stdio_encoding) < 0) {
+        return _PyStatus_ERR("failed to get the Python codec name "
+                             "of the stdio encoding");
+    }
+    return _PyStatus_OK();
+}
+
+
+static int
+init_fs_codec(PyInterpreterState *interp)
+{
+    const PyConfig *config = _PyInterpreterState_GetConfig(interp);
+
+    _Py_error_handler error_handler;
+    error_handler = get_error_handler_wide(config->filesystem_errors);
+    if (error_handler == _Py_ERROR_UNKNOWN) {
+        PyErr_SetString(PyExc_RuntimeError, "unknown filesystem error handler");
+        return -1;
+    }
+
+    char *encoding, *errors;
+    if (encode_wstr_utf8(config->filesystem_encoding,
+                         &encoding,
+                         "filesystem_encoding") < 0) {
+        return -1;
+    }
+
+    if (encode_wstr_utf8(config->filesystem_errors,
+                         &errors,
+                         "filesystem_errors") < 0) {
+        PyMem_RawFree(encoding);
+        return -1;
+    }
+
+    struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
+    PyMem_RawFree(fs_codec->encoding);
+    fs_codec->encoding = encoding;
+    /* encoding has been normalized by init_fs_encoding() */
+    fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
+    PyMem_RawFree(fs_codec->errors);
+    fs_codec->errors = errors;
+    fs_codec->error_handler = error_handler;
+
+#ifdef _Py_FORCE_UTF8_FS_ENCODING
+    assert(fs_codec->utf8 == 1);
+#endif
+
+    /* At this point, PyUnicode_EncodeFSDefault() and
+       PyUnicode_DecodeFSDefault() can now use the Python codec rather than
+       the C implementation of the filesystem encoding. */
+
+    /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
+       global configuration variables. */
+    if (_Py_SetFileSystemEncoding(fs_codec->encoding,
+                                  fs_codec->errors) < 0) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    return 0;
+}
+
+
+static PyStatus
+init_fs_encoding(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+
+    /* Update the filesystem encoding to the normalized Python codec name.
+       For example, replace "ANSI_X3.4-1968" (locale encoding) with "ascii"
+       (Python codec name). */
+    PyConfig *config = (PyConfig*)_PyInterpreterState_GetConfig(interp);
+    if (config_get_codec_name(&config->filesystem_encoding) < 0) {
+        _Py_DumpPathConfig(tstate);
+        return _PyStatus_ERR("failed to get the Python codec "
+                             "of the filesystem encoding");
+    }
+
+    if (init_fs_codec(interp) < 0) {
+        return _PyStatus_ERR("cannot initialize filesystem codec");
+    }
+    return _PyStatus_OK();
+}
+
+
+PyStatus
+_PyUnicode_InitEncodings(PyThreadState *tstate)
+{
+    PyStatus status = init_fs_encoding(tstate);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
+    return init_stdio_encoding(tstate);
+}
+
+
+static void
+_PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
+{
+    PyMem_RawFree(fs_codec->encoding);
+    fs_codec->encoding = NULL;
+    fs_codec->utf8 = 0;
+    PyMem_RawFree(fs_codec->errors);
+    fs_codec->errors = NULL;
+    fs_codec->error_handler = _Py_ERROR_UNKNOWN;
+}
+
+
+#ifdef MS_WINDOWS
+int
+_PyUnicode_EnableLegacyWindowsFSEncoding(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyConfig *config = (PyConfig *)_PyInterpreterState_GetConfig(interp);
+
+    /* Set the filesystem encoding to mbcs/replace (PEP 529) */
+    wchar_t *encoding = _PyMem_RawWcsdup(L"mbcs");
+    wchar_t *errors = _PyMem_RawWcsdup(L"replace");
+    if (encoding == NULL || errors == NULL) {
+        PyMem_RawFree(encoding);
+        PyMem_RawFree(errors);
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    PyMem_RawFree(config->filesystem_encoding);
+    config->filesystem_encoding = encoding;
+    PyMem_RawFree(config->filesystem_errors);
+    config->filesystem_errors = errors;
+
+    return init_fs_codec(interp);
+}
+#endif
+
+
+void
+_PyUnicode_Fini(PyThreadState *tstate)
+{
+    if (_Py_IsMainInterpreter(tstate)) {
+#if defined(WITH_VALGRIND) || defined(__INSURE__)
+        /* Insure++ is a memory analysis tool that aids in discovering
+         * memory leaks and other memory problems.  On Python exit, the
+         * interned string dictionaries are flagged as being in use at exit
+         * (which it is).  Under normal circumstances, this is fine because
+         * the memory will be automatically reclaimed by the system.  Under
+         * memory debugging, it's a huge source of useless noise, so we
+         * trade off slower shutdown for less distraction in the memory
+         * reports.  -baw
+         */
+        unicode_release_interned();
+#endif /* __INSURE__ */
+
+        Py_CLEAR(unicode_empty);
+
+#ifdef LATIN1_SINGLETONS
+        for (Py_ssize_t i = 0; i < 256; i++) {
+            Py_CLEAR(unicode_latin1[i]);
+        }
+#endif
+        unicode_clear_static_strings();
+    }
+
+    _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
+}
+
+
 /* A _string module, to export formatter_parser and formatter_field_name_split
    to the string.Formatter class implemented in Python. */
author	shadchin <[email protected]>	2022-02-10 16:44:39 +0300
committer	Daniil Cherednik <[email protected]>	2022-02-10 16:44:39 +0300
commit	e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree	64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/python3/src/Objects/unicodeobject.c
parent	2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)