Import Python 3.13.11

commit_hash:bbb53cefb159aa3e7afaa475fd19d5a03b66945f
author: shadchin <[email protected]> 2026-02-03 21:59:07 +0300
committer: shadchin <[email protected]> 2026-02-03 22:28:51 +0300
commit: bce46f28de392862d5c6c3b185d844ee7c623be3 (patch)
tree: 424878b5b90144f98970ce4a2745990c77330ad2 /contrib/tools/python3/Python/codecs.c
parent: 0e0ee9fa48ce9411b4038aa769493d22ff6c10a2 (diff)
1 files changed, 95 insertions, 91 deletions
diff --git a/contrib/tools/python3/Python/codecs.c b/contrib/tools/python3/Python/codecs.c
index b041339e0db..154ad3d9ee3 100644
--- a/contrib/tools/python3/Python/codecs.c
+++ b/contrib/tools/python3/Python/codecs.c
@@ -11,33 +11,19 @@ Copyright (c) Corporation for National Research Initiatives.
 #include "Python.h"
 #include "pycore_call.h"          // _PyObject_CallNoArgs()
 #include "pycore_interp.h"        // PyInterpreterState.codec_search_path
-#include "pycore_pyerrors.h"       // _PyErr_FormatNote()
+#include "pycore_lock.h"          // PyMutex
+#include "pycore_pyerrors.h"      // _PyErr_FormatNote()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_ucnhash.h"       // _PyUnicode_Name_CAPI
-#include <ctype.h>
 
 const char *Py_hexdigits = "0123456789abcdef";
 
 /* --- Codec Registry ----------------------------------------------------- */
 
-/* Import the standard encodings package which will register the first
-   codec search function.
-
-   This is done in a lazy way so that the Unicode implementation does
-   not downgrade startup time of scripts not needing it.
-
-   ImportErrors are silently ignored by this function. Only one try is
-   made.
-
-*/
-
-static int _PyCodecRegistry_Init(void); /* Forward */
-
 int PyCodec_Register(PyObject *search_function)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
-        goto onError;
+    assert(interp->codecs.initialized);
     if (search_function == NULL) {
         PyErr_BadArgument();
         goto onError;
@@ -46,7 +32,14 @@ int PyCodec_Register(PyObject *search_function)
         PyErr_SetString(PyExc_TypeError, "argument must be callable");
         goto onError;
     }
-    return PyList_Append(interp->codec_search_path, search_function);
+#ifdef Py_GIL_DISABLED
+    PyMutex_Lock(&interp->codecs.search_path_mutex);
+#endif
+    int ret = PyList_Append(interp->codecs.search_path, search_function);
+#ifdef Py_GIL_DISABLED
+    PyMutex_Unlock(&interp->codecs.search_path_mutex);
+#endif
+    return ret;
 
  onError:
     return -1;
@@ -55,23 +48,35 @@ int PyCodec_Register(PyObject *search_function)
 int
 PyCodec_Unregister(PyObject *search_function)
 {
-    PyInterpreterState *interp = PyInterpreterState_Get();
-    PyObject *codec_search_path = interp->codec_search_path;
-    /* Do nothing if codec_search_path is not created yet or was cleared. */
-    if (codec_search_path == NULL) {
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (interp->codecs.initialized != 1) {
+        /* Do nothing if codecs state was cleared (only possible during
+           interpreter shutdown). */
         return 0;
     }
 
+    PyObject *codec_search_path = interp->codecs.search_path;
     assert(PyList_CheckExact(codec_search_path));
-    Py_ssize_t n = PyList_GET_SIZE(codec_search_path);
-    for (Py_ssize_t i = 0; i < n; i++) {
-        PyObject *item = PyList_GET_ITEM(codec_search_path, i);
+    for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) {
+#ifdef Py_GIL_DISABLED
+        PyMutex_Lock(&interp->codecs.search_path_mutex);
+#endif
+        PyObject *item = PyList_GetItemRef(codec_search_path, i);
+        int ret = 1;
         if (item == search_function) {
-            if (interp->codec_search_cache != NULL) {
-                assert(PyDict_CheckExact(interp->codec_search_cache));
-                PyDict_Clear(interp->codec_search_cache);
-            }
-            return PyList_SetSlice(codec_search_path, i, i+1, NULL);
+            // We hold a reference to the item, so its destructor can't run
+            // while we hold search_path_mutex.
+            ret = PyList_SetSlice(codec_search_path, i, i+1, NULL);
+        }
+#ifdef Py_GIL_DISABLED
+        PyMutex_Unlock(&interp->codecs.search_path_mutex);
+#endif
+        Py_DECREF(item);
+        if (ret != 1) {
+            assert(interp->codecs.search_cache != NULL);
+            assert(PyDict_CheckExact(interp->codecs.search_cache));
+            PyDict_Clear(interp->codecs.search_cache);
+            return ret;
         }
     }
     return 0;
@@ -133,9 +138,10 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     }
 
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) {
+    if (!interp->codecs.initialized) {
         return NULL;
     }
+    assert(interp->codecs.initialized);
 
     /* Convert the encoding to a normalized Python string: all
        characters are converted to lower case, spaces and hyphens are
@@ -149,18 +155,17 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     _PyUnicode_InternMortal(interp, &v);
 
     /* First, try to lookup the name in the registry dictionary */
-    PyObject *result = PyDict_GetItemWithError(interp->codec_search_cache, v);
+    PyObject *result;
+    if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) {
+        goto onError;
+    }
     if (result != NULL) {
-        Py_INCREF(result);
         Py_DECREF(v);
         return result;
     }
-    else if (PyErr_Occurred()) {
-        goto onError;
-    }
 
     /* Next, scan the search functions in order of registration */
-    const Py_ssize_t len = PyList_Size(interp->codec_search_path);
+    const Py_ssize_t len = PyList_Size(interp->codecs.search_path);
     if (len < 0)
         goto onError;
     if (len == 0) {
@@ -174,14 +179,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     for (i = 0; i < len; i++) {
         PyObject *func;
 
-        func = PyList_GetItem(interp->codec_search_path, i);
+        func = PyList_GetItemRef(interp->codecs.search_path, i);
         if (func == NULL)
             goto onError;
         result = PyObject_CallOneArg(func, v);
+        Py_DECREF(func);
         if (result == NULL)
             goto onError;
         if (result == Py_None) {
-            Py_DECREF(result);
+            Py_CLEAR(result);
             continue;
         }
         if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) {
@@ -192,7 +198,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
         }
         break;
     }
-    if (i == len) {
+    if (result == NULL) {
         /* XXX Perhaps we should cache misses too ? */
         PyErr_Format(PyExc_LookupError,
                      "unknown encoding: %s", encoding);
@@ -202,7 +208,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
     _PyUnicode_InternImmortal(interp, &v);
 
     /* Cache and return the result */
-    if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) {
+    if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) {
         Py_DECREF(result);
         goto onError;
     }
@@ -520,7 +526,7 @@ PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
      * attribute.
      */
     if (!PyTuple_CheckExact(codec)) {
-        if (_PyObject_LookupAttr(codec, &_Py_ID(_is_text_encoding), &attr) < 0) {
+        if (PyObject_GetOptionalAttr(codec, &_Py_ID(_is_text_encoding), &attr) < 0) {
             Py_DECREF(codec);
             return NULL;
         }
@@ -606,13 +612,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
 int PyCodec_RegisterError(const char *name, PyObject *error)
 {
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
-        return -1;
+    assert(interp->codecs.initialized);
     if (!PyCallable_Check(error)) {
         PyErr_SetString(PyExc_TypeError, "handler must be callable");
         return -1;
     }
-    return PyDict_SetItemString(interp->codec_error_registry,
+    return PyDict_SetItemString(interp->codecs.error_registry,
                                 name, error);
 }
 
@@ -621,20 +626,18 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
    the error handling callback for strict encoding will be returned. */
 PyObject *PyCodec_LookupError(const char *name)
 {
-    PyObject *handler = NULL;
-
     PyInterpreterState *interp = _PyInterpreterState_GET();
-    if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
-        return NULL;
+    assert(interp->codecs.initialized);
 
     if (name==NULL)
         name = "strict";
-    handler = _PyDict_GetItemStringWithError(interp->codec_error_registry, name);
-    if (handler) {
-        Py_INCREF(handler);
+    PyObject *handler;
+    if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) {
+        return NULL;
     }
-    else if (!PyErr_Occurred()) {
+    if (handler == NULL) {
         PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
+        return NULL;
     }
     return handler;
 }
@@ -938,8 +941,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc)
     return Py_BuildValue("(Nn)", res, end);
 }
 
-static _PyUnicode_Name_CAPI *ucnhash_capi = NULL;
-
 PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
 {
     if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) {
@@ -960,13 +961,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc)
             return NULL;
         if (!(object = PyUnicodeEncodeError_GetObject(exc)))
             return NULL;
-        if (!ucnhash_capi) {
-            /* load the unicode data module */
-            ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import(
-                                            PyUnicodeData_CAPSULE_NAME, 1);
-            if (!ucnhash_capi) {
-                return NULL;
-            }
+        _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI();
+        if (ucnhash_capi == NULL) {
+            return NULL;
         }
         for (i = start, ressize = 0; i < end; ++i) {
             /* object is guaranteed to be "ready" */
@@ -1093,7 +1090,7 @@ get_standard_encoding(const char *encoding, int *bytelength)
             }
         }
     }
-    else if (strcmp(encoding, "CP_UTF8") == 0) {
+    else if (strcmp(encoding, "cp65001") == 0) {
         *bytelength = 3;
         return ENC_UTF8;
     }
@@ -1388,7 +1385,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
     return PyCodec_SurrogateEscapeErrors(exc);
 }
 
-static int _PyCodecRegistry_Init(void)
+PyStatus
+_PyCodec_InitRegistry(PyInterpreterState *interp)
 {
     static struct {
         const char *name;
@@ -1476,45 +1474,51 @@ static int _PyCodecRegistry_Init(void)
         }
     };
 
-    PyInterpreterState *interp = _PyInterpreterState_GET();
-    PyObject *mod;
-
-    if (interp->codec_search_path != NULL)
-        return 0;
-
-    interp->codec_search_path = PyList_New(0);
-    if (interp->codec_search_path == NULL) {
-        return -1;
+    assert(interp->codecs.initialized == 0);
+    interp->codecs.search_path = PyList_New(0);
+    if (interp->codecs.search_path == NULL) {
+        return PyStatus_NoMemory();
     }
-
-    interp->codec_search_cache = PyDict_New();
-    if (interp->codec_search_cache == NULL) {
-        return -1;
+    interp->codecs.search_cache = PyDict_New();
+    if (interp->codecs.search_cache == NULL) {
+        return PyStatus_NoMemory();
     }
-
-    interp->codec_error_registry = PyDict_New();
-    if (interp->codec_error_registry == NULL) {
-        return -1;
+    interp->codecs.error_registry = PyDict_New();
+    if (interp->codecs.error_registry == NULL) {
+        return PyStatus_NoMemory();
     }
-
     for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) {
         PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL);
-        if (!func) {
-            return -1;
+        if (func == NULL) {
+            return PyStatus_NoMemory();
         }
 
-        int res = PyCodec_RegisterError(methods[i].name, func);
+        int res = PyDict_SetItemString(interp->codecs.error_registry,
+                                       methods[i].name, func);
         Py_DECREF(func);
-        if (res) {
-            return -1;
+        if (res < 0) {
+            return PyStatus_Error("Failed to insert into codec error registry");
         }
     }
 
-    mod = PyImport_ImportModule("encodings");
+    interp->codecs.initialized = 1;
+
+    // Importing `encodings' will call back into this module to register codec
+    // search functions, so this is done after everything else is initialized.
+    PyObject *mod = PyImport_ImportModule("encodings");
     if (mod == NULL) {
-        return -1;
+        return PyStatus_Error("Failed to import encodings module");
     }
     Py_DECREF(mod);
-    interp->codecs_initialized = 1;
-    return 0;
+
+    return PyStatus_Ok();
+}
+
+void
+_PyCodec_Fini(PyInterpreterState *interp)
+{
+    Py_CLEAR(interp->codecs.search_path);
+    Py_CLEAR(interp->codecs.search_cache);
+    Py_CLEAR(interp->codecs.error_registry);
+    interp->codecs.initialized = 0;
 }
author	shadchin <[email protected]>	2026-02-03 21:59:07 +0300
committer	shadchin <[email protected]>	2026-02-03 22:28:51 +0300
commit	bce46f28de392862d5c6c3b185d844ee7c623be3 (patch)
tree	424878b5b90144f98970ce4a2745990c77330ad2 /contrib/tools/python3/Python/codecs.c
parent	0e0ee9fa48ce9411b4038aa769493d22ff6c10a2 (diff)