diff options
| author | shadchin <[email protected]> | 2026-02-03 21:59:07 +0300 |
|---|---|---|
| committer | shadchin <[email protected]> | 2026-02-03 22:28:51 +0300 |
| commit | bce46f28de392862d5c6c3b185d844ee7c623be3 (patch) | |
| tree | 424878b5b90144f98970ce4a2745990c77330ad2 /contrib/tools/python3/Python/codecs.c | |
| parent | 0e0ee9fa48ce9411b4038aa769493d22ff6c10a2 (diff) | |
Import Python 3.13.11
commit_hash:bbb53cefb159aa3e7afaa475fd19d5a03b66945f
Diffstat (limited to 'contrib/tools/python3/Python/codecs.c')
| -rw-r--r-- | contrib/tools/python3/Python/codecs.c | 186 |
1 files changed, 95 insertions, 91 deletions
diff --git a/contrib/tools/python3/Python/codecs.c b/contrib/tools/python3/Python/codecs.c index b041339e0db..154ad3d9ee3 100644 --- a/contrib/tools/python3/Python/codecs.c +++ b/contrib/tools/python3/Python/codecs.c @@ -11,33 +11,19 @@ Copyright (c) Corporation for National Research Initiatives. #include "Python.h" #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_interp.h" // PyInterpreterState.codec_search_path -#include "pycore_pyerrors.h" // _PyErr_FormatNote() +#include "pycore_lock.h" // PyMutex +#include "pycore_pyerrors.h" // _PyErr_FormatNote() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI -#include <ctype.h> const char *Py_hexdigits = "0123456789abcdef"; /* --- Codec Registry ----------------------------------------------------- */ -/* Import the standard encodings package which will register the first - codec search function. - - This is done in a lazy way so that the Unicode implementation does - not downgrade startup time of scripts not needing it. - - ImportErrors are silently ignored by this function. Only one try is - made. - -*/ - -static int _PyCodecRegistry_Init(void); /* Forward */ - int PyCodec_Register(PyObject *search_function) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) - goto onError; + assert(interp->codecs.initialized); if (search_function == NULL) { PyErr_BadArgument(); goto onError; @@ -46,7 +32,14 @@ int PyCodec_Register(PyObject *search_function) PyErr_SetString(PyExc_TypeError, "argument must be callable"); goto onError; } - return PyList_Append(interp->codec_search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + int ret = PyList_Append(interp->codecs.search_path, search_function); +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + return ret; onError: return -1; @@ -55,23 +48,35 @@ int PyCodec_Register(PyObject *search_function) int PyCodec_Unregister(PyObject *search_function) { - PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *codec_search_path = interp->codec_search_path; - /* Do nothing if codec_search_path is not created yet or was cleared. */ - if (codec_search_path == NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (interp->codecs.initialized != 1) { + /* Do nothing if codecs state was cleared (only possible during + interpreter shutdown). */ return 0; } + PyObject *codec_search_path = interp->codecs.search_path; assert(PyList_CheckExact(codec_search_path)); - Py_ssize_t n = PyList_GET_SIZE(codec_search_path); - for (Py_ssize_t i = 0; i < n; i++) { - PyObject *item = PyList_GET_ITEM(codec_search_path, i); + for (Py_ssize_t i = 0; i < PyList_GET_SIZE(codec_search_path); i++) { +#ifdef Py_GIL_DISABLED + PyMutex_Lock(&interp->codecs.search_path_mutex); +#endif + PyObject *item = PyList_GetItemRef(codec_search_path, i); + int ret = 1; if (item == search_function) { - if (interp->codec_search_cache != NULL) { - assert(PyDict_CheckExact(interp->codec_search_cache)); - PyDict_Clear(interp->codec_search_cache); - } - return PyList_SetSlice(codec_search_path, i, i+1, NULL); + // We hold a reference to the item, so its destructor can't run + // while we hold search_path_mutex. + ret = PyList_SetSlice(codec_search_path, i, i+1, NULL); + } +#ifdef Py_GIL_DISABLED + PyMutex_Unlock(&interp->codecs.search_path_mutex); +#endif + Py_DECREF(item); + if (ret != 1) { + assert(interp->codecs.search_cache != NULL); + assert(PyDict_CheckExact(interp->codecs.search_cache)); + PyDict_Clear(interp->codecs.search_cache); + return ret; } } return 0; @@ -133,9 +138,10 @@ PyObject *_PyCodec_Lookup(const char *encoding) } PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) { + if (!interp->codecs.initialized) { return NULL; } + assert(interp->codecs.initialized); /* Convert the encoding to a normalized Python string: all characters are converted to lower case, spaces and hyphens are @@ -149,18 +155,17 @@ PyObject *_PyCodec_Lookup(const char *encoding) _PyUnicode_InternMortal(interp, &v); /* First, try to lookup the name in the registry dictionary */ - PyObject *result = PyDict_GetItemWithError(interp->codec_search_cache, v); + PyObject *result; + if (PyDict_GetItemRef(interp->codecs.search_cache, v, &result) < 0) { + goto onError; + } if (result != NULL) { - Py_INCREF(result); Py_DECREF(v); return result; } - else if (PyErr_Occurred()) { - goto onError; - } /* Next, scan the search functions in order of registration */ - const Py_ssize_t len = PyList_Size(interp->codec_search_path); + const Py_ssize_t len = PyList_Size(interp->codecs.search_path); if (len < 0) goto onError; if (len == 0) { @@ -174,14 +179,15 @@ PyObject *_PyCodec_Lookup(const char *encoding) for (i = 0; i < len; i++) { PyObject *func; - func = PyList_GetItem(interp->codec_search_path, i); + func = PyList_GetItemRef(interp->codecs.search_path, i); if (func == NULL) goto onError; result = PyObject_CallOneArg(func, v); + Py_DECREF(func); if (result == NULL) goto onError; if (result == Py_None) { - Py_DECREF(result); + Py_CLEAR(result); continue; } if (!PyTuple_Check(result) || PyTuple_GET_SIZE(result) != 4) { @@ -192,7 +198,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) } break; } - if (i == len) { + if (result == NULL) { /* XXX Perhaps we should cache misses too ? */ PyErr_Format(PyExc_LookupError, "unknown encoding: %s", encoding); @@ -202,7 +208,7 @@ PyObject *_PyCodec_Lookup(const char *encoding) _PyUnicode_InternImmortal(interp, &v); /* Cache and return the result */ - if (PyDict_SetItem(interp->codec_search_cache, v, result) < 0) { + if (PyDict_SetItem(interp->codecs.search_cache, v, result) < 0) { Py_DECREF(result); goto onError; } @@ -520,7 +526,7 @@ PyObject * _PyCodec_LookupTextEncoding(const char *encoding, * attribute. */ if (!PyTuple_CheckExact(codec)) { - if (_PyObject_LookupAttr(codec, &_Py_ID(_is_text_encoding), &attr) < 0) { + if (PyObject_GetOptionalAttr(codec, &_Py_ID(_is_text_encoding), &attr) < 0) { Py_DECREF(codec); return NULL; } @@ -606,13 +612,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object, int PyCodec_RegisterError(const char *name, PyObject *error) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) - return -1; + assert(interp->codecs.initialized); if (!PyCallable_Check(error)) { PyErr_SetString(PyExc_TypeError, "handler must be callable"); return -1; } - return PyDict_SetItemString(interp->codec_error_registry, + return PyDict_SetItemString(interp->codecs.error_registry, name, error); } @@ -621,20 +626,18 @@ int PyCodec_RegisterError(const char *name, PyObject *error) the error handling callback for strict encoding will be returned. */ PyObject *PyCodec_LookupError(const char *name) { - PyObject *handler = NULL; - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->codec_search_path == NULL && _PyCodecRegistry_Init()) - return NULL; + assert(interp->codecs.initialized); if (name==NULL) name = "strict"; - handler = _PyDict_GetItemStringWithError(interp->codec_error_registry, name); - if (handler) { - Py_INCREF(handler); + PyObject *handler; + if (PyDict_GetItemStringRef(interp->codecs.error_registry, name, &handler) < 0) { + return NULL; } - else if (!PyErr_Occurred()) { + if (handler == NULL) { PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name); + return NULL; } return handler; } @@ -938,8 +941,6 @@ PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) return Py_BuildValue("(Nn)", res, end); } -static _PyUnicode_Name_CAPI *ucnhash_capi = NULL; - PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { @@ -960,13 +961,9 @@ PyObject *PyCodec_NameReplaceErrors(PyObject *exc) return NULL; if (!(object = PyUnicodeEncodeError_GetObject(exc))) return NULL; - if (!ucnhash_capi) { - /* load the unicode data module */ - ucnhash_capi = (_PyUnicode_Name_CAPI *)PyCapsule_Import( - PyUnicodeData_CAPSULE_NAME, 1); - if (!ucnhash_capi) { - return NULL; - } + _PyUnicode_Name_CAPI *ucnhash_capi = _PyUnicode_GetNameCAPI(); + if (ucnhash_capi == NULL) { + return NULL; } for (i = start, ressize = 0; i < end; ++i) { /* object is guaranteed to be "ready" */ @@ -1093,7 +1090,7 @@ get_standard_encoding(const char *encoding, int *bytelength) } } } - else if (strcmp(encoding, "CP_UTF8") == 0) { + else if (strcmp(encoding, "cp65001") == 0) { *bytelength = 3; return ENC_UTF8; } @@ -1388,7 +1385,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc) return PyCodec_SurrogateEscapeErrors(exc); } -static int _PyCodecRegistry_Init(void) +PyStatus +_PyCodec_InitRegistry(PyInterpreterState *interp) { static struct { const char *name; @@ -1476,45 +1474,51 @@ static int _PyCodecRegistry_Init(void) } }; - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *mod; - - if (interp->codec_search_path != NULL) - return 0; - - interp->codec_search_path = PyList_New(0); - if (interp->codec_search_path == NULL) { - return -1; + assert(interp->codecs.initialized == 0); + interp->codecs.search_path = PyList_New(0); + if (interp->codecs.search_path == NULL) { + return PyStatus_NoMemory(); } - - interp->codec_search_cache = PyDict_New(); - if (interp->codec_search_cache == NULL) { - return -1; + interp->codecs.search_cache = PyDict_New(); + if (interp->codecs.search_cache == NULL) { + return PyStatus_NoMemory(); } - - interp->codec_error_registry = PyDict_New(); - if (interp->codec_error_registry == NULL) { - return -1; + interp->codecs.error_registry = PyDict_New(); + if (interp->codecs.error_registry == NULL) { + return PyStatus_NoMemory(); } - for (size_t i = 0; i < Py_ARRAY_LENGTH(methods); ++i) { PyObject *func = PyCFunction_NewEx(&methods[i].def, NULL, NULL); - if (!func) { - return -1; + if (func == NULL) { + return PyStatus_NoMemory(); } - int res = PyCodec_RegisterError(methods[i].name, func); + int res = PyDict_SetItemString(interp->codecs.error_registry, + methods[i].name, func); Py_DECREF(func); - if (res) { - return -1; + if (res < 0) { + return PyStatus_Error("Failed to insert into codec error registry"); } } - mod = PyImport_ImportModule("encodings"); + interp->codecs.initialized = 1; + + // Importing `encodings' will call back into this module to register codec + // search functions, so this is done after everything else is initialized. + PyObject *mod = PyImport_ImportModule("encodings"); if (mod == NULL) { - return -1; + return PyStatus_Error("Failed to import encodings module"); } Py_DECREF(mod); - interp->codecs_initialized = 1; - return 0; + + return PyStatus_Ok(); +} + +void +_PyCodec_Fini(PyInterpreterState *interp) +{ + Py_CLEAR(interp->codecs.search_path); + Py_CLEAR(interp->codecs.search_cache); + Py_CLEAR(interp->codecs.error_registry); + interp->codecs.initialized = 0; } |
