diff options
author | shadchin <shadchin@yandex-team.com> | 2024-12-23 19:39:02 +0300 |
---|---|---|
committer | shadchin <shadchin@yandex-team.com> | 2024-12-23 19:54:20 +0300 |
commit | 65a5bf9d37a3b29eb394f560b9a09318196c40e8 (patch) | |
tree | e5cd68fb0682b2388e52d9806bb87adc348e21a8 /contrib/tools/python3/Objects/unicodeobject.c | |
parent | a1dd87a52878ab3e46e5fd2dba5ecbba6113d7e0 (diff) | |
download | ydb-65a5bf9d37a3b29eb394f560b9a09318196c40e8.tar.gz |
Update Python 3 to 3.12.8
commit_hash:c20045b8a987d8720e1f3328270357491d5530f3
Diffstat (limited to 'contrib/tools/python3/Objects/unicodeobject.c')
-rw-r--r-- | contrib/tools/python3/Objects/unicodeobject.c | 68 |
1 files changed, 58 insertions, 10 deletions
diff --git a/contrib/tools/python3/Objects/unicodeobject.c b/contrib/tools/python3/Objects/unicodeobject.c index 3235ae8ae0..8fe275d4c8 100644 --- a/contrib/tools/python3/Objects/unicodeobject.c +++ b/contrib/tools/python3/Objects/unicodeobject.c @@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2) } } +/* Return true if this interpreter should share the main interpreter's + intern_dict. That's important for interpreters which load basic + single-phase init extension modules (m_size == -1). There could be interned + immortal strings that are shared between interpreters, due to the + PyDict_Update(mdict, m_copy) call in import_find_extension(). + + It's not safe to deallocate those strings until all interpreters that + potentially use them are freed. By storing them in the main interpreter, we + ensure they get freed after all other interpreters are freed. +*/ +static bool +has_shared_intern_dict(PyInterpreterState *interp) +{ + PyInterpreterState *main_interp = _PyInterpreterState_Main(); + return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC; +} + static int init_interned_dict(PyInterpreterState *interp) { assert(get_interned_dict(interp) == NULL); - PyObject *interned = interned = PyDict_New(); - if (interned == NULL) { - return -1; + PyObject *interned; + if (has_shared_intern_dict(interp)) { + interned = get_interned_dict(_PyInterpreterState_Main()); + Py_INCREF(interned); + } + else { + interned = PyDict_New(); + if (interned == NULL) { + return -1; + } } _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; return 0; @@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp) { PyObject *interned = get_interned_dict(interp); if (interned != NULL) { - PyDict_Clear(interned); + if (!has_shared_intern_dict(interp)) { + // only clear if the dict belongs to this interpreter + PyDict_Clear(interned); + } Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } @@ -1445,12 +1472,12 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, assert(PyUnicode_Check(from)); assert(from_start + how_many <= PyUnicode_GET_LENGTH(from)); - assert(PyUnicode_Check(to)); - assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); - if (how_many == 0) return 0; + assert(PyUnicode_Check(to)); + assert(to_start + how_many <= PyUnicode_GET_LENGTH(to)); + from_kind = PyUnicode_KIND(from); from_data = PyUnicode_DATA(from); to_kind = PyUnicode_KIND(to); @@ -3996,6 +4023,18 @@ PyUnicode_AsUTF8(PyObject *unicode) return PyUnicode_AsUTF8AndSize(unicode, NULL); } +const char * +_PyUnicode_AsUTF8NoNUL(PyObject *unicode) +{ + Py_ssize_t size; + const char *s = PyUnicode_AsUTF8AndSize(unicode, &size); + if (s && strlen(s) != (size_t)size) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return NULL; + } + return s; +} + /* PyUnicode_GetSize() has been deprecated since Python 3.3 because it returned length of Py_UNICODE. @@ -15145,6 +15184,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); + if (has_shared_intern_dict(interp)) { + // the dict doesn't belong to this interpreter, skip the debug + // checks on it and just clear the pointer to it + clear_interned_dict(interp); + return; + } + #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -15439,7 +15485,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) int res; res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); if (res == -2) { - PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name); + PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name); return -1; } if (res < 0) { @@ -15663,8 +15709,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() - assert(get_interned_dict(interp) == NULL); + if (!has_shared_intern_dict(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(get_interned_dict(interp) == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); |