about summary refs log tree commit diff stats
path: root/contrib/tools/python3/Objects/unicodeobject.c
diff options
context:
space:
mode:
author shadchin <shadchin@yandex-team.com> 2024-12-23 19:39:02 +0300
committer shadchin <shadchin@yandex-team.com> 2024-12-23 19:54:20 +0300
commit 65a5bf9d37a3b29eb394f560b9a09318196c40e8 (patch)
tree e5cd68fb0682b2388e52d9806bb87adc348e21a8 /contrib/tools/python3/Objects/unicodeobject.c
parent a1dd87a52878ab3e46e5fd2dba5ecbba6113d7e0 (diff)
download ydb-65a5bf9d37a3b29eb394f560b9a09318196c40e8.tar.gz
Update Python 3 to 3.12.8
commit_hash:c20045b8a987d8720e1f3328270357491d5530f3
Diffstat (limited to 'contrib/tools/python3/Objects/unicodeobject.c')
-rw-r--r-- contrib/tools/python3/Objects/unicodeobject.c 68
1 files changed, 58 insertions, 10 deletions
diff --git a/contrib/tools/python3/Objects/unicodeobject.c b/contrib/tools/python3/Objects/unicodeobject.c
index 3235ae8ae0..8fe275d4c8 100644
--- a/contrib/tools/python3/Objects/unicodeobject.c
+++ b/contrib/tools/python3/Objects/unicodeobject.c
@@ -287,13 +287,37 @@ hashtable_unicode_compare(const void *key1, const void *key2)
}
}
+/* Return true if this interpreter should share the main interpreter's
+ intern_dict. That's important for interpreters which load basic
+ single-phase init extension modules (m_size == -1). There could be interned
+ immortal strings that are shared between interpreters, due to the
+ PyDict_Update(mdict, m_copy) call in import_find_extension().
+
+ It's not safe to deallocate those strings until all interpreters that
+ potentially use them are freed. By storing them in the main interpreter, we
+ ensure they get freed after all other interpreters are freed.
+
+ Concretely, the result is true only when both conditions hold:
+   - interp is not the interpreter returned by _PyInterpreterState_Main();
+   - interp->feature_flags has the Py_RTFLAGS_USE_MAIN_OBMALLOC bit set.
+ The main interpreter itself therefore always gets false.
+*/
+static bool
+has_shared_intern_dict(PyInterpreterState *interp)
+{
+ PyInterpreterState *main_interp = _PyInterpreterState_Main();
+ return interp != main_interp && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC;  /* `&` binds tighter than `&&`, so the flag mask is tested as a unit */
+}
+
static int
init_interned_dict(PyInterpreterState *interp)
{
assert(get_interned_dict(interp) == NULL);
- PyObject *interned = interned = PyDict_New();
- if (interned == NULL) {
- return -1;
+ PyObject *interned;
+ if (has_shared_intern_dict(interp)) {
+ interned = get_interned_dict(_PyInterpreterState_Main());
+ Py_INCREF(interned);
+ }
+ else {
+ interned = PyDict_New();
+ if (interned == NULL) {
+ return -1;
+ }
}
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
return 0;
@@ -304,7 +328,10 @@ clear_interned_dict(PyInterpreterState *interp)
{
PyObject *interned = get_interned_dict(interp);
if (interned != NULL) {
- PyDict_Clear(interned);
+ if (!has_shared_intern_dict(interp)) {
+ // only clear if the dict belongs to this interpreter
+ PyDict_Clear(interned);
+ }
Py_DECREF(interned);
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
}
@@ -1445,12 +1472,12 @@ _copy_characters(PyObject *to, Py_ssize_t to_start,
assert(PyUnicode_Check(from));
assert(from_start + how_many <= PyUnicode_GET_LENGTH(from));
- assert(PyUnicode_Check(to));
- assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
-
if (how_many == 0)
return 0;
+ assert(PyUnicode_Check(to));
+ assert(to_start + how_many <= PyUnicode_GET_LENGTH(to));
+
from_kind = PyUnicode_KIND(from);
from_data = PyUnicode_DATA(from);
to_kind = PyUnicode_KIND(to);
@@ -3996,6 +4023,18 @@ PyUnicode_AsUTF8(PyObject *unicode)
return PyUnicode_AsUTF8AndSize(unicode, NULL);
}
+const char *
+_PyUnicode_AsUTF8NoNUL(PyObject *unicode)
+{ /* Fetch the UTF-8 buffer of `unicode` via PyUnicode_AsUTF8AndSize() and
+     reject it if it contains an embedded NUL.
+
+     Returns the buffer pointer on success, or NULL. NULL is returned either
+     because PyUnicode_AsUTF8AndSize() itself returned NULL (presumably with
+     its own exception set -- that function is not visible here, confirm
+     against its documentation), or because the buffer holds a NUL before its
+     reported end, in which case ValueError("embedded null character") is
+     set by this function. */
+ Py_ssize_t size;
+ const char *s = PyUnicode_AsUTF8AndSize(unicode, &size);
+ if (s && strlen(s) != (size_t)size) {  /* strlen() stops at the first NUL byte; a mismatch with the reported size means one is embedded */
+ PyErr_SetString(PyExc_ValueError, "embedded null character");
+ return NULL;
+ }
+ return s;  /* may itself be NULL when PyUnicode_AsUTF8AndSize() returned NULL */
+}
+
/*
PyUnicode_GetSize() has been deprecated since Python 3.3
because it returned length of Py_UNICODE.
@@ -15145,6 +15184,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
}
assert(PyDict_CheckExact(interned));
+ if (has_shared_intern_dict(interp)) {
+ // the dict doesn't belong to this interpreter, skip the debug
+ // checks on it and just clear the pointer to it
+ clear_interned_dict(interp);
+ return;
+ }
+
#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(interned));
@@ -15439,7 +15485,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name)
int res;
res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT);
if (res == -2) {
- PyErr_Format(PyExc_RuntimeWarning, "cannot decode %s", name);
+ PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name);
return -1;
}
if (res < 0) {
@@ -15663,8 +15709,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
- // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
- assert(get_interned_dict(interp) == NULL);
+ if (!has_shared_intern_dict(interp)) {
+ // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
+ assert(get_interned_dict(interp) == NULL);
+ }
_PyUnicode_FiniEncodings(&state->fs_codec);