diff options
author | shadchin <shadchin@yandex-team.com> | 2024-10-27 10:52:33 +0300 |
---|---|---|
committer | shadchin <shadchin@yandex-team.com> | 2024-10-27 11:03:47 +0300 |
commit | 1529383373617c6d14ad4972afdc46a5eb35f954 (patch) | |
tree | 229b7647fafadd4ee4b93d20e606c534ad697365 /contrib/tools/python3/Objects | |
parent | 41d598c624442bf6918407466dac3316b8277347 (diff) | |
download | ydb-1529383373617c6d14ad4972afdc46a5eb35f954.tar.gz |
Update Python 3 to 3.12.7
commit_hash:052a122399d67f1ea5dfbc5f6457e3e06200becf
Diffstat (limited to 'contrib/tools/python3/Objects')
-rw-r--r-- | contrib/tools/python3/Objects/codeobject.c | 6 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/dictobject.c | 3 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/memoryobject.c | 55 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/object.c | 3 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/odictobject.c | 33 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/typeobject.c | 9 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/typevarobject.c | 11 | ||||
-rw-r--r-- | contrib/tools/python3/Objects/unicodeobject.c | 512 |
8 files changed, 479 insertions, 153 deletions
diff --git a/contrib/tools/python3/Objects/codeobject.c b/contrib/tools/python3/Objects/codeobject.c index 5e358825d8..1681d97613 100644 --- a/contrib/tools/python3/Objects/codeobject.c +++ b/contrib/tools/python3/Objects/codeobject.c @@ -132,6 +132,7 @@ all_name_chars(PyObject *o) static int intern_strings(PyObject *tuple) { + PyInterpreterState *interp = _PyInterpreterState_GET(); Py_ssize_t i; for (i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { @@ -141,7 +142,7 @@ intern_strings(PyObject *tuple) "non-string found in code slot"); return -1; } - PyUnicode_InternInPlace(&_PyTuple_ITEMS(tuple)[i]); + _PyUnicode_InternImmortal(interp, &_PyTuple_ITEMS(tuple)[i]); } return 0; } @@ -150,6 +151,7 @@ intern_strings(PyObject *tuple) static int intern_string_constants(PyObject *tuple, int *modified) { + PyInterpreterState *interp = _PyInterpreterState_GET(); for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) { PyObject *v = PyTuple_GET_ITEM(tuple, i); if (PyUnicode_CheckExact(v)) { @@ -159,7 +161,7 @@ intern_string_constants(PyObject *tuple, int *modified) if (all_name_chars(v)) { PyObject *w = v; - PyUnicode_InternInPlace(&v); + _PyUnicode_InternMortal(interp, &v); if (w != v) { PyTuple_SET_ITEM(tuple, i, v); if (modified) { diff --git a/contrib/tools/python3/Objects/dictobject.c b/contrib/tools/python3/Objects/dictobject.c index a99f32a9c8..0712bedc83 100644 --- a/contrib/tools/python3/Objects/dictobject.c +++ b/contrib/tools/python3/Objects/dictobject.c @@ -3916,7 +3916,8 @@ PyDict_SetItemString(PyObject *v, const char *key, PyObject *item) kv = PyUnicode_FromString(key); if (kv == NULL) return -1; - PyUnicode_InternInPlace(&kv); /* XXX Should we really? */ + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, &kv); /* XXX Should we really? */ err = PyDict_SetItem(v, kv, item); Py_DECREF(kv); return err; diff --git a/contrib/tools/python3/Objects/memoryobject.c b/contrib/tools/python3/Objects/memoryobject.c index 3c88859acc..26871612ea 100644 --- a/contrib/tools/python3/Objects/memoryobject.c +++ b/contrib/tools/python3/Objects/memoryobject.c @@ -108,8 +108,6 @@ mbuf_release(_PyManagedBufferObject *self) if (self->flags&_Py_MANAGED_BUFFER_RELEASED) return; - /* NOTE: at this point self->exports can still be > 0 if this function - is called from mbuf_clear() to break up a reference cycle. */ self->flags |= _Py_MANAGED_BUFFER_RELEASED; /* PyBuffer_Release() decrements master->obj and sets it to NULL. */ @@ -1092,32 +1090,19 @@ PyBuffer_ToContiguous(void *buf, const Py_buffer *src, Py_ssize_t len, char orde /* Inform the managed buffer that this particular memoryview will not access the underlying buffer again. If no other memoryviews are registered with the managed buffer, the underlying buffer is released instantly and - marked as inaccessible for both the memoryview and the managed buffer. - - This function fails if the memoryview itself has exported buffers. */ -static int + marked as inaccessible for both the memoryview and the managed buffer. */ +static void _memory_release(PyMemoryViewObject *self) { + assert(self->exports == 0); if (self->flags & _Py_MEMORYVIEW_RELEASED) - return 0; + return; - if (self->exports == 0) { - self->flags |= _Py_MEMORYVIEW_RELEASED; - assert(self->mbuf->exports > 0); - if (--self->mbuf->exports == 0) - mbuf_release(self->mbuf); - return 0; + self->flags |= _Py_MEMORYVIEW_RELEASED; + assert(self->mbuf->exports > 0); + if (--self->mbuf->exports == 0) { + mbuf_release(self->mbuf); } - if (self->exports > 0) { - PyErr_Format(PyExc_BufferError, - "memoryview has %zd exported buffer%s", self->exports, - self->exports==1 ? "" : "s"); - return -1; - } - - PyErr_SetString(PyExc_SystemError, - "_memory_release(): negative export count"); - return -1; } /*[clinic input] @@ -1130,9 +1115,21 @@ static PyObject * memoryview_release_impl(PyMemoryViewObject *self) /*[clinic end generated code: output=d0b7e3ba95b7fcb9 input=bc71d1d51f4a52f0]*/ { - if (_memory_release(self) < 0) + if (self->exports == 0) { + _memory_release(self); + Py_RETURN_NONE; + } + + if (self->exports > 0) { + PyErr_Format(PyExc_BufferError, + "memoryview has %zd exported buffer%s", self->exports, + self->exports==1 ? "" : "s"); return NULL; - Py_RETURN_NONE; + } + + PyErr_SetString(PyExc_SystemError, + "memoryview: negative export count"); + return NULL; } static void @@ -1140,7 +1137,7 @@ memory_dealloc(PyMemoryViewObject *self) { assert(self->exports == 0); _PyObject_GC_UNTRACK(self); - (void)_memory_release(self); + _memory_release(self); Py_CLEAR(self->mbuf); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -1157,8 +1154,10 @@ memory_traverse(PyMemoryViewObject *self, visitproc visit, void *arg) static int memory_clear(PyMemoryViewObject *self) { - (void)_memory_release(self); - Py_CLEAR(self->mbuf); + if (self->exports == 0) { + _memory_release(self); + Py_CLEAR(self->mbuf); + } return 0; } diff --git a/contrib/tools/python3/Objects/object.c b/contrib/tools/python3/Objects/object.c index aac707d6a2..6b2e0aeaab 100644 --- a/contrib/tools/python3/Objects/object.c +++ b/contrib/tools/python3/Objects/object.c @@ -1170,7 +1170,8 @@ PyObject_SetAttr(PyObject *v, PyObject *name, PyObject *value) } Py_INCREF(name); - PyUnicode_InternInPlace(&name); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &name); if (tp->tp_setattro != NULL) { err = (*tp->tp_setattro)(v, name, value); Py_DECREF(name); diff --git a/contrib/tools/python3/Objects/odictobject.c b/contrib/tools/python3/Objects/odictobject.c index 39b0f68451..cf364c13b9 100644 --- a/contrib/tools/python3/Objects/odictobject.c +++ b/contrib/tools/python3/Objects/odictobject.c @@ -789,6 +789,7 @@ _odict_clear_nodes(PyODictObject *od) _odictnode_DEALLOC(node); node = next; } + od->od_state++; } /* There isn't any memory management of nodes past this point. */ @@ -799,24 +800,40 @@ _odict_keys_equal(PyODictObject *a, PyODictObject *b) { _ODictNode *node_a, *node_b; + // keep operands' state to detect undesired mutations + const size_t state_a = a->od_state; + const size_t state_b = b->od_state; + node_a = _odict_FIRST(a); node_b = _odict_FIRST(b); while (1) { - if (node_a == NULL && node_b == NULL) + if (node_a == NULL && node_b == NULL) { /* success: hit the end of each at the same time */ return 1; - else if (node_a == NULL || node_b == NULL) + } + else if (node_a == NULL || node_b == NULL) { /* unequal length */ return 0; + } else { - int res = PyObject_RichCompareBool( - (PyObject *)_odictnode_KEY(node_a), - (PyObject *)_odictnode_KEY(node_b), - Py_EQ); - if (res < 0) + PyObject *key_a = Py_NewRef(_odictnode_KEY(node_a)); + PyObject *key_b = Py_NewRef(_odictnode_KEY(node_b)); + int res = PyObject_RichCompareBool(key_a, key_b, Py_EQ); + Py_DECREF(key_a); + Py_DECREF(key_b); + if (res < 0) { return res; - else if (res == 0) + } + else if (a->od_state != state_a || b->od_state != state_b) { + PyErr_SetString(PyExc_RuntimeError, + "OrderedDict mutated during iteration"); + return -1; + } + else if (res == 0) { + // This check comes after the check on the state + // in order for the exception to be set correctly. return 0; + } /* otherwise it must match, so move on to the next one */ node_a = _odictnode_NEXT(node_a); diff --git a/contrib/tools/python3/Objects/typeobject.c b/contrib/tools/python3/Objects/typeobject.c index 1cac4b8b0f..017d951bdc 100644 --- a/contrib/tools/python3/Objects/typeobject.c +++ b/contrib/tools/python3/Objects/typeobject.c @@ -1080,8 +1080,10 @@ type_module(PyTypeObject *type, void *context) if (s != NULL) { mod = PyUnicode_FromStringAndSize( type->tp_name, (Py_ssize_t)(s - type->tp_name)); - if (mod != NULL) - PyUnicode_InternInPlace(&mod); + if (mod != NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &mod); + } } else { mod = Py_NewRef(&_Py_ID(builtins)); @@ -4929,7 +4931,8 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) } /* bpo-40521: Interned strings are shared by all subinterpreters */ if (!PyUnicode_CHECK_INTERNED(name)) { - PyUnicode_InternInPlace(&name); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &name); if (!PyUnicode_CHECK_INTERNED(name)) { PyErr_SetString(PyExc_MemoryError, "Out of memory interning an attribute name"); diff --git a/contrib/tools/python3/Objects/typevarobject.c b/contrib/tools/python3/Objects/typevarobject.c index db9c2191d6..4f0bc09637 100644 --- a/contrib/tools/python3/Objects/typevarobject.c +++ b/contrib/tools/python3/Objects/typevarobject.c @@ -1364,7 +1364,16 @@ typealias_alloc(PyObject *name, PyObject *type_params, PyObject *compute_value, return NULL; } ta->name = Py_NewRef(name); - ta->type_params = Py_IsNone(type_params) ? NULL : Py_XNewRef(type_params); + if ( + type_params == NULL + || Py_IsNone(type_params) + || (PyTuple_Check(type_params) && PyTuple_GET_SIZE(type_params) == 0) + ) { + ta->type_params = NULL; + } + else { + ta->type_params = Py_NewRef(type_params); + } ta->compute_value = Py_XNewRef(compute_value); ta->value = Py_XNewRef(value); ta->module = Py_XNewRef(module); diff --git a/contrib/tools/python3/Objects/unicodeobject.c b/contrib/tools/python3/Objects/unicodeobject.c index ad6defb629..3235ae8ae0 100644 --- a/contrib/tools/python3/Objects/unicodeobject.c +++ b/contrib/tools/python3/Objects/unicodeobject.c @@ -179,10 +179,7 @@ extern "C" { *_to++ = (to_type) *_iter++; \ } while (0) -#define LATIN1(ch) \ - (ch < 128 \ - ? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \ - : (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128]) +#define LATIN1 _Py_LATIN1_CHR #ifdef MS_WINDOWS /* On Windows, overallocate by 50% is the best factor */ @@ -225,18 +222,20 @@ static inline PyObject* unicode_new_empty(void) return Py_NewRef(empty); } -/* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. -*/ +/* This dictionary holds per-interpreter interned strings. + * See InternalDocs/string_interning.md for details. + */ static inline PyObject *get_interned_dict(PyInterpreterState *interp) { return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); } +/* This hashtable holds statically allocated interned strings. + * See InternalDocs/string_interning.md for details. + */ #define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings +/* Get number of all interned strings for the current interpreter. */ Py_ssize_t _PyUnicode_InternedSize(void) { @@ -244,6 +243,28 @@ _PyUnicode_InternedSize(void) return _Py_hashtable_len(INTERNED_STRINGS) + PyDict_GET_SIZE(dict); } +/* Get number of immortal interned strings for the current interpreter. */ +Py_ssize_t +_PyUnicode_InternedSize_Immortal(void) +{ + PyObject *dict = get_interned_dict(_PyInterpreterState_GET()); + PyObject *key, *value; + Py_ssize_t pos = 0; + Py_ssize_t count = 0; + + // It's tempting to keep a count and avoid a loop here. But, this function + // is intended for refleak tests. It spends extra work to report the true + // value, to help detect bugs in optimizations. + + while (PyDict_Next(dict, &pos, &key, &value)) { + assert(PyUnicode_CHECK_INTERNED(key) != SSTATE_INTERNED_IMMORTAL_STATIC); + if (PyUnicode_CHECK_INTERNED(key) == SSTATE_INTERNED_IMMORTAL) { + count++; + } + } + return _Py_hashtable_len(INTERNED_STRINGS) + count; +} + static Py_hash_t unicode_hash(PyObject *); static int unicode_compare_eq(PyObject *, PyObject *); @@ -269,20 +290,6 @@ hashtable_unicode_compare(const void *key1, const void *key2) static int init_interned_dict(PyInterpreterState *interp) { - if (_Py_IsMainInterpreter(interp)) { - assert(INTERNED_STRINGS == NULL); - _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree}; - INTERNED_STRINGS = _Py_hashtable_new_full( - hashtable_unicode_hash, - hashtable_unicode_compare, - NULL, - NULL, - &hashtable_alloc - ); - if (INTERNED_STRINGS == NULL) { - return -1; - } - } assert(get_interned_dict(interp) == NULL); PyObject *interned = interned = PyDict_New(); if (interned == NULL) { @@ -301,7 +308,55 @@ clear_interned_dict(PyInterpreterState *interp) Py_DECREF(interned); _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; } - if (_Py_IsMainInterpreter(interp) && INTERNED_STRINGS != NULL) { +} + +static PyStatus +init_global_interned_strings(PyInterpreterState *interp) +{ + assert(INTERNED_STRINGS == NULL); + _Py_hashtable_allocator_t hashtable_alloc = {PyMem_RawMalloc, PyMem_RawFree}; + + INTERNED_STRINGS = _Py_hashtable_new_full( + hashtable_unicode_hash, + hashtable_unicode_compare, + // Objects stored here are immortal and statically allocated, + // so we don't need key_destroy_func & value_destroy_func: + NULL, + NULL, + &hashtable_alloc + ); + if (INTERNED_STRINGS == NULL) { + PyErr_Clear(); + return _PyStatus_ERR("failed to create global interned dict"); + } + + /* Intern statically allocated string identifiers, deepfreeze strings, + * and one-byte latin-1 strings. + * This must be done before any module initialization so that statically + * allocated string identifiers are used instead of heap allocated strings. + * Deepfreeze uses the interned identifiers if present to save space + * else generates them and they are interned to speed up dict lookups. + */ + _PyUnicode_InitStaticStrings(interp); + + for (int i = 0; i < 256; i++) { + PyObject *s = LATIN1(i); + _PyUnicode_InternStatic(interp, &s); + assert(s == LATIN1(i)); + } +#ifdef Py_DEBUG + assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); + + for (int i = 0; i < 256; i++) { + assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); + } +#endif + return _PyStatus_OK(); +} + +static void clear_global_interned_strings(void) +{ + if (INTERNED_STRINGS != NULL) { _Py_hashtable_destroy(INTERNED_STRINGS); INTERNED_STRINGS = NULL; } @@ -634,6 +689,41 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) } CHECK(PyUnicode_READ(kind, data, ascii->length) == 0); } + + /* Check interning state */ +#ifdef Py_DEBUG + // Note that we do not check `_Py_IsImmortal(op)`, since stable ABI + // extensions can make immortal strings mortal (but with a high enough + // refcount). + // The other way is extremely unlikely (worth a potential failed assertion + // in a debug build), so we do check `!_Py_IsImmortal(op)`. + switch (PyUnicode_CHECK_INTERNED(op)) { + case SSTATE_NOT_INTERNED: + if (ascii->state.statically_allocated) { + // This state is for two exceptions: + // - strings are currently checked before they're interned + // - the 256 one-latin1-character strings + // are static but use SSTATE_NOT_INTERNED + } + else { + CHECK(!_Py_IsImmortal(op)); + } + break; + case SSTATE_INTERNED_MORTAL: + CHECK(!ascii->state.statically_allocated); + CHECK(!_Py_IsImmortal(op)); + break; + case SSTATE_INTERNED_IMMORTAL: + CHECK(!ascii->state.statically_allocated); + break; + case SSTATE_INTERNED_IMMORTAL_STATIC: + CHECK(ascii->state.statically_allocated); + break; + default: + Py_UNREACHABLE(); + } +#endif + return 1; #undef CHECK @@ -1592,16 +1682,65 @@ unicode_dealloc(PyObject *unicode) _Py_FatalRefcountError("deallocating an Unicode singleton"); } #endif - /* This should never get called, but we also don't want to SEGV if - * we accidentally decref an immortal string out of existence. Since - * the string is an immortal object, just re-set the reference count. - */ - if (PyUnicode_CHECK_INTERNED(unicode) - || _PyUnicode_STATE(unicode).statically_allocated) - { + if (_PyUnicode_STATE(unicode).statically_allocated) { + /* This should never get called, but we also don't want to SEGV if + * we accidentally decref an immortal string out of existence. Since + * the string is an immortal object, just re-set the reference count. + */ +#ifdef Py_DEBUG + Py_UNREACHABLE(); +#endif _Py_SetImmortal(unicode); return; } + switch (_PyUnicode_STATE(unicode).interned) { + case SSTATE_NOT_INTERNED: + break; + case SSTATE_INTERNED_MORTAL: + /* Remove the object from the intern dict. + * Before doing so, we set the refcount to 3: the key and value + * in the interned_dict, plus one to work with. + */ + assert(Py_REFCNT(unicode) == 0); + Py_SET_REFCNT(unicode, 3); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyObject *interned = get_interned_dict(interp); + assert(interned != NULL); + int r = PyDict_DelItem(interned, unicode); + if (r == -1) { + PyErr_WriteUnraisable(unicode); + // We don't know what happened to the string. It's probably + // best to leak it: + // - if it was not found, something is very wrong + // - if it was deleted, there are no more references to it + // so it can't cause trouble (except wasted memory) + // - if it wasn't deleted, it'll remain interned + _Py_SetImmortal(unicode); + _PyUnicode_STATE(unicode).interned = SSTATE_INTERNED_IMMORTAL; + return; + } + // Only our work reference should be left; remove it too. + assert(Py_REFCNT(unicode) == 1); + Py_SET_REFCNT(unicode, 0); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_DecRefTotal(_PyInterpreterState_GET()); +#endif + break; + default: + // As with `statically_allocated` above. +#ifdef Py_REF_DEBUG + Py_UNREACHABLE(); +#endif + _Py_SetImmortal(unicode); + return; + } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyObject_Free(_PyUnicode_UTF8(unicode)); } @@ -1763,7 +1902,8 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index, static PyObject* get_latin1_char(Py_UCS1 ch) { - return Py_NewRef(LATIN1(ch)); + PyObject *o = LATIN1(ch); + return o; } static PyObject* @@ -1948,7 +2088,7 @@ _PyUnicode_FromId(_Py_Identifier *id) if (!obj) { return NULL; } - PyUnicode_InternInPlace(&obj); + _PyUnicode_InternImmortal(interp, &obj); if (index >= ids->size) { // Overallocate to reduce the number of realloc @@ -10733,8 +10873,10 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (left == right_uni) return 1; - if (PyUnicode_CHECK_INTERNED(left)) + assert(PyUnicode_CHECK_INTERNED(right_uni)); + if (PyUnicode_CHECK_INTERNED(left)) { return 0; + } assert(_PyUnicode_HASH(right_uni) != -1); Py_hash_t hash = _PyUnicode_HASH(left); @@ -14709,28 +14851,26 @@ _PyUnicode_InitState(PyInterpreterState *interp) PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) { - // Initialize the global interned dict - if (init_interned_dict(interp)) { - PyErr_Clear(); - return _PyStatus_ERR("failed to create interned dict"); + if (_Py_IsMainInterpreter(interp)) { + PyStatus status = init_global_interned_strings(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } } + assert(INTERNED_STRINGS); - if (_Py_IsMainInterpreter(interp)) { - /* Intern statically allocated string identifiers and deepfreeze strings. - * This must be done before any module initialization so that statically - * allocated string identifiers are used instead of heap allocated strings. - * Deepfreeze uses the interned identifiers if present to save space - * else generates them and they are interned to speed up dict lookups. - */ - _PyUnicode_InitStaticStrings(interp); + return _PyStatus_OK(); +} -#ifdef Py_DEBUG - assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); - for (int i = 0; i < 256; i++) { - assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); - } -#endif +PyStatus +_PyUnicode_InitInternDict(PyInterpreterState *interp) +{ + assert(INTERNED_STRINGS); + + if (init_interned_dict(interp)) { + PyErr_Clear(); + return _PyStatus_ERR("failed to create interned dict"); } return _PyStatus_OK(); @@ -14755,104 +14895,243 @@ error: return _PyStatus_ERR("Can't initialize unicode types"); } +static /* non-null */ PyObject* +intern_static(PyInterpreterState *interp, PyObject *s /* stolen */) +{ + // Note that this steals a reference to `s`, but in many cases that + // stolen ref is returned, requiring no decref/incref. + + assert(s != NULL); + assert(_PyUnicode_CHECK(s)); + assert(_PyUnicode_STATE(s).statically_allocated); + assert(!PyUnicode_CHECK_INTERNED(s)); + +#ifdef Py_DEBUG + /* We must not add process-global interned string if there's already a + * per-interpreter interned_dict, which might contain duplicates. + */ + PyObject *interned = get_interned_dict(interp); + // assert(interned == NULL); +#endif + + /* Look in the global cache first. */ + PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); + /* We should only init each string once */ + assert(r == NULL); + /* but just in case (for the non-debug build), handle this */ + if (r != NULL && r != s) { + assert(_PyUnicode_STATE(r).interned == SSTATE_INTERNED_IMMORTAL_STATIC); + assert(_PyUnicode_CHECK(r)); + Py_DECREF(s); + return Py_NewRef(r); + } + + if (_Py_hashtable_set(INTERNED_STRINGS, s, s) < -1) { + Py_FatalError("failed to intern static string"); + } + + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL_STATIC; + return s; +} void -_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) +_PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) +{ + // This should only be called as part of runtime initialization + assert(!Py_IsInitialized()); + + *p = intern_static(interp, *p); + assert(*p); +} + +static void +immortalize_interned(PyObject *s) +{ + assert(PyUnicode_CHECK_INTERNED(s) == SSTATE_INTERNED_MORTAL); + assert(!_Py_IsImmortal(s)); +#ifdef Py_REF_DEBUG + /* The reference count value should be excluded from the RefTotal. + The decrements to these objects will not be registered so they + need to be accounted for in here. */ + for (Py_ssize_t i = 0; i < Py_REFCNT(s); i++) { + _Py_DecRefTotal(_PyInterpreterState_GET()); + } +#endif + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; + _Py_SetImmortal(s); +} + +static /* non-null */ PyObject* +intern_common(PyInterpreterState *interp, PyObject *s /* stolen */, + bool immortalize) { - PyObject *s = *p; + // Note that this steals a reference to `s`, but in many cases that + // stolen ref is returned, requiring no decref/incref. + #ifdef Py_DEBUG assert(s != NULL); assert(_PyUnicode_CHECK(s)); #else if (s == NULL || !PyUnicode_Check(s)) { - return; + return s; } #endif /* If it's a subclass, we don't really know what putting it in the interned dict might do. */ if (!PyUnicode_CheckExact(s)) { - return; + return s; } - if (PyUnicode_CHECK_INTERNED(s)) { - return; + /* Is it already interned? */ + switch (PyUnicode_CHECK_INTERNED(s)) { + case SSTATE_NOT_INTERNED: + // no, go on + break; + case SSTATE_INTERNED_MORTAL: + // yes but we might need to make it immortal + if (immortalize) { + immortalize_interned(s); + } + return s; + default: + // all done + return s; } - /* Look in the global cache first. */ - PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); - if (r != NULL && r != s) { - Py_SETREF(*p, Py_NewRef(r)); - return; + if (_PyUnicode_STATE(s).statically_allocated) { + return intern_static(interp, s); } - /* Handle statically allocated strings. */ - if (_PyUnicode_STATE(s).statically_allocated) { - assert(_Py_IsImmortal(s)); - if (_Py_hashtable_set(INTERNED_STRINGS, s, s) == 0) { - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; + /* If it's already immortal, intern it as such */ + if (_Py_IsImmortal(s)) { + immortalize = 1; + } + + /* if it's a short string, get the singleton */ + if (PyUnicode_GET_LENGTH(s) == 1 && + PyUnicode_KIND(s) == PyUnicode_1BYTE_KIND) { + PyObject *r = LATIN1(*(unsigned char*)PyUnicode_DATA(s)); + assert(PyUnicode_CHECK_INTERNED(r)); + Py_DECREF(s); + return r; + } +#ifdef Py_DEBUG + assert(!unicode_is_singleton(s)); +#endif + + /* Look in the global cache now. */ + { + PyObject *r = (PyObject *)_Py_hashtable_get(INTERNED_STRINGS, s); + if (r != NULL) { + assert(_PyUnicode_STATE(r).statically_allocated); + assert(r != s); // r must be statically_allocated; s is not + Py_DECREF(s); + return Py_NewRef(r); } - return; } - /* Look in the per-interpreter cache. */ + /* Do a setdefault on the per-interpreter cache. */ PyObject *interned = get_interned_dict(interp); assert(interned != NULL); - PyObject *t = PyDict_SetDefault(interned, s, s); + PyObject *t = PyDict_SetDefault(interned, s, s); // t is borrowed if (t == NULL) { PyErr_Clear(); - return; + return s; } if (t != s) { - Py_SETREF(*p, Py_NewRef(t)); - return; + // value was already present (not inserted) + Py_INCREF(t); + Py_DECREF(s); + if (immortalize && + PyUnicode_CHECK_INTERNED(t) == SSTATE_INTERNED_MORTAL) { + immortalize_interned(t); + } + return t; } - - if (_Py_IsImmortal(s)) { - // XXX Restrict this to the main interpreter? - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL_STATIC; - return; + else { + // value was newly inserted } + /* NOT_INTERNED -> INTERNED_MORTAL */ + + assert(_PyUnicode_STATE(s).interned == SSTATE_NOT_INTERNED); + + if (!_Py_IsImmortal(s)) { + /* The two references in interned dict (key and value) are not counted. + unicode_dealloc() and _PyUnicode_ClearInterned() take care of this. */ + Py_SET_REFCNT(s, Py_REFCNT(s) - 2); #ifdef Py_REF_DEBUG - /* The reference count value excluding the 2 references from the - interned dictionary should be excluded from the RefTotal. The - decrements to these objects will not be registered so they - need to be accounted for in here. */ - for (Py_ssize_t i = 0; i < Py_REFCNT(s) - 2; i++) { + /* let's be pedantic with the ref total */ + _Py_DecRefTotal(_PyInterpreterState_GET()); _Py_DecRefTotal(_PyInterpreterState_GET()); +#endif + } + _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; + + /* INTERNED_MORTAL -> INTERNED_IMMORTAL (if needed) */ + +#ifdef Py_DEBUG + if (_Py_IsImmortal(s)) { + assert(immortalize); } #endif - _Py_SetImmortal(s); - _PyUnicode_STATE(*p).interned = SSTATE_INTERNED_IMMORTAL; + if (immortalize) { + immortalize_interned(s); + } + + return s; +} + +void +_PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **p) +{ + *p = intern_common(interp, *p, 1); + assert(*p); +} + +void +_PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **p) +{ + *p = intern_common(interp, *p, 0); + assert(*p); +} + + +void +_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) +{ + _PyUnicode_InternImmortal(interp, p); + return; } void PyUnicode_InternInPlace(PyObject **p) { PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyUnicode_InternInPlace(interp, p); + _PyUnicode_InternMortal(interp, p); } -// Function kept for the stable ABI. +// Public-looking name kept for the stable ABI; user should not call this: PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); void PyUnicode_InternImmortal(PyObject **p) { - PyUnicode_InternInPlace(p); - // Leak a reference on purpose - Py_INCREF(*p); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, p); } PyObject * PyUnicode_InternFromString(const char *cp) { PyObject *s = PyUnicode_FromString(cp); - if (s == NULL) + if (s == NULL) { return NULL; - PyUnicode_InternInPlace(&s); + } + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternMortal(interp, &s); return s; } @@ -14866,20 +15145,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } assert(PyDict_CheckExact(interned)); - /* TODO: - * Currently, the runtime is not able to guarantee that it can exit without - * allocations that carry over to a future initialization of Python within - * the same process. i.e: - * ./python -X showrefcount -c 'import itertools' - * [237 refs, 237 blocks] - * - * Therefore, this should remain disabled for until there is a strict guarantee - * that no memory will be left after `Py_Finalize`. - */ -#ifdef Py_DEBUG - /* For all non-singleton interned strings, restore the two valid references - to that instance from within the intern string dictionary and let the - normal reference counting process clean up these instances. */ #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", PyDict_GET_SIZE(interned)); @@ -14893,13 +15158,32 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) int shared = 0; switch (PyUnicode_CHECK_INTERNED(s)) { case SSTATE_INTERNED_IMMORTAL: + /* Make immortal interned strings mortal again. + * + * Currently, the runtime is not able to guarantee that it can exit + * without allocations that carry over to a future initialization + * of Python within the same process. i.e: + * ./python -X showrefcount -c 'import itertools' + * [237 refs, 237 blocks] + * + * This should remain disabled (`Py_DEBUG` only) until there is a + * strict guarantee that no memory will be left after + * `Py_Finalize`. + */ +#ifdef Py_DEBUG // Skip the Immortal Instance check and restore // the two references (key and value) ignored // by PyUnicode_InternInPlace(). s->ob_refcnt = 2; +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif #ifdef INTERNED_STATS total_length += PyUnicode_GET_LENGTH(s); #endif +#endif // Py_DEBUG break; case SSTATE_INTERNED_IMMORTAL_STATIC: /* It is shared between interpreters, so we should unmark it @@ -14912,7 +15196,15 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) } break; case SSTATE_INTERNED_MORTAL: - /* fall through */ + // Restore 2 references held by the interned dict; these will + // be decref'd by clear_interned_dict's PyDict_Clear. + Py_SET_REFCNT(s, Py_REFCNT(s) + 2); +#ifdef Py_REF_DEBUG + /* let's be pedantic with the ref total */ + _Py_IncRefTotal(_PyInterpreterState_GET()); + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + break; case SSTATE_NOT_INTERNED: /* fall through */ default: @@ -14933,8 +15225,10 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) for (Py_ssize_t i=0; i < ids->size; i++) { Py_XINCREF(ids->array[i]); } -#endif /* Py_DEBUG */ clear_interned_dict(interp); + if (_Py_IsMainInterpreter(interp)) { + clear_global_interned_strings(); + } } |