diff options
Diffstat (limited to 'contrib/tools/python3/Python/pystate.c')
| -rw-r--r-- | contrib/tools/python3/Python/pystate.c | 1969 |
1 files changed, 1005 insertions, 964 deletions
diff --git a/contrib/tools/python3/Python/pystate.c b/contrib/tools/python3/Python/pystate.c index c829f644318..2b1bff7f108 100644 --- a/contrib/tools/python3/Python/pystate.c +++ b/contrib/tools/python3/Python/pystate.c @@ -2,18 +2,24 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" +#include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" #include "pycore_code.h" // stats +#include "pycore_critical_section.h" // _PyCriticalSection_Resume() #include "pycore_dtoa.h" // _dtoa_state_INIT() +#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_frame.h" -#include "pycore_initconfig.h" +#include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyType_InitCache() -#include "pycore_pyerrors.h" -#include "pycore_pylifecycle.h" +#include "pycore_object_stack.h" // _PyObjectStackChunk_ClearFreeList() +#include "pycore_parking_lot.h" // _PyParkingLot_AfterFork() +#include "pycore_pyerrors.h" // _PyErr_Clear() +#include "pycore_pylifecycle.h" // _PyAST_Fini() #include "pycore_pymem.h" // _PyMem_SetDefaultAllocator() #include "pycore_pystate.h" #include "pycore_runtime_init.h" // _PyRuntimeState_INIT -#include "pycore_sysmodule.h" +#include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() /* -------------------------------------------------------------------------- CAUTION @@ -26,16 +32,12 @@ to avoid the expense of doing their own locking). -------------------------------------------------------------------------- */ #ifdef HAVE_DLOPEN -#ifdef HAVE_DLFCN_H -#include <dlfcn.h> -#endif -#if !HAVE_DECL_RTLD_LAZY -#define RTLD_LAZY 1 -#endif -#endif - -#ifdef __cplusplus -extern "C" { +# ifdef HAVE_DLFCN_H +# include <dlfcn.h> +# endif +# if !HAVE_DECL_RTLD_LAZY +# define RTLD_LAZY 1 +# endif #endif @@ -66,7 +68,7 @@ _Py_thread_local PyThreadState *_Py_tss_tstate = NULL; #endif static inline PyThreadState * -current_fast_get(_PyRuntimeState *Py_UNUSED(runtime)) +current_fast_get(void) { #ifdef HAVE_THREAD_LOCAL return _Py_tss_tstate; @@ -100,14 +102,14 @@ current_fast_clear(_PyRuntimeState *Py_UNUSED(runtime)) } #define tstate_verify_not_active(tstate) \ - if (tstate == current_fast_get((tstate)->interp->runtime)) { \ + if (tstate == current_fast_get()) { \ _Py_FatalErrorFormat(__func__, "tstate %p is still current", tstate); \ } PyThreadState * _PyThreadState_GetCurrent(void) { - return current_fast_get(&_PyRuntime); + return current_fast_get(); } @@ -237,6 +239,8 @@ tstate_is_bound(PyThreadState *tstate) static void bind_gilstate_tstate(PyThreadState *); static void unbind_gilstate_tstate(PyThreadState *); +static void tstate_mimalloc_bind(PyThreadState *); + static void bind_tstate(PyThreadState *tstate) { @@ -257,6 +261,15 @@ bind_tstate(PyThreadState *tstate) tstate->native_thread_id = PyThread_get_thread_native_id(); #endif +#ifdef Py_GIL_DISABLED + // Initialize biased reference counting inter-thread queue. Note that this + // needs to be initialized from the active thread. + _Py_brc_init_thread(tstate); +#endif + + // mimalloc state needs to be initialized from the active thread. + tstate_mimalloc_bind(tstate); + tstate->_status.bound = 1; } @@ -359,10 +372,9 @@ holds_gil(PyThreadState *tstate) return PyGILState_Check(); } #endif - _PyRuntimeState *runtime = tstate->interp->runtime; /* Must be the tstate for this thread */ - assert(tstate == gilstate_tss_get(runtime)); - return tstate == current_fast_get(runtime); + assert(tstate == gilstate_tss_get(tstate->interp->runtime)); + return tstate == current_fast_get(); } @@ -382,61 +394,34 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS Note that we initialize "initial" relative to _PyRuntime, to ensure pre-initialized pointers point to the active runtime state (and not "initial"). */ -static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime); +static const _PyRuntimeState initial = _PyRuntimeState_INIT(_PyRuntime, ""); _Py_COMP_DIAG_POP -#define NUMLOCKS 9 #define LOCKS_INIT(runtime) \ { \ &(runtime)->interpreters.mutex, \ - &(runtime)->xidregistry.mutex, \ - &(runtime)->getargs.mutex, \ - &(runtime)->unicode_state.ids.lock, \ + &(runtime)->xi.registry.mutex, \ + &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ - &(runtime)->ceval.pending_mainthread.lock, \ + &(runtime)->ceval.pending_mainthread.mutex, \ &(runtime)->atexit.mutex, \ &(runtime)->audit_hooks.mutex, \ &(runtime)->allocators.mutex, \ + &(runtime)->_main_interpreter.types.mutex, \ + &(runtime)->_main_interpreter.code_state.mutex, \ } -static int -alloc_for_runtime(PyThread_type_lock locks[NUMLOCKS]) -{ - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - for (int i = 0; i < NUMLOCKS; i++) { - PyThread_type_lock lock = PyThread_allocate_lock(); - if (lock == NULL) { - for (int j = 0; j < i; j++) { - PyThread_free_lock(locks[j]); - locks[j] = NULL; - } - break; - } - locks[i] = lock; - } - - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - return 0; -} - static void init_runtime(_PyRuntimeState *runtime, void *open_code_hook, void *open_code_userdata, _Py_AuditHookEntry *audit_hook_head, - Py_ssize_t unicode_next_index, - PyThread_type_lock locks[NUMLOCKS]) + Py_ssize_t unicode_next_index) { - if (runtime->_initialized) { - Py_FatalError("runtime already initialized"); - } - assert(!runtime->preinitializing && - !runtime->preinitialized && - !runtime->core_initialized && - !runtime->initialized); + assert(!runtime->preinitializing); + assert(!runtime->preinitialized); + assert(!runtime->core_initialized); + assert(!runtime->initialized); + assert(!runtime->_initialized); runtime->open_code_hook = open_code_hook; runtime->open_code_userdata = open_code_userdata; @@ -444,17 +429,15 @@ init_runtime(_PyRuntimeState *runtime, PyPreConfig_InitPythonConfig(&runtime->preconfig); - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - for (int i = 0; i < NUMLOCKS; i++) { - assert(locks[i] != NULL); - *lockptrs[i] = locks[i]; - } - // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); runtime->unicode_state.ids.next_index = unicode_next_index; +#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) + _Py_EmscriptenTrampoline_Init(runtime); +#endif + runtime->_initialized = 1; } @@ -471,15 +454,13 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) // is called multiple times. Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index; - PyThread_type_lock locks[NUMLOCKS]; - if (alloc_for_runtime(locks) != 0) { - return _PyStatus_NO_MEMORY(); - } - if (runtime->_initialized) { // Py_Initialize() must be running again. // Reset to _PyRuntimeState_INIT. memcpy(runtime, &initial, sizeof(*runtime)); + // Preserve the cookie from the original runtime. + memcpy(runtime->debug_offsets.cookie, _Py_Debug_Cookie, 8); + assert(!runtime->_initialized); } if (gilstate_tss_init(runtime) != 0) { @@ -493,13 +474,11 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) } init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head, - unicode_next_index, locks); + unicode_next_index); return _PyStatus_OK(); } -static void _xidregistry_clear(struct _xidregistry *); - void _PyRuntimeState_Fini(_PyRuntimeState *runtime) { @@ -508,8 +487,6 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) assert(runtime->object_state.interpreter_leaks == 0); #endif - _xidregistry_clear(&runtime->xidregistry); - if (gilstate_tss_initialized(runtime)) { gilstate_tss_fini(runtime); } @@ -517,27 +494,6 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) if (PyThread_tss_is_created(&runtime->trashTSSkey)) { PyThread_tss_delete(&runtime->trashTSSkey); } - - /* Force the allocator used by _PyRuntimeState_Init(). */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); -#define FREE_LOCK(LOCK) \ - if (LOCK != NULL) { \ - PyThread_free_lock(LOCK); \ - LOCK = NULL; \ - } - - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - for (int i = 0; i < NUMLOCKS; i++) { - FREE_LOCK(*lockptrs[i]); - } - -#undef FREE_LOCK - if (runtime->sys_path_0 != NULL) { - PyMem_RawFree(runtime->sys_path_0); - runtime->sys_path_0 = NULL; - } - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); } #ifdef HAVE_FORK @@ -549,29 +505,30 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) // This was initially set in _PyRuntimeState_Init(). runtime->main_thread = PyThread_get_thread_ident(); - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); + // Clears the parking lot. Any waiting threads are dead. This must be + // called before releasing any locks that use the parking lot. + _PyParkingLot_AfterFork(); - PyThread_type_lock *lockptrs[NUMLOCKS] = LOCKS_INIT(runtime); - int reinit_err = 0; - for (int i = 0; i < NUMLOCKS; i++) { - reinit_err += _PyThread_at_fork_reinit(lockptrs[i]); + // Re-initialize global locks + PyMutex *locks[] = LOCKS_INIT(runtime); + for (size_t i = 0; i < Py_ARRAY_LENGTH(locks); i++) { + _PyMutex_at_fork_reinit(locks[i]); } - /* PyOS_AfterFork_Child(), which calls this function, later calls - _PyInterpreterState_DeleteExceptMain(), so we only need to update - the main interpreter here. */ - assert(runtime->interpreters.main != NULL); - runtime->interpreters.main->xidregistry.mutex = runtime->xidregistry.mutex; +#ifdef Py_GIL_DISABLED + for (PyInterpreterState *interp = runtime->interpreters.head; + interp != NULL; interp = interp->next) + { + for (int i = 0; i < NUM_WEAKREF_LIST_LOCKS; i++) { + _PyMutex_at_fork_reinit(&interp->weakref_locks[i]); + } + } +#endif - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); + _PyTypes_AfterFork(); /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does * not force the default allocator. */ - reinit_err += _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex); - - if (reinit_err < 0) { + if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { return _PyStatus_ERR("Failed to reinitialize runtime locks"); } @@ -587,6 +544,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) return _PyStatus_NO_MEMORY(); } + _PyThread_AfterFork(&runtime->threads); + return _PyStatus_OK(); } #endif @@ -607,24 +566,6 @@ _PyInterpreterState_Enable(_PyRuntimeState *runtime) { struct pyinterpreters *interpreters = &runtime->interpreters; interpreters->next_id = 0; - - /* Py_Finalize() calls _PyRuntimeState_Fini() which clears the mutex. - Create a new mutex if needed. */ - if (interpreters->mutex == NULL) { - /* Force default allocator, since _PyRuntimeState_Fini() must - use the same allocator than this function. */ - PyMemAllocatorEx old_alloc; - _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - interpreters->mutex = PyThread_allocate_lock(); - - PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - if (interpreters->mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for interpreter"); - } - } - return _PyStatus_OK(); } @@ -641,10 +582,17 @@ free_interpreter(PyInterpreterState *interp) // The main interpreter is statically allocated so // should not be freed. if (interp != &_PyRuntime._main_interpreter) { + if (_PyMem_obmalloc_state_on_heap(interp)) { + // interpreter has its own obmalloc state, free it + PyMem_RawFree(interp->obmalloc); + interp->obmalloc = NULL; + } PyMem_RawFree(interp); } } - +#ifndef NDEBUG +static inline int check_interpreter_whence(long); +#endif /* Get the interpreter state to a minimal consistent state. Further init happens in pylifecycle.c before it can be used. All fields not initialized here are expected to be zeroed out, @@ -664,16 +612,20 @@ free_interpreter(PyInterpreterState *interp) main interpreter. We fix those fields here, in addition to the other dynamically initialized fields. */ -static void +static PyStatus init_interpreter(PyInterpreterState *interp, _PyRuntimeState *runtime, int64_t id, PyInterpreterState *next, - PyThread_type_lock pending_lock) + long whence) { if (interp->_initialized) { - Py_FatalError("interpreter already initialized"); + return _PyStatus_ERR("interpreter already initialized"); } + assert(interp->_whence == _PyInterpreterState_WHENCE_NOTSET); + assert(check_interpreter_whence(whence) == 0); + interp->_whence = whence; + assert(runtime != NULL); interp->runtime = runtime; @@ -684,21 +636,19 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - /* Initialize obmalloc, but only for subinterpreters, - since the main interpreter is initialized statically. */ - if (interp != &runtime->_main_interpreter) { - poolp temp[OBMALLOC_USED_POOLS_SIZE] = \ - _obmalloc_pools_INIT(interp->obmalloc.pools); - memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); - } + interp->threads_preallocated = &interp->_initial_thread; // We would call _PyObject_InitState() at this point // if interp->feature_flags were alredy set. - _PyEval_InitState(interp, pending_lock); + _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); PyConfig_InitPythonConfig(&interp->config); _PyType_InitCache(interp); +#ifdef Py_GIL_DISABLED + _Py_brc_init_state(interp); +#endif + llist_init(&interp->mem_free_queue.head); for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; } @@ -708,54 +658,49 @@ init_interpreter(PyInterpreterState *interp, } } - interp->sys_profile_initialized = false; - interp->sys_trace_initialized = false; +#ifdef _Py_TIER2 + (void)_Py_SetOptimizer(interp, NULL); + interp->executor_list_head = NULL; +#endif if (interp != &runtime->_main_interpreter) { /* Fix the self-referential, statically initialized fields. */ interp->dtoa = (struct _dtoa_state)_dtoa_state_INIT(interp); } - interp->f_opcode_trace_set = false; - - assert(runtime->xidregistry.mutex != NULL); - interp->xidregistry.mutex = runtime->xidregistry.mutex; interp->_initialized = 1; + return _PyStatus_OK(); } -PyInterpreterState * -PyInterpreterState_New(void) + +PyStatus +_PyInterpreterState_New(PyThreadState *tstate, PyInterpreterState **pinterp) { - PyInterpreterState *interp; - _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); + *pinterp = NULL; - /* tstate is NULL when Py_InitializeFromConfig() calls - PyInterpreterState_New() to create the main interpreter. */ - if (_PySys_Audit(tstate, "cpython.PyInterpreterState_New", NULL) < 0) { - return NULL; - } + // Don't get runtime from tstate since tstate can be NULL + _PyRuntimeState *runtime = &_PyRuntime; - PyThread_type_lock pending_lock = PyThread_allocate_lock(); - if (pending_lock == NULL) { - if (tstate != NULL) { - _PyErr_NoMemory(tstate); + // tstate is NULL when pycore_create_interpreter() calls + // _PyInterpreterState_New() to create the main interpreter. + if (tstate != NULL) { + if (_PySys_Audit(tstate, "cpython.PyInterpreterState_New", NULL) < 0) { + return _PyStatus_ERR("sys.audit failed"); } - return NULL; } - /* Don't get runtime from tstate since tstate can be NULL. */ - struct pyinterpreters *interpreters = &runtime->interpreters; - /* We completely serialize creation of multiple interpreters, since it simplifies things here and blocking concurrent calls isn't a problem. Regardless, we must fully block subinterpreter creation until after the main interpreter is created. */ HEAD_LOCK(runtime); + struct pyinterpreters *interpreters = &runtime->interpreters; int64_t id = interpreters->next_id; interpreters->next_id += 1; // Allocate the interpreter and add it to the runtime state. + PyInterpreterState *interp; + PyStatus status; PyInterpreterState *old_head = interpreters->head; if (old_head == NULL) { // We are creating the main interpreter. @@ -774,39 +719,58 @@ PyInterpreterState_New(void) interp = alloc_interpreter(); if (interp == NULL) { + status = _PyStatus_NO_MEMORY(); goto error; } // Set to _PyInterpreterState_INIT. - memcpy(interp, &initial._main_interpreter, - sizeof(*interp)); + memcpy(interp, &initial._main_interpreter, sizeof(*interp)); if (id < 0) { /* overflow or Py_Initialize() not called yet! */ - if (tstate != NULL) { - _PyErr_SetString(tstate, PyExc_RuntimeError, - "failed to get an interpreter ID"); - } + status = _PyStatus_ERR("failed to get an interpreter ID"); goto error; } } interpreters->head = interp; - init_interpreter(interp, runtime, id, old_head, pending_lock); + long whence = _PyInterpreterState_WHENCE_UNKNOWN; + status = init_interpreter(interp, runtime, + id, old_head, whence); + if (_PyStatus_EXCEPTION(status)) { + goto error; + } HEAD_UNLOCK(runtime); - return interp; + + assert(interp != NULL); + *pinterp = interp; + return _PyStatus_OK(); error: HEAD_UNLOCK(runtime); - PyThread_free_lock(pending_lock); if (interp != NULL) { free_interpreter(interp); } - return NULL; + return status; } +PyInterpreterState * +PyInterpreterState_New(void) +{ + // tstate can be NULL + PyThreadState *tstate = current_fast_get(); + + PyInterpreterState *interp; + PyStatus status = _PyInterpreterState_New(tstate, &interp); + if (_PyStatus_EXCEPTION(status)) { + Py_ExitStatusException(status); + } + assert(interp != NULL); + return interp; +} + static void interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) { @@ -848,6 +812,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) tstate->_status.cleared = 0; } +#ifdef _Py_TIER2 + _PyOptimizerObject *old = _Py_SetOptimizer(interp, NULL); + assert(old != NULL); + Py_DECREF(old); +#endif + /* It is possible that any of the objects below have a finalizer that runs Python code or otherwise relies on a thread state or even the interpreter state. For now we trust that isn't @@ -857,6 +827,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->audit_hooks); + // At this time, all the threads should be cleared so we don't need atomic + // operations for instrumentation_version or eval_breaker. + interp->ceval.instrumentation_version = 0; + tstate->eval_breaker = 0; + for (int i = 0; i < _PY_MONITORING_UNGROUPED_EVENTS; i++) { interp->monitors.tools[i] = 0; } @@ -865,16 +840,12 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) Py_CLEAR(interp->monitoring_callables[t][e]); } } - interp->sys_profile_initialized = false; - interp->sys_trace_initialized = false; for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) { Py_CLEAR(interp->monitoring_tool_names[t]); } PyConfig_Clear(&interp->config); - Py_CLEAR(interp->codec_search_path); - Py_CLEAR(interp->codec_search_cache); - Py_CLEAR(interp->codec_error_registry); + _PyCodec_Fini(interp); assert(interp->imports.modules == NULL); assert(interp->imports.modules_by_index == NULL); @@ -909,11 +880,6 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) PyDict_Clear(interp->builtins); Py_CLEAR(interp->sysdict); Py_CLEAR(interp->builtins); - Py_CLEAR(interp->interpreter_trampoline); - - _xidregistry_clear(&interp->xidregistry); - /* The lock is owned by the runtime, so we don't free it here. */ - interp->xidregistry.mutex = NULL; if (tstate->interp == interp) { /* We are now safe to fix tstate->_status.cleared. */ @@ -938,10 +904,11 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) interp->code_watchers[i] = NULL; } interp->active_code_watchers = 0; - interp->f_opcode_trace_set = false; // XXX Once we have one allocator per interpreter (i.e. // per-interpreter GC) we must ensure that all of the interpreter's // objects have been cleaned up at the point. + + // If we had a freelist of thread states, we would clear it here. } @@ -951,7 +918,7 @@ PyInterpreterState_Clear(PyInterpreterState *interp) // Use the current Python thread state to call audit hooks and to collect // garbage. It can be different than the current Python thread state // of 'interp'. - PyThreadState *current_tstate = current_fast_get(interp->runtime); + PyThreadState *current_tstate = current_fast_get(); _PyImport_ClearCore(interp); interpreter_clear(interp, current_tstate); } @@ -966,6 +933,7 @@ _PyInterpreterState_Clear(PyThreadState *tstate) static inline void tstate_deactivate(PyThreadState *tstate); +static void tstate_set_detached(PyThreadState *tstate, int detached_state); static void zapthreads(PyInterpreterState *interp); void @@ -976,18 +944,14 @@ PyInterpreterState_Delete(PyInterpreterState *interp) // XXX Clearing the "current" thread state should happen before // we start finalizing the interpreter (or the current thread state). - PyThreadState *tcur = current_fast_get(runtime); + PyThreadState *tcur = current_fast_get(); if (tcur != NULL && interp == tcur->interp) { /* Unset current thread. After this, many C API calls become crashy. */ - current_fast_clear(runtime); - tstate_deactivate(tcur); - _PyEval_ReleaseLock(interp, NULL); + _PyThreadState_Detach(tcur); } zapthreads(interp); - _PyEval_FiniState(&interp->ceval); - // XXX These two calls should be done at the end of clear_interpreter(), // but currently some objects get decref'ed after that. #ifdef Py_REF_DEBUG @@ -1021,6 +985,13 @@ PyInterpreterState_Delete(PyInterpreterState *interp) if (interp->id_mutex != NULL) { PyThread_free_lock(interp->id_mutex); } + + _Py_qsbr_fini(interp); + + _PyObject_FiniState(interp); + + PyConfig_Clear(&interp->config); + free_interpreter(interp); } @@ -1072,37 +1043,88 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime) } #endif +static inline void +set_main_thread(PyInterpreterState *interp, PyThreadState *tstate) +{ + _Py_atomic_store_ptr_relaxed(&interp->threads.main, tstate); +} + +static inline PyThreadState * +get_main_thread(PyInterpreterState *interp) +{ + return _Py_atomic_load_ptr_relaxed(&interp->threads.main); +} int _PyInterpreterState_SetRunningMain(PyInterpreterState *interp) { - if (interp->threads_main != NULL) { - PyErr_SetString(PyExc_RuntimeError, - "interpreter already running"); + if (get_main_thread(interp) != NULL) { + // In 3.14+ we use _PyErr_SetInterpreterAlreadyRunning(). + PyErr_SetString(PyExc_InterpreterError, "interpreter already running"); return -1; } - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); if (tstate->interp != interp) { PyErr_SetString(PyExc_RuntimeError, "current tstate has wrong interpreter"); return -1; } - interp->threads_main = tstate; + set_main_thread(interp, tstate); + return 0; } void _PyInterpreterState_SetNotRunningMain(PyInterpreterState *interp) { - assert(interp->threads_main == current_fast_get(&_PyRuntime)); - interp->threads_main = NULL; + assert(get_main_thread(interp) == current_fast_get()); + set_main_thread(interp, NULL); } int _PyInterpreterState_IsRunningMain(PyInterpreterState *interp) { - return (interp->threads_main != NULL); + if (get_main_thread(interp) != NULL) { + return 1; + } + // Embedders might not know to call _PyInterpreterState_SetRunningMain(), + // so their main thread wouldn't show it is running the main interpreter's + // program. (Py_Main() doesn't have this problem.) For now this isn't + // critical. If it were, we would need to infer "running main" from other + // information, like if it's the main interpreter. We used to do that + // but the naive approach led to some inconsistencies that caused problems. + return 0; +} + +int +_PyThreadState_IsRunningMain(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + // See the note in _PyInterpreterState_IsRunningMain() about + // possible false negatives here for embedders. + return get_main_thread(interp) == tstate; +} + +// This has been removed in 3.14. +int +_PyInterpreterState_FailIfRunningMain(PyInterpreterState *interp) +{ + if (get_main_thread(interp) != NULL) { + PyErr_SetString(PyExc_InterpreterError, + "interpreter already running"); + return -1; + } + return 0; +} + +void +_PyInterpreterState_ReinitRunningMain(PyThreadState *tstate) +{ + PyInterpreterState *interp = tstate->interp; + if (get_main_thread(interp) != tstate) { + set_main_thread(interp, NULL); + } } @@ -1110,6 +1132,106 @@ _PyInterpreterState_IsRunningMain(PyInterpreterState *interp) // accessors //---------- +int +_PyInterpreterState_IsReady(PyInterpreterState *interp) +{ + return interp->_ready; +} + +#ifndef NDEBUG +static inline int +check_interpreter_whence(long whence) +{ + if(whence < 0) { + return -1; + } + if (whence > _PyInterpreterState_WHENCE_MAX) { + return -1; + } + return 0; +} +#endif + +long +_PyInterpreterState_GetWhence(PyInterpreterState *interp) +{ + assert(check_interpreter_whence(interp->_whence) == 0); + return interp->_whence; +} + +void +_PyInterpreterState_SetWhence(PyInterpreterState *interp, long whence) +{ + assert(interp->_whence != _PyInterpreterState_WHENCE_NOTSET); + assert(check_interpreter_whence(whence) == 0); + interp->_whence = whence; +} + + +PyObject * +PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *interp) +{ + PyObject *modules = _PyImport_GetModules(interp); + if (modules == NULL) { + PyErr_SetString(PyExc_RuntimeError, "interpreter not initialized"); + return NULL; + } + return PyMapping_GetItemString(modules, "__main__"); +} + + +PyObject * +PyInterpreterState_GetDict(PyInterpreterState *interp) +{ + if (interp->dict == NULL) { + interp->dict = PyDict_New(); + if (interp->dict == NULL) { + PyErr_Clear(); + } + } + /* Returning NULL means no per-interpreter dict is available. */ + return interp->dict; +} + + +//---------- +// interp ID +//---------- + +int64_t +_PyInterpreterState_ObjectToID(PyObject *idobj) +{ + if (!_PyIndex_Check(idobj)) { + PyErr_Format(PyExc_TypeError, + "interpreter ID must be an int, got %.100s", + Py_TYPE(idobj)->tp_name); + return -1; + } + + // This may raise OverflowError. + // For now, we don't worry about if LLONG_MAX < INT64_MAX. + long long id = PyLong_AsLongLong(idobj); + if (id == -1 && PyErr_Occurred()) { + return -1; + } + + if (id < 0) { + PyErr_Format(PyExc_ValueError, + "interpreter ID must be a non-negative int, got %R", + idobj); + return -1; + } +#if LLONG_MAX > INT64_MAX + else if (id > INT64_MAX) { + PyErr_SetString(PyExc_OverflowError, "int too big to convert"); + return -1; + } +#endif + else { + return (int64_t)id; + } +} + int64_t PyInterpreterState_GetID(PyInterpreterState *interp) { @@ -1120,6 +1242,20 @@ PyInterpreterState_GetID(PyInterpreterState *interp) return interp->id; } +PyObject * +_PyInterpreterState_GetIDObject(PyInterpreterState *interp) +{ + if (_PyInterpreterState_IDInitref(interp) != 0) { + return NULL; + }; + int64_t interpid = interp->id; + if (interpid < 0) { + return NULL; + } + assert(interpid < LLONG_MAX); + return PyLong_FromLongLong(interpid); +} + int _PyInterpreterState_IDInitref(PyInterpreterState *interp) @@ -1165,8 +1301,9 @@ _PyInterpreterState_IDDecref(PyInterpreterState *interp) PyThread_release_lock(interp->id_mutex); if (refcount == 0 && interp->requires_idref) { - // XXX Using the "head" thread isn't strictly correct. - PyThreadState *tstate = PyInterpreterState_ThreadHead(interp); + PyThreadState *tstate = + _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); + // XXX Possible GILState issues? PyThreadState *save_tstate = _PyThreadState_Swap(runtime, tstate); Py_EndInterpreter(tstate); @@ -1186,30 +1323,6 @@ _PyInterpreterState_RequireIDRef(PyInterpreterState *interp, int required) interp->requires_idref = required ? 1 : 0; } -PyObject * -_PyInterpreterState_GetMainModule(PyInterpreterState *interp) -{ - PyObject *modules = _PyImport_GetModules(interp); - if (modules == NULL) { - PyErr_SetString(PyExc_RuntimeError, "interpreter not initialized"); - return NULL; - } - return PyMapping_GetItemString(modules, "__main__"); -} - -PyObject * -PyInterpreterState_GetDict(PyInterpreterState *interp) -{ - if (interp->dict == NULL) { - interp->dict = PyDict_New(); - if (interp->dict == NULL) { - PyErr_Clear(); - } - } - /* Returning NULL means no per-interpreter dict is available. */ - return interp->dict; -} - //----------------------------- // look up an interpreter state @@ -1220,10 +1333,10 @@ PyInterpreterState_GetDict(PyInterpreterState *interp) The GIL must be held. */ -PyInterpreterState * +PyInterpreterState* PyInterpreterState_Get(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { @@ -1265,12 +1378,22 @@ _PyInterpreterState_LookUpID(int64_t requested_id) HEAD_UNLOCK(runtime); } if (interp == NULL && !PyErr_Occurred()) { - PyErr_Format(PyExc_RuntimeError, + PyErr_Format(PyExc_InterpreterNotFoundError, "unrecognized interpreter ID %lld", requested_id); } return interp; } +PyInterpreterState * +_PyInterpreterState_LookUpIDObject(PyObject *requested_id) +{ + int64_t id = _PyInterpreterState_ObjectToID(requested_id); + if (id < 0) { + return NULL; + } + return _PyInterpreterState_LookUpID(id); +} + /********************************/ /* the per-thread runtime state */ @@ -1309,18 +1432,47 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous) return res; } -static PyThreadState * -alloc_threadstate(void) +static void +reset_threadstate(_PyThreadStateImpl *tstate) { - return PyMem_RawCalloc(1, sizeof(PyThreadState)); + // Set to _PyThreadState_INIT directly? + memcpy(tstate, + &initial._main_interpreter._initial_thread, + sizeof(*tstate)); +} + +static _PyThreadStateImpl * +alloc_threadstate(PyInterpreterState *interp) +{ + _PyThreadStateImpl *tstate; + + // Try the preallocated tstate first. + tstate = _Py_atomic_exchange_ptr(&interp->threads_preallocated, NULL); + + // Fall back to the allocator. + if (tstate == NULL) { + tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl)); + if (tstate == NULL) { + return NULL; + } + reset_threadstate(tstate); + } + return tstate; } static void -free_threadstate(PyThreadState *tstate) +free_threadstate(_PyThreadStateImpl *tstate) { + PyInterpreterState *interp = tstate->base.interp; // The initial thread state of the interpreter is allocated // as part of the interpreter state so should not be freed. - if (tstate != &tstate->interp->_initial_thread) { + if (tstate == &interp->_initial_thread) { + // Make it available again. + reset_threadstate(tstate); + assert(interp->threads_preallocated == NULL); + _Py_atomic_store_ptr(&interp->threads_preallocated, tstate); + } + else { PyMem_RawFree(tstate); } } @@ -1334,20 +1486,27 @@ free_threadstate(PyThreadState *tstate) */ static void -init_threadstate(PyThreadState *tstate, - PyInterpreterState *interp, uint64_t id) +init_threadstate(_PyThreadStateImpl *_tstate, + PyInterpreterState *interp, uint64_t id, int whence) { + PyThreadState *tstate = (PyThreadState *)_tstate; if (tstate->_status.initialized) { Py_FatalError("thread state already initialized"); } assert(interp != NULL); tstate->interp = interp; + tstate->eval_breaker = + _Py_atomic_load_uintptr_relaxed(&interp->ceval.instrumentation_version); // next/prev are set in add_threadstate(). assert(tstate->next == NULL); assert(tstate->prev == NULL); + assert(tstate->_whence == _PyThreadState_WHENCE_NOTSET); + assert(whence >= 0 && whence <= _PyThreadState_WHENCE_EXEC); + tstate->_whence = whence; + assert(id > 0); tstate->id = id; @@ -1355,7 +1514,7 @@ init_threadstate(PyThreadState *tstate, tstate->py_recursion_limit = interp->ceval.recursion_limit, tstate->py_recursion_remaining = interp->ceval.recursion_limit, - tstate->c_recursion_remaining = C_RECURSION_LIMIT; + tstate->c_recursion_remaining = Py_C_RECURSION_LIMIT; tstate->exc_info = &tstate->exc_state; @@ -1363,11 +1522,24 @@ init_threadstate(PyThreadState *tstate, // This is cleared when PyGILState_Ensure() creates the thread state. tstate->gilstate_counter = 1; - tstate->cframe = &tstate->root_cframe; + tstate->current_frame = NULL; tstate->datastack_chunk = NULL; tstate->datastack_top = NULL; tstate->datastack_limit = NULL; tstate->what_event = -1; + tstate->previous_executor = NULL; + tstate->dict_global_version = 0; + + _tstate->asyncio_running_loop = NULL; + + tstate->delete_later = NULL; + + llist_init(&_tstate->mem_free_queue); + + if (interp->stoptheworld.requested || _PyRuntime.stoptheworld.requested) { + // Start in the suspended state if there is an ongoing stop-the-world. + tstate->state = _Py_THREAD_SUSPENDED; + } tstate->_status.initialized = 1; } @@ -1377,8 +1549,6 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, PyThreadState *next) { assert(interp->threads.head != tstate); - assert((next != NULL && tstate->id != 1) || - (next == NULL && tstate->id == 1)); if (next != NULL) { assert(next->prev == NULL || next->prev == tstate); next->prev = tstate; @@ -1389,60 +1559,63 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, } static PyThreadState * -new_threadstate(PyInterpreterState *interp) +new_threadstate(PyInterpreterState *interp, int whence) { - PyThreadState *tstate; - _PyRuntimeState *runtime = interp->runtime; - // We don't need to allocate a thread state for the main interpreter - // (the common case), but doing it later for the other case revealed a - // reentrancy problem (deadlock). So for now we always allocate before - // taking the interpreters lock. See GH-96071. - PyThreadState *new_tstate = alloc_threadstate(); - int used_newtstate; - if (new_tstate == NULL) { + // Allocate the thread state. + _PyThreadStateImpl *tstate = alloc_threadstate(interp); + if (tstate == NULL) { + return NULL; + } + +#ifdef Py_GIL_DISABLED + Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp); + if (qsbr_idx < 0) { + free_threadstate(tstate); return NULL; } +#endif + /* We serialize concurrent creation to protect global state. */ - HEAD_LOCK(runtime); + HEAD_LOCK(interp->runtime); + // Initialize the new thread state. interp->threads.next_unique_id += 1; uint64_t id = interp->threads.next_unique_id; + init_threadstate(tstate, interp, id, whence); - // Allocate the thread state and add it to the interpreter. + // Add the new thread state to the interpreter. PyThreadState *old_head = interp->threads.head; - if (old_head == NULL) { - // It's the interpreter's initial thread state. - assert(id == 1); - used_newtstate = 0; - tstate = &interp->_initial_thread; - } - else { - // Every valid interpreter must have at least one thread. - assert(id > 1); - assert(old_head->prev == NULL); - used_newtstate = 1; - tstate = new_tstate; - // Set to _PyThreadState_INIT. - memcpy(tstate, - &initial._main_interpreter._initial_thread, - sizeof(*tstate)); + add_threadstate(interp, (PyThreadState *)tstate, old_head); + + HEAD_UNLOCK(interp->runtime); +#ifdef Py_GIL_DISABLED + if (id > 1) { + if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) { + // Immortalize objects marked as using deferred reference counting + // once a non-main thread is created, if we haven't already done so. + _PyGC_ImmortalizeDeferredObjects(interp); + } } +#endif - init_threadstate(tstate, interp, id); - add_threadstate(interp, tstate, old_head); +#ifdef Py_GIL_DISABLED + // Must be called with lock unlocked to avoid lock ordering deadlocks. + _Py_qsbr_register(tstate, interp, qsbr_idx); +#endif - HEAD_UNLOCK(runtime); - if (!used_newtstate) { - // Must be called with lock unlocked to avoid re-entrancy deadlock. - PyMem_RawFree(new_tstate); - } - return tstate; + return (PyThreadState *)tstate; } PyThreadState * PyThreadState_New(PyInterpreterState *interp) { - PyThreadState *tstate = new_threadstate(interp); + return _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_UNKNOWN); +} + +PyThreadState * +_PyThreadState_NewBound(PyInterpreterState *interp, int whence) +{ + PyThreadState *tstate = new_threadstate(interp, whence); if (tstate) { bind_tstate(tstate); // This makes sure there's a gilstate tstate bound @@ -1456,21 +1629,21 @@ PyThreadState_New(PyInterpreterState *interp) // This must be followed by a call to _PyThreadState_Bind(); PyThreadState * -_PyThreadState_New(PyInterpreterState *interp) +_PyThreadState_New(PyInterpreterState *interp, int whence) { - return new_threadstate(interp); + return new_threadstate(interp, whence); } // We keep this for stable ABI compabibility. -PyThreadState * +PyAPI_FUNC(PyThreadState*) _PyThreadState_Prealloc(PyInterpreterState *interp) { - return _PyThreadState_New(interp); + return _PyThreadState_New(interp, _PyThreadState_WHENCE_UNKNOWN); } // We keep this around for (accidental) stable ABI compatibility. // Realistically, no extensions are using it. -void +PyAPI_FUNC(void) _PyThreadState_Init(PyThreadState *tstate) { Py_FatalError("_PyThreadState_Init() is for internal use only"); @@ -1493,6 +1666,8 @@ void PyThreadState_Clear(PyThreadState *tstate) { assert(tstate->_status.initialized && !tstate->_status.cleared); + assert(current_fast_get()->interp == tstate->interp); + assert(!_PyThreadState_IsRunningMain(tstate)); // XXX assert(!tstate->_status.bound || tstate->_status.unbound); tstate->_status.finalizing = 1; // just in case @@ -1505,7 +1680,7 @@ PyThreadState_Clear(PyThreadState *tstate) int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; - if (verbose && tstate->cframe->current_frame != NULL) { + if (verbose && tstate->current_frame != NULL) { /* bpo-20526: After the main thread calls _PyInterpreterState_SetFinalizing() in Py_FinalizeEx() (or in Py_EndInterpreter() for subinterpreters), @@ -1532,6 +1707,11 @@ PyThreadState_Clear(PyThreadState *tstate) /* Don't clear tstate->pyframe: it is a borrowed reference */ + Py_CLEAR(tstate->threading_local_key); + Py_CLEAR(tstate->threading_local_sentinel); + + Py_CLEAR(((_PyThreadStateImpl *)tstate)->asyncio_running_loop); + Py_CLEAR(tstate->dict); Py_CLEAR(tstate->async_exc); @@ -1546,11 +1726,11 @@ PyThreadState_Clear(PyThreadState *tstate) } if (tstate->c_profilefunc != NULL) { - tstate->interp->sys_profiling_threads--; + _Py_atomic_add_ssize(&tstate->interp->sys_profiling_threads, -1); tstate->c_profilefunc = NULL; } if (tstate->c_tracefunc != NULL) { - tstate->interp->sys_tracing_threads--; + _Py_atomic_add_ssize(&tstate->interp->sys_tracing_threads, -1); tstate->c_tracefunc = NULL; } Py_CLEAR(tstate->c_profileobj); @@ -1561,9 +1741,19 @@ PyThreadState_Clear(PyThreadState *tstate) Py_CLEAR(tstate->context); - if (tstate->on_delete != NULL) { - tstate->on_delete(tstate->on_delete_data); - } +#ifdef Py_GIL_DISABLED + // Each thread should clear own freelists in free-threading builds. + struct _Py_object_freelists *freelists = _Py_object_freelists_GET(); + _PyObject_ClearFreeLists(freelists, 1); + + // Remove ourself from the biased reference counting table of threads. + _Py_brc_remove_thread(tstate); +#endif + + // Merge our queue of pointers to be freed into the interpreter queue. + _PyMem_AbandonDelayed(tstate); + + _PyThreadState_ClearMimallocHeaps(tstate); tstate->_status.cleared = 1; @@ -1571,11 +1761,16 @@ PyThreadState_Clear(PyThreadState *tstate) // XXX Do it as early in the function as possible. } +static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw); + /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void -tstate_delete_common(PyThreadState *tstate) +tstate_delete_common(PyThreadState *tstate, int release_gil) { assert(tstate->_status.cleared && !tstate->_status.finalized); + tstate_verify_not_active(tstate); + assert(!_PyThreadState_IsRunningMain(tstate)); PyInterpreterState *interp = tstate->interp; if (interp == NULL) { @@ -1593,6 +1788,24 @@ tstate_delete_common(PyThreadState *tstate) if (tstate->next) { tstate->next->prev = tstate->prev; } + if (tstate->state != _Py_THREAD_SUSPENDED) { + // Any ongoing stop-the-world request should not wait for us because + // our thread is getting deleted. + if (interp->stoptheworld.requested) { + decrement_stoptheworld_countdown(&interp->stoptheworld); + } + if (runtime->stoptheworld.requested) { + decrement_stoptheworld_countdown(&runtime->stoptheworld); + } + } + +#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) + // Add our portion of the total refcount to the interpreter's total. + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + tstate->interp->object_state.reftotal += tstate_impl->reftotal; + tstate_impl->reftotal = 0; +#endif + HEAD_UNLOCK(runtime); // XXX Unbind in PyThreadState_Clear(), or earlier @@ -1607,6 +1820,14 @@ tstate_delete_common(PyThreadState *tstate) // XXX Move to PyThreadState_Clear()? clear_datastack(tstate); + if (release_gil) { + _PyEval_ReleaseLock(tstate->interp, tstate, 1); + } + +#ifdef Py_GIL_DISABLED + _Py_qsbr_unregister(tstate); +#endif + tstate->_status.finalized = 1; } @@ -1618,8 +1839,8 @@ zapthreads(PyInterpreterState *interp) when the threads are all really dead (XXX famous last words). */ while ((tstate = interp->threads.head) != NULL) { tstate_verify_not_active(tstate); - tstate_delete_common(tstate); - free_threadstate(tstate); + tstate_delete_common(tstate, 0); + free_threadstate((_PyThreadStateImpl *)tstate); } } @@ -1629,8 +1850,8 @@ PyThreadState_Delete(PyThreadState *tstate) { _Py_EnsureTstateNotNULL(tstate); tstate_verify_not_active(tstate); - tstate_delete_common(tstate); - free_threadstate(tstate); + tstate_delete_common(tstate, 0); + free_threadstate((_PyThreadStateImpl *)tstate); } @@ -1638,38 +1859,45 @@ void _PyThreadState_DeleteCurrent(PyThreadState *tstate) { _Py_EnsureTstateNotNULL(tstate); - tstate_delete_common(tstate); +#ifdef Py_GIL_DISABLED + _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif current_fast_clear(tstate->interp->runtime); - _PyEval_ReleaseLock(tstate->interp, NULL); - free_threadstate(tstate); + tstate_delete_common(tstate, 1); // release GIL as part of call + free_threadstate((_PyThreadStateImpl *)tstate); } void PyThreadState_DeleteCurrent(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _PyThreadState_DeleteCurrent(tstate); } -/* - * Delete all thread states except the one passed as argument. - * Note that, if there is a current thread state, it *must* be the one - * passed as argument. Also, this won't touch any other interpreters - * than the current one, since we don't know which thread state should - * be kept in those other interpreters. - */ -void -_PyThreadState_DeleteExcept(PyThreadState *tstate) +// Unlinks and removes all thread states from `tstate->interp`, with the +// exception of the one passed as an argument. However, it does not delete +// these thread states. Instead, it returns the removed thread states as a +// linked list. +// +// Note that if there is a current thread state, it *must* be the one +// passed as argument. Also, this won't touch any interpreters other +// than the current one, since we don't know which thread state should +// be kept in those other interpreters. +PyThreadState * +_PyThreadState_RemoveExcept(PyThreadState *tstate) { assert(tstate != NULL); PyInterpreterState *interp = tstate->interp; _PyRuntimeState *runtime = interp->runtime; +#ifdef Py_GIL_DISABLED + assert(runtime->stoptheworld.world_stopped); +#endif + HEAD_LOCK(runtime); /* Remove all thread states, except tstate, from the linked list of - thread states. This will allow calling PyThreadState_Clear() - without holding the lock. */ + thread states. */ PyThreadState *list = interp->threads.head; if (list == tstate) { list = tstate->next; @@ -1684,14 +1912,24 @@ _PyThreadState_DeleteExcept(PyThreadState *tstate) interp->threads.head = tstate; HEAD_UNLOCK(runtime); - /* Clear and deallocate all stale thread states. Even if this - executes Python code, we should be safe since it executes - in the current thread, not one of the stale threads. */ + return list; +} + +// Deletes the thread states in the linked list `list`. +// +// This is intended to be used in conjunction with _PyThreadState_RemoveExcept. +void +_PyThreadState_DeleteList(PyThreadState *list) +{ + // The world can't be stopped because we PyThreadState_Clear() can + // call destructors. + assert(!_PyRuntime.stoptheworld.world_stopped); + PyThreadState *p, *next; for (p = list; p; p = next) { next = p->next; PyThreadState_Clear(p); - free_threadstate(p); + free_threadstate((_PyThreadStateImpl *)p); } } @@ -1723,7 +1961,7 @@ _PyThreadState_GetDict(PyThreadState *tstate) PyObject * PyThreadState_GetDict(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return NULL; } @@ -1794,6 +2032,333 @@ tstate_deactivate(PyThreadState *tstate) // It will still be used in PyGILState_Ensure(). } +static int +tstate_try_attach(PyThreadState *tstate) +{ +#ifdef Py_GIL_DISABLED + int expected = _Py_THREAD_DETACHED; + return _Py_atomic_compare_exchange_int(&tstate->state, + &expected, + _Py_THREAD_ATTACHED); +#else + assert(tstate->state == _Py_THREAD_DETACHED); + tstate->state = _Py_THREAD_ATTACHED; + return 1; +#endif +} + +static void +tstate_set_detached(PyThreadState *tstate, int detached_state) +{ + assert(_Py_atomic_load_int_relaxed(&tstate->state) == _Py_THREAD_ATTACHED); +#ifdef Py_GIL_DISABLED + _Py_atomic_store_int(&tstate->state, detached_state); +#else + tstate->state = detached_state; +#endif +} + +static void +tstate_wait_attach(PyThreadState *tstate) +{ + do { + int expected = _Py_THREAD_SUSPENDED; + + // Wait until we're switched out of SUSPENDED to DETACHED. + _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state), + /*timeout=*/-1, NULL, /*detach=*/0); + + // Once we're back in DETACHED we can re-attach + } while (!tstate_try_attach(tstate)); +} + +void +_PyThreadState_Attach(PyThreadState *tstate) +{ +#if defined(Py_DEBUG) + // This is called from PyEval_RestoreThread(). Similar + // to it, we need to ensure errno doesn't change. + int err = errno; +#endif + + _Py_EnsureTstateNotNULL(tstate); + if (current_fast_get() != NULL) { + Py_FatalError("non-NULL old thread state"); + } + + + while (1) { + _PyEval_AcquireLock(tstate); + + // XXX assert(tstate_is_alive(tstate)); + current_fast_set(&_PyRuntime, tstate); + tstate_activate(tstate); + + if (!tstate_try_attach(tstate)) { + tstate_wait_attach(tstate); + } + +#ifdef Py_GIL_DISABLED + if (_PyEval_IsGILEnabled(tstate) && !tstate->_status.holds_gil) { + // The GIL was enabled between our call to _PyEval_AcquireLock() + // and when we attached (the GIL can't go from enabled to disabled + // here because only a thread holding the GIL can disable + // it). Detach and try again. + tstate_set_detached(tstate, _Py_THREAD_DETACHED); + tstate_deactivate(tstate); + current_fast_clear(&_PyRuntime); + continue; + } + _Py_qsbr_attach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif + break; + } + + // Resume previous critical section. This acquires the lock(s) from the + // top-most critical section. + if (tstate->critical_section != 0) { + _PyCriticalSection_Resume(tstate); + } + +#if defined(Py_DEBUG) + errno = err; +#endif +} + +static void +detach_thread(PyThreadState *tstate, int detached_state) +{ + // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate)); + assert(_Py_atomic_load_int_relaxed(&tstate->state) == _Py_THREAD_ATTACHED); + assert(tstate == current_fast_get()); + if (tstate->critical_section != 0) { + _PyCriticalSection_SuspendAll(tstate); + } +#ifdef Py_GIL_DISABLED + _Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr); +#endif + tstate_deactivate(tstate); + tstate_set_detached(tstate, detached_state); + current_fast_clear(&_PyRuntime); + _PyEval_ReleaseLock(tstate->interp, tstate, 0); +} + +void +_PyThreadState_Detach(PyThreadState *tstate) +{ + detach_thread(tstate, _Py_THREAD_DETACHED); +} + +void +_PyThreadState_Suspend(PyThreadState *tstate) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + assert(_Py_atomic_load_int_relaxed(&tstate->state) == _Py_THREAD_ATTACHED); + + struct _stoptheworld_state *stw = NULL; + HEAD_LOCK(runtime); + if (runtime->stoptheworld.requested) { + stw = &runtime->stoptheworld; + } + else if (tstate->interp->stoptheworld.requested) { + stw = &tstate->interp->stoptheworld; + } + HEAD_UNLOCK(runtime); + + if (stw == NULL) { + // Switch directly to "detached" if there is no active stop-the-world + // request. + detach_thread(tstate, _Py_THREAD_DETACHED); + return; + } + + // Switch to "suspended" state. + detach_thread(tstate, _Py_THREAD_SUSPENDED); + + // Decrease the count of remaining threads needing to park. + HEAD_LOCK(runtime); + decrement_stoptheworld_countdown(stw); + HEAD_UNLOCK(runtime); +} + +// Decrease stop-the-world counter of remaining number of threads that need to +// pause. If we are the final thread to pause, notify the requesting thread. +static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw) +{ + assert(stw->thread_countdown > 0); + if (--stw->thread_countdown == 0) { + _PyEvent_Notify(&stw->stop_event); + } +} + +#ifdef Py_GIL_DISABLED +// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events, +// we start with the first interpreter and then iterate over all interpreters. +// For per-interpreter stop-the-world events, we only operate on the one +// interpreter. +static PyInterpreterState * +interp_for_stop_the_world(struct _stoptheworld_state *stw) +{ + return (stw->is_global + ? PyInterpreterState_Head() + : _Py_CONTAINER_OF(stw, PyInterpreterState, stoptheworld)); +} + +// Loops over threads for a stop-the-world event. +// For global: all threads in all interpreters +// For per-interpreter: all threads in the interpreter +#define _Py_FOR_EACH_THREAD(stw, i, t) \ + for (i = interp_for_stop_the_world((stw)); \ + i != NULL; i = ((stw->is_global) ? i->next : NULL)) \ + for (t = i->threads.head; t; t = t->next) + + +// Try to transition threads atomically from the "detached" state to the +// "gc stopped" state. Returns true if all threads are in the "gc stopped" +static bool +park_detached_threads(struct _stoptheworld_state *stw) +{ + int num_parked = 0; + PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + int state = _Py_atomic_load_int_relaxed(&t->state); + if (state == _Py_THREAD_DETACHED) { + // Atomically transition to "suspended" if in "detached" state. + if (_Py_atomic_compare_exchange_int(&t->state, + &state, _Py_THREAD_SUSPENDED)) { + num_parked++; + } + } + else if (state == _Py_THREAD_ATTACHED && t != stw->requester) { + _Py_set_eval_breaker_bit(t, _PY_EVAL_PLEASE_STOP_BIT); + } + } + stw->thread_countdown -= num_parked; + assert(stw->thread_countdown >= 0); + return num_parked > 0 && stw->thread_countdown == 0; +} + +static void +stop_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + PyMutex_Lock(&stw->mutex); + if (stw->is_global) { + _PyRWMutex_Lock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RLock(&runtime->stoptheworld_mutex); + } + + HEAD_LOCK(runtime); + stw->requested = 1; + stw->thread_countdown = 0; + stw->stop_event = (PyEvent){0}; // zero-initialize (unset) + stw->requester = _PyThreadState_GET(); // may be NULL + + PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + if (t != stw->requester) { + // Count all the other threads (we don't wait on ourself). + stw->thread_countdown++; + } + } + + if (stw->thread_countdown == 0) { + HEAD_UNLOCK(runtime); + stw->world_stopped = 1; + return; + } + + for (;;) { + // Switch threads that are detached to the GC stopped state + bool stopped_all_threads = park_detached_threads(stw); + HEAD_UNLOCK(runtime); + + if (stopped_all_threads) { + break; + } + + PyTime_t wait_ns = 1000*1000; // 1ms (arbitrary, may need tuning) + int detach = 0; + if (PyEvent_WaitTimed(&stw->stop_event, wait_ns, detach)) { + assert(stw->thread_countdown == 0); + break; + } + + HEAD_LOCK(runtime); + } + stw->world_stopped = 1; +} + +static void +start_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + assert(PyMutex_IsLocked(&stw->mutex)); + + HEAD_LOCK(runtime); + stw->requested = 0; + stw->world_stopped = 0; + // Switch threads back to the detached state. + PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + if (t != stw->requester) { + assert(_Py_atomic_load_int_relaxed(&t->state) == + _Py_THREAD_SUSPENDED); + _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED); + _PyParkingLot_UnparkAll(&t->state); + } + } + stw->requester = NULL; + HEAD_UNLOCK(runtime); + if (stw->is_global) { + _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex); + } + PyMutex_Unlock(&stw->mutex); +} +#endif // Py_GIL_DISABLED + +void +_PyEval_StopTheWorldAll(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyEval_StartTheWorldAll(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyEval_StopTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&interp->stoptheworld); +#endif +} + +void +_PyEval_StartTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&interp->stoptheworld); +#endif +} //---------- // other API @@ -1834,88 +2399,46 @@ PyThreadState_SetAsyncExc(unsigned long id, PyObject *exc) * deadlock, we need to release head_mutex before * the decref. */ - PyObject *old_exc = tstate->async_exc; - tstate->async_exc = Py_XNewRef(exc); + Py_XINCREF(exc); + PyObject *old_exc = _Py_atomic_exchange_ptr(&tstate->async_exc, exc); HEAD_UNLOCK(runtime); Py_XDECREF(old_exc); - _PyEval_SignalAsyncExc(tstate->interp); + _Py_set_eval_breaker_bit(tstate, _PY_ASYNC_EXCEPTION_BIT); return 1; } HEAD_UNLOCK(runtime); return 0; } - //--------------------------------- // API for the current thread state //--------------------------------- PyThreadState * -_PyThreadState_UncheckedGet(void) +PyThreadState_GetUnchecked(void) { - return current_fast_get(&_PyRuntime); + return current_fast_get(); } PyThreadState * PyThreadState_Get(void) { - PyThreadState *tstate = current_fast_get(&_PyRuntime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); return tstate; } - -static void -_swap_thread_states(_PyRuntimeState *runtime, - PyThreadState *oldts, PyThreadState *newts) -{ - // XXX Do this only if oldts != NULL? - current_fast_clear(runtime); - - if (oldts != NULL) { - // XXX assert(tstate_is_alive(oldts) && tstate_is_bound(oldts)); - tstate_deactivate(oldts); - } - - if (newts != NULL) { - // XXX assert(tstate_is_alive(newts)); - assert(tstate_is_bound(newts)); - current_fast_set(runtime, newts); - tstate_activate(newts); - } -} - -PyThreadState * -_PyThreadState_SwapNoGIL(PyThreadState *newts) -{ -#if defined(Py_DEBUG) - /* This can be called from PyEval_RestoreThread(). Similar - to it, we need to ensure errno doesn't change. - */ - int err = errno; -#endif - - PyThreadState *oldts = current_fast_get(&_PyRuntime); - _swap_thread_states(&_PyRuntime, oldts, newts); - -#if defined(Py_DEBUG) - errno = err; -#endif - return oldts; -} - PyThreadState * _PyThreadState_Swap(_PyRuntimeState *runtime, PyThreadState *newts) { - PyThreadState *oldts = current_fast_get(runtime); + PyThreadState *oldts = current_fast_get(); if (oldts != NULL) { - _PyEval_ReleaseLock(oldts->interp, oldts); + _PyThreadState_Detach(oldts); } - _swap_thread_states(runtime, oldts, newts); if (newts != NULL) { - _PyEval_AcquireLock(newts); + _PyThreadState_Attach(newts); } return oldts; } @@ -1942,6 +2465,20 @@ _PyThreadState_Bind(PyThreadState *tstate) } } +#if defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API) +uintptr_t +_Py_GetThreadLocal_Addr(void) +{ +#ifdef HAVE_THREAD_LOCAL + // gh-112535: Use the address of the thread-local PyThreadState variable as + // a unique identifier for the current thread. Each thread has a unique + // _Py_tss_tstate variable with a unique address. + return (uintptr_t)&_Py_tss_tstate; +#else +# error "no supported thread-local variable storage classifier" +#endif +} +#endif /***********************************/ /* routines for advanced debuggers */ @@ -1991,7 +2528,7 @@ PyObject * _PyThread_CurrentFrames(void) { _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); if (_PySys_Audit(tstate, "sys._current_frames", NULL) < 0) { return NULL; } @@ -2007,12 +2544,13 @@ _PyThread_CurrentFrames(void) * Because these lists can mutate even when the GIL is held, we * need to grab head_mutex for the duration. */ + _PyEval_StopTheWorldAll(runtime); HEAD_LOCK(runtime); PyInterpreterState *i; for (i = runtime->interpreters.head; i != NULL; i = i->next) { PyThreadState *t; for (t = i->threads.head; t != NULL; t = t->next) { - _PyInterpreterFrame *frame = t->cframe->current_frame; + _PyInterpreterFrame *frame = t->current_frame; frame = _PyFrame_GetFirstComplete(frame); if (frame == NULL) { continue; @@ -2040,6 +2578,7 @@ fail: done: HEAD_UNLOCK(runtime); + _PyEval_StartTheWorldAll(runtime); return result; } @@ -2052,7 +2591,7 @@ PyObject * _PyThread_CurrentExceptions(void) { _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); @@ -2071,6 +2610,7 @@ _PyThread_CurrentExceptions(void) * Because these lists can mutate even when the GIL is held, we * need to grab head_mutex for the duration. */ + _PyEval_StopTheWorldAll(runtime); HEAD_LOCK(runtime); PyInterpreterState *i; for (i = runtime->interpreters.head; i != NULL; i = i->next) { @@ -2103,6 +2643,7 @@ fail: done: HEAD_UNLOCK(runtime); + _PyEval_StartTheWorldAll(runtime); return result; } @@ -2142,7 +2683,7 @@ _PyGILState_Fini(PyInterpreterState *interp) // XXX Drop this. -PyStatus +void _PyGILState_SetTstate(PyThreadState *tstate) { /* must init with valid states */ @@ -2152,7 +2693,7 @@ _PyGILState_SetTstate(PyThreadState *tstate) if (!_Py_IsMainInterpreter(tstate->interp)) { /* Currently, PyGILState is shared by all interpreters. The main * interpreter is responsible to initialize it. */ - return _PyStatus_OK(); + return; } #ifndef NDEBUG @@ -2162,8 +2703,6 @@ _PyGILState_SetTstate(PyThreadState *tstate) assert(gilstate_tss_get(runtime) == tstate); assert(tstate->gilstate_counter == 1); #endif - - return _PyStatus_OK(); } PyInterpreterState * @@ -2196,12 +2735,13 @@ PyGILState_Check(void) return 1; } - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); if (tstate == NULL) { return 0; } - return (tstate == gilstate_tss_get(runtime)); + PyThreadState *tcur = gilstate_tss_get(runtime); + return (tstate == tcur); } PyGILState_STATE @@ -2224,7 +2764,9 @@ PyGILState_Ensure(void) int has_gil; if (tcur == NULL) { /* Create a new Python thread state for this thread */ - tcur = new_threadstate(runtime->gilstate.autoInterpreterState); + // XXX Use PyInterpreterState_EnsureThreadState()? + tcur = new_threadstate(runtime->gilstate.autoInterpreterState, + _PyThreadState_WHENCE_GILSTATE); if (tcur == NULL) { Py_FatalError("Couldn't create thread-state for new thread"); } @@ -2298,7 +2840,7 @@ PyGILState_Release(PyGILState_STATE oldstate) * habit of coming back). */ assert(tstate->gilstate_counter == 0); - assert(current_fast_get(runtime) == tstate); + assert(current_fast_get() == tstate); _PyThreadState_DeleteCurrent(tstate); } /* Release the lock if necessary */ @@ -2308,585 +2850,6 @@ PyGILState_Release(PyGILState_STATE oldstate) } -/**************************/ -/* cross-interpreter data */ -/**************************/ - -/* cross-interpreter data */ - -static inline void -_xidata_init(_PyCrossInterpreterData *data) -{ - // If the value is being reused - // then _xidata_clear() should have been called already. - assert(data->data == NULL); - assert(data->obj == NULL); - *data = (_PyCrossInterpreterData){0}; - data->interp = -1; -} - -static inline void -_xidata_clear(_PyCrossInterpreterData *data) -{ - // _PyCrossInterpreterData only has two members that need to be - // cleaned up, if set: "data" must be freed and "obj" must be decref'ed. - // In both cases the original (owning) interpreter must be used, - // which is the caller's responsibility to ensure. - if (data->data != NULL) { - if (data->free != NULL) { - data->free(data->data); - } - data->data = NULL; - } - Py_CLEAR(data->obj); -} - -void -_PyCrossInterpreterData_Init(_PyCrossInterpreterData *data, - PyInterpreterState *interp, - void *shared, PyObject *obj, - xid_newobjectfunc new_object) -{ - assert(data != NULL); - assert(new_object != NULL); - _xidata_init(data); - data->data = shared; - if (obj != NULL) { - assert(interp != NULL); - // released in _PyCrossInterpreterData_Clear() - data->obj = Py_NewRef(obj); - } - // Ideally every object would know its owning interpreter. - // Until then, we have to rely on the caller to identify it - // (but we don't need it in all cases). - data->interp = (interp != NULL) ? interp->id : -1; - data->new_object = new_object; -} - -int -_PyCrossInterpreterData_InitWithSize(_PyCrossInterpreterData *data, - PyInterpreterState *interp, - const size_t size, PyObject *obj, - xid_newobjectfunc new_object) -{ - assert(size > 0); - // For now we always free the shared data in the same interpreter - // where it was allocated, so the interpreter is required. - assert(interp != NULL); - _PyCrossInterpreterData_Init(data, interp, NULL, obj, new_object); - data->data = PyMem_RawMalloc(size); - if (data->data == NULL) { - return -1; - } - data->free = PyMem_RawFree; - return 0; -} - -void -_PyCrossInterpreterData_Clear(PyInterpreterState *interp, - _PyCrossInterpreterData *data) -{ - assert(data != NULL); - // This must be called in the owning interpreter. - assert(interp == NULL || data->interp == interp->id); - _xidata_clear(data); -} - -static int -_check_xidata(PyThreadState *tstate, _PyCrossInterpreterData *data) -{ - // data->data can be anything, including NULL, so we don't check it. - - // data->obj may be NULL, so we don't check it. - - if (data->interp < 0) { - _PyErr_SetString(tstate, PyExc_SystemError, "missing interp"); - return -1; - } - - if (data->new_object == NULL) { - _PyErr_SetString(tstate, PyExc_SystemError, "missing new_object func"); - return -1; - } - - // data->free may be NULL, so we don't check it. - - return 0; -} - -crossinterpdatafunc _PyCrossInterpreterData_Lookup(PyObject *); - -/* This is a separate func from _PyCrossInterpreterData_Lookup in order - to keep the registry code separate. */ -static crossinterpdatafunc -_lookup_getdata(PyObject *obj) -{ - crossinterpdatafunc getdata = _PyCrossInterpreterData_Lookup(obj); - if (getdata == NULL && PyErr_Occurred() == 0) - PyErr_Format(PyExc_ValueError, - "%S does not support cross-interpreter data", obj); - return getdata; -} - -int -_PyObject_CheckCrossInterpreterData(PyObject *obj) -{ - crossinterpdatafunc getdata = _lookup_getdata(obj); - if (getdata == NULL) { - return -1; - } - return 0; -} - -int -_PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) -{ - _PyRuntimeState *runtime = &_PyRuntime; - PyThreadState *tstate = current_fast_get(runtime); -#ifdef Py_DEBUG - // The caller must hold the GIL - _Py_EnsureTstateNotNULL(tstate); -#endif - PyInterpreterState *interp = tstate->interp; - - // Reset data before re-populating. - *data = (_PyCrossInterpreterData){0}; - data->interp = -1; - - // Call the "getdata" func for the object. - Py_INCREF(obj); - crossinterpdatafunc getdata = _lookup_getdata(obj); - if (getdata == NULL) { - Py_DECREF(obj); - return -1; - } - int res = getdata(tstate, obj, data); - Py_DECREF(obj); - if (res != 0) { - return -1; - } - - // Fill in the blanks and validate the result. - data->interp = interp->id; - if (_check_xidata(tstate, data) != 0) { - (void)_PyCrossInterpreterData_Release(data); - return -1; - } - - return 0; -} - -PyObject * -_PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *data) -{ - return data->new_object(data); -} - -static int -_release_xidata_pending(void *data) -{ - _xidata_clear((_PyCrossInterpreterData *)data); - return 0; -} - -static int -_xidata_release_and_rawfree_pending(void *data) -{ - _xidata_clear((_PyCrossInterpreterData *)data); - PyMem_RawFree(data); - return 0; -} - -static int -_xidata_release(_PyCrossInterpreterData *data, int rawfree) -{ - if ((data->data == NULL || data->free == NULL) && data->obj == NULL) { - // Nothing to release! - if (rawfree) { - PyMem_RawFree(data); - } - else { - data->data = NULL; - } - return 0; - } - - // Switch to the original interpreter. - PyInterpreterState *interp = _PyInterpreterState_LookUpID(data->interp); - if (interp == NULL) { - // The interpreter was already destroyed. - // This function shouldn't have been called. - // XXX Someone leaked some memory... - assert(PyErr_Occurred()); - if (rawfree) { - PyMem_RawFree(data); - } - return -1; - } - - // "Release" the data and/or the object. - if (interp == current_fast_get(interp->runtime)->interp) { - _xidata_clear(data); - if (rawfree) { - PyMem_RawFree(data); - } - } - else { - int (*func)(void *) = _release_xidata_pending; - if (rawfree) { - func = _xidata_release_and_rawfree_pending; - } - // XXX Emit a warning if this fails? - _PyEval_AddPendingCall(interp, func, data, 0); - } - return 0; -} - -int -_PyCrossInterpreterData_Release(_PyCrossInterpreterData *data) -{ - return _xidata_release(data, 0); -} - -int -_PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *data) -{ - return _xidata_release(data, 1); -} - -/* registry of {type -> crossinterpdatafunc} */ - -/* For now we use a global registry of shareable classes. An - alternative would be to add a tp_* slot for a class's - crossinterpdatafunc. It would be simpler and more efficient. */ - -static int -_xidregistry_add_type(struct _xidregistry *xidregistry, - PyTypeObject *cls, crossinterpdatafunc getdata) -{ - struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem)); - if (newhead == NULL) { - return -1; - } - *newhead = (struct _xidregitem){ - // We do not keep a reference, to avoid keeping the class alive. - .cls = cls, - .refcount = 1, - .getdata = getdata, - }; - if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { - // XXX Assign a callback to clear the entry from the registry? - newhead->weakref = PyWeakref_NewRef((PyObject *)cls, NULL); - if (newhead->weakref == NULL) { - PyMem_RawFree(newhead); - return -1; - } - } - newhead->next = xidregistry->head; - if (newhead->next != NULL) { - newhead->next->prev = newhead; - } - xidregistry->head = newhead; - return 0; -} - -static struct _xidregitem * -_xidregistry_remove_entry(struct _xidregistry *xidregistry, - struct _xidregitem *entry) -{ - struct _xidregitem *next = entry->next; - if (entry->prev != NULL) { - assert(entry->prev->next == entry); - entry->prev->next = next; - } - else { - assert(xidregistry->head == entry); - xidregistry->head = next; - } - if (next != NULL) { - next->prev = entry->prev; - } - Py_XDECREF(entry->weakref); - PyMem_RawFree(entry); - return next; -} - -static void -_xidregistry_clear(struct _xidregistry *xidregistry) -{ - struct _xidregitem *cur = xidregistry->head; - xidregistry->head = NULL; - while (cur != NULL) { - struct _xidregitem *next = cur->next; - Py_XDECREF(cur->weakref); - PyMem_RawFree(cur); - cur = next; - } -} - -static struct _xidregitem * -_xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls) -{ - struct _xidregitem *cur = xidregistry->head; - while (cur != NULL) { - if (cur->weakref != NULL) { - // cur is/was a heap type. - PyObject *registered = PyWeakref_GetObject(cur->weakref); - assert(registered != NULL); - if (registered == Py_None) { - // The weakly ref'ed object was freed. - cur = _xidregistry_remove_entry(xidregistry, cur); - continue; - } - assert(PyType_Check(registered)); - assert(cur->cls == (PyTypeObject *)registered); - assert(cur->cls->tp_flags & Py_TPFLAGS_HEAPTYPE); - //Py_DECREF(registered); - } - if (cur->cls == cls) { - return cur; - } - cur = cur->next; - } - return NULL; -} - -static inline struct _xidregistry * -_get_xidregistry(PyInterpreterState *interp, PyTypeObject *cls) -{ - struct _xidregistry *xidregistry = &interp->runtime->xidregistry; - if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { - assert(interp->xidregistry.mutex == xidregistry->mutex); - xidregistry = &interp->xidregistry; - } - return xidregistry; -} - -static void _register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry); - -static inline void -_ensure_builtins_xid(PyInterpreterState *interp, struct _xidregistry *xidregistry) -{ - if (xidregistry != &interp->xidregistry) { - assert(xidregistry == &interp->runtime->xidregistry); - if (xidregistry->head == NULL) { - _register_builtins_for_crossinterpreter_data(xidregistry); - } - } -} - -int -_PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, - crossinterpdatafunc getdata) -{ - if (!PyType_Check(cls)) { - PyErr_Format(PyExc_ValueError, "only classes may be registered"); - return -1; - } - if (getdata == NULL) { - PyErr_Format(PyExc_ValueError, "missing 'getdata' func"); - return -1; - } - - int res = 0; - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry(interp, cls); - PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK); - - _ensure_builtins_xid(interp, xidregistry); - - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - if (matched != NULL) { - assert(matched->getdata == getdata); - matched->refcount += 1; - goto finally; - } - - res = _xidregistry_add_type(xidregistry, cls, getdata); - -finally: - PyThread_release_lock(xidregistry->mutex); - return res; -} - -int -_PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls) -{ - int res = 0; - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry(interp, cls); - PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK); - - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - if (matched != NULL) { - assert(matched->refcount > 0); - matched->refcount -= 1; - if (matched->refcount == 0) { - (void)_xidregistry_remove_entry(xidregistry, matched); - } - res = 1; - } - - PyThread_release_lock(xidregistry->mutex); - return res; -} - - -/* Cross-interpreter objects are looked up by exact match on the class. - We can reassess this policy when we move from a global registry to a - tp_* slot. */ - -crossinterpdatafunc -_PyCrossInterpreterData_Lookup(PyObject *obj) -{ - PyTypeObject *cls = Py_TYPE(obj); - - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry(interp, cls); - PyThread_acquire_lock(xidregistry->mutex, WAIT_LOCK); - - _ensure_builtins_xid(interp, xidregistry); - - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - crossinterpdatafunc func = matched != NULL ? matched->getdata : NULL; - - PyThread_release_lock(xidregistry->mutex); - return func; -} - -/* cross-interpreter data for builtin types */ - -struct _shared_bytes_data { - char *bytes; - Py_ssize_t len; -}; - -static PyObject * -_new_bytes_object(_PyCrossInterpreterData *data) -{ - struct _shared_bytes_data *shared = (struct _shared_bytes_data *)(data->data); - return PyBytes_FromStringAndSize(shared->bytes, shared->len); -} - -static int -_bytes_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - if (_PyCrossInterpreterData_InitWithSize( - data, tstate->interp, sizeof(struct _shared_bytes_data), obj, - _new_bytes_object - ) < 0) - { - return -1; - } - struct _shared_bytes_data *shared = (struct _shared_bytes_data *)data->data; - if (PyBytes_AsStringAndSize(obj, &shared->bytes, &shared->len) < 0) { - _PyCrossInterpreterData_Clear(tstate->interp, data); - return -1; - } - return 0; -} - -struct _shared_str_data { - int kind; - const void *buffer; - Py_ssize_t len; -}; - -static PyObject * -_new_str_object(_PyCrossInterpreterData *data) -{ - struct _shared_str_data *shared = (struct _shared_str_data *)(data->data); - return PyUnicode_FromKindAndData(shared->kind, shared->buffer, shared->len); -} - -static int -_str_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - if (_PyCrossInterpreterData_InitWithSize( - data, tstate->interp, sizeof(struct _shared_str_data), obj, - _new_str_object - ) < 0) - { - return -1; - } - struct _shared_str_data *shared = (struct _shared_str_data *)data->data; - shared->kind = PyUnicode_KIND(obj); - shared->buffer = PyUnicode_DATA(obj); - shared->len = PyUnicode_GET_LENGTH(obj); - return 0; -} - -static PyObject * -_new_long_object(_PyCrossInterpreterData *data) -{ - return PyLong_FromSsize_t((Py_ssize_t)(data->data)); -} - -static int -_long_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - /* Note that this means the size of shareable ints is bounded by - * sys.maxsize. Hence on 32-bit architectures that is half the - * size of maximum shareable ints on 64-bit. - */ - Py_ssize_t value = PyLong_AsSsize_t(obj); - if (value == -1 && PyErr_Occurred()) { - if (PyErr_ExceptionMatches(PyExc_OverflowError)) { - PyErr_SetString(PyExc_OverflowError, "try sending as bytes"); - } - return -1; - } - _PyCrossInterpreterData_Init(data, tstate->interp, (void *)value, NULL, - _new_long_object); - // data->obj and data->free remain NULL - return 0; -} - -static PyObject * -_new_none_object(_PyCrossInterpreterData *data) -{ - // XXX Singleton refcounts are problematic across interpreters... - return Py_NewRef(Py_None); -} - -static int -_none_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) -{ - _PyCrossInterpreterData_Init(data, tstate->interp, NULL, NULL, - _new_none_object); - // data->data, data->obj and data->free remain NULL - return 0; -} - -static void -_register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry) -{ - // None - if (_xidregistry_add_type(xidregistry, (PyTypeObject *)PyObject_Type(Py_None), _none_shared) != 0) { - Py_FatalError("could not register None for cross-interpreter sharing"); - } - - // int - if (_xidregistry_add_type(xidregistry, &PyLong_Type, _long_shared) != 0) { - Py_FatalError("could not register int for cross-interpreter sharing"); - } - - // bytes - if (_xidregistry_add_type(xidregistry, &PyBytes_Type, _bytes_shared) != 0) { - Py_FatalError("could not register bytes for cross-interpreter sharing"); - } - - // str - if (_xidregistry_add_type(xidregistry, &PyUnicode_Type, _str_shared) != 0) { - Py_FatalError("could not register str for cross-interpreter sharing"); - } -} - - /*************/ /* Other API */ /*************/ @@ -2906,11 +2869,18 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, _PyFrameEvalFunction eval_frame) { if (eval_frame == _PyEval_EvalFrameDefault) { - interp->eval_frame = NULL; + eval_frame = NULL; } - else { - interp->eval_frame = eval_frame; + if (eval_frame == interp->eval_frame) { + return; } +#ifdef _Py_TIER2 + if (eval_frame != NULL) { + _Py_Executors_InvalidateAll(interp, 1); + } +#endif + RARE_EVENT_INC(set_eval_frame_func); + interp->eval_frame = eval_frame; } @@ -2924,7 +2894,7 @@ _PyInterpreterState_GetConfig(PyInterpreterState *interp) int _PyInterpreterState_GetConfigCopy(PyConfig *config) { - PyInterpreterState *interp = PyInterpreterState_Get(); + PyInterpreterState *interp = _PyInterpreterState_GET(); PyStatus status = _PyConfig_Copy(config, &interp->config); if (PyStatus_Exception(status)) { @@ -2938,9 +2908,8 @@ _PyInterpreterState_GetConfigCopy(PyConfig *config) const PyConfig* _Py_GetConfig(void) { - _PyRuntimeState *runtime = &_PyRuntime; assert(PyGILState_Check()); - PyThreadState *tstate = current_fast_get(runtime); + PyThreadState *tstate = current_fast_get(); _Py_EnsureTstateNotNULL(tstate); return _PyInterpreterState_GetConfig(tstate->interp); } @@ -3072,7 +3041,79 @@ _PyThreadState_MustExit(PyThreadState *tstate) return 1; } +/********************/ +/* mimalloc support */ +/********************/ + +static void +tstate_mimalloc_bind(PyThreadState *tstate) +{ +#ifdef Py_GIL_DISABLED + struct _mimalloc_thread_state *mts = &((_PyThreadStateImpl*)tstate)->mimalloc; -#ifdef __cplusplus + // Initialize the mimalloc thread state. This must be called from the + // same thread that will use the thread state. The "mem" heap doubles as + // the "backing" heap. + mi_tld_t *tld = &mts->tld; + _mi_tld_init(tld, &mts->heaps[_Py_MIMALLOC_HEAP_MEM]); + llist_init(&mts->page_list); + + // Exiting threads push any remaining in-use segments to the abandoned + // pool to be re-claimed later by other threads. We use per-interpreter + // pools to keep Python objects from different interpreters separate. + tld->segments.abandoned = &tstate->interp->mimalloc.abandoned_pool; + + // Don't fill in the first N bytes up to ob_type in debug builds. We may + // access ob_tid and the refcount fields in the dict and list lock-less + // accesses, so they must remain valid for a while after deallocation. + size_t base_offset = offsetof(PyObject, ob_type); + if (_PyMem_DebugEnabled()) { + // The debug allocator adds two words at the beginning of each block. + base_offset += 2 * sizeof(size_t); + } + size_t debug_offsets[_Py_MIMALLOC_HEAP_COUNT] = { + [_Py_MIMALLOC_HEAP_OBJECT] = base_offset, + [_Py_MIMALLOC_HEAP_GC] = base_offset, + [_Py_MIMALLOC_HEAP_GC_PRE] = base_offset + 2 * sizeof(PyObject *), + }; + + // Initialize each heap + for (uint8_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { + _mi_heap_init_ex(&mts->heaps[i], tld, _mi_arena_id_none(), false, i); + mts->heaps[i].debug_offset = (uint8_t)debug_offsets[i]; + } + + // Heaps that store Python objects should use QSBR to delay freeing + // mimalloc pages while there may be concurrent lock-free readers. + mts->heaps[_Py_MIMALLOC_HEAP_OBJECT].page_use_qsbr = true; + mts->heaps[_Py_MIMALLOC_HEAP_GC].page_use_qsbr = true; + mts->heaps[_Py_MIMALLOC_HEAP_GC_PRE].page_use_qsbr = true; + + // By default, object allocations use _Py_MIMALLOC_HEAP_OBJECT. + // _PyObject_GC_New() and similar functions temporarily override this to + // use one of the GC heaps. + mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT]; + + _Py_atomic_store_int(&mts->initialized, 1); +#endif } + +void +_PyThreadState_ClearMimallocHeaps(PyThreadState *tstate) +{ +#ifdef Py_GIL_DISABLED + if (!tstate->_status.bound) { + // The mimalloc heaps are only initialized when the thread is bound. + return; + } + + _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate; + for (Py_ssize_t i = 0; i < _Py_MIMALLOC_HEAP_COUNT; i++) { + // Abandon all segments in use by this thread. This pushes them to + // a shared pool to later be reclaimed by other threads. It's important + // to do this before the thread state is destroyed so that objects + // remain visible to the GC. + _mi_heap_collect_abandon(&tstate_impl->mimalloc.heaps[i]); + } #endif +} |
