diff options
Diffstat (limited to 'contrib/tools/python3/Python/import.c')
| -rw-r--r-- | contrib/tools/python3/Python/import.c | 1967 |
1 files changed, 1454 insertions, 513 deletions
diff --git a/contrib/tools/python3/Python/import.c b/contrib/tools/python3/Python/import.c index fe70169202c..32a25e4478c 100644 --- a/contrib/tools/python3/Python/import.c +++ b/contrib/tools/python3/Python/import.c @@ -1,7 +1,7 @@ /* Module definition and import implementation */ #include "Python.h" - +#include "pycore_ceval.h" #include "pycore_hashtable.h" // _Py_hashtable_new_full() #include "pycore_import.h" // _PyImport_BootstrapImp() #include "pycore_initconfig.h" // _PyStatus_OK() @@ -14,17 +14,17 @@ #include "pycore_pymem.h" // _PyMem_SetDefaultAllocator() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_time.h" // _PyTime_AsMicroseconds() +#include "pycore_weakref.h" // _PyWeakref_GET_REF() + #include "marshal.h" // PyMarshal_ReadObjectFromString() -#include "importdl.h" // _PyImport_DynLoadFiletab +#include "pycore_importdl.h" // _PyImport_DynLoadFiletab #include "pydtrace.h" // PyDTrace_IMPORT_FIND_LOAD_START_ENABLED() #include <stdbool.h> // bool #ifdef HAVE_FCNTL_H #include <fcntl.h> #endif -#ifdef __cplusplus -extern "C" { -#endif /*[clinic input] @@ -35,6 +35,17 @@ module _imp #include "clinic/import.c.h" +#ifndef NDEBUG +static bool +is_interpreter_isolated(PyInterpreterState *interp) +{ + return !_Py_IsMainInterpreter(interp) + && !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) + && interp->ceval.own_gil; +} +#endif + + /*******************************/ /* process-global import state */ /*******************************/ @@ -83,11 +94,7 @@ static struct _inittab *inittab_copy = NULL; (interp)->imports.import_func #define IMPORT_LOCK(interp) \ - (interp)->imports.lock.mutex -#define IMPORT_LOCK_THREAD(interp) \ - (interp)->imports.lock.thread -#define IMPORT_LOCK_LEVEL(interp) \ - (interp)->imports.lock.level + (interp)->imports.lock #define FIND_AND_LOAD(interp) \ (interp)->imports.find_and_load @@ -104,74 +111,21 @@ static struct _inittab *inittab_copy = NULL; void 
_PyImport_AcquireLock(PyInterpreterState *interp) { - unsigned long me = PyThread_get_thread_ident(); - if (me == PYTHREAD_INVALID_THREAD_ID) - return; /* Too bad */ - if (IMPORT_LOCK(interp) == NULL) { - IMPORT_LOCK(interp) = PyThread_allocate_lock(); - if (IMPORT_LOCK(interp) == NULL) - return; /* Nothing much we can do. */ - } - if (IMPORT_LOCK_THREAD(interp) == me) { - IMPORT_LOCK_LEVEL(interp)++; - return; - } - if (IMPORT_LOCK_THREAD(interp) != PYTHREAD_INVALID_THREAD_ID || - !PyThread_acquire_lock(IMPORT_LOCK(interp), 0)) - { - PyThreadState *tstate = PyEval_SaveThread(); - PyThread_acquire_lock(IMPORT_LOCK(interp), WAIT_LOCK); - PyEval_RestoreThread(tstate); - } - assert(IMPORT_LOCK_LEVEL(interp) == 0); - IMPORT_LOCK_THREAD(interp) = me; - IMPORT_LOCK_LEVEL(interp) = 1; + _PyRecursiveMutex_Lock(&IMPORT_LOCK(interp)); } -int +void _PyImport_ReleaseLock(PyInterpreterState *interp) { - unsigned long me = PyThread_get_thread_ident(); - if (me == PYTHREAD_INVALID_THREAD_ID || IMPORT_LOCK(interp) == NULL) - return 0; /* Too bad */ - if (IMPORT_LOCK_THREAD(interp) != me) - return -1; - IMPORT_LOCK_LEVEL(interp)--; - assert(IMPORT_LOCK_LEVEL(interp) >= 0); - if (IMPORT_LOCK_LEVEL(interp) == 0) { - IMPORT_LOCK_THREAD(interp) = PYTHREAD_INVALID_THREAD_ID; - PyThread_release_lock(IMPORT_LOCK(interp)); - } - return 1; + _PyRecursiveMutex_Unlock(&IMPORT_LOCK(interp)); } -#ifdef HAVE_FORK -/* This function is called from PyOS_AfterFork_Child() to ensure that newly - created child processes do not share locks with the parent. - We now acquire the import lock around fork() calls but on some platforms - (Solaris 9 and earlier? see isue7242) that still left us with problems. 
*/ -PyStatus +void _PyImport_ReInitLock(PyInterpreterState *interp) { - if (IMPORT_LOCK(interp) != NULL) { - if (_PyThread_at_fork_reinit(&IMPORT_LOCK(interp)) < 0) { - return _PyStatus_ERR("failed to create a new lock"); - } - } - - if (IMPORT_LOCK_LEVEL(interp) > 1) { - /* Forked as a side effect of import */ - unsigned long me = PyThread_get_thread_ident(); - PyThread_acquire_lock(IMPORT_LOCK(interp), WAIT_LOCK); - IMPORT_LOCK_THREAD(interp) = me; - IMPORT_LOCK_LEVEL(interp)--; - } else { - IMPORT_LOCK_THREAD(interp) = PYTHREAD_INVALID_THREAD_ID; - IMPORT_LOCK_LEVEL(interp) = 0; - } - return _PyStatus_OK(); + // gh-126688: Thread id may change after fork() on some operating systems. + IMPORT_LOCK(interp).thread = PyThread_get_thread_ident_ex(); } -#endif /***************/ @@ -201,65 +155,60 @@ _PyImport_ClearModules(PyInterpreterState *interp) Py_SETREF(MODULES(interp), NULL); } -PyObject * -PyImport_GetModuleDict(void) +static inline PyObject * +get_modules_dict(PyThreadState *tstate, bool fatal) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (MODULES(interp) == NULL) { - Py_FatalError("interpreter has no modules dictionary"); + /* Technically, it would make sense to incref the dict, + * since sys.modules could be swapped out and decref'ed to 0 + * before the caller is done using it. However, that is highly + * unlikely, especially since we can rely on a global lock + * (i.e. the GIL) for thread-safety. */ + PyObject *modules = MODULES(tstate->interp); + if (modules == NULL) { + if (fatal) { + Py_FatalError("interpreter has no modules dictionary"); + } + _PyErr_SetString(tstate, PyExc_RuntimeError, + "unable to get sys.modules"); + return NULL; } - return MODULES(interp); + return modules; } -// This is only kept around for extensions that use _Py_IDENTIFIER. 
PyObject * -_PyImport_GetModuleId(_Py_Identifier *nameid) +PyImport_GetModuleDict(void) { - PyObject *name = _PyUnicode_FromId(nameid); /* borrowed */ - if (name == NULL) { - return NULL; - } - return PyImport_GetModule(name); + PyThreadState *tstate = _PyThreadState_GET(); + return get_modules_dict(tstate, true); } int _PyImport_SetModule(PyObject *name, PyObject *m) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *modules = MODULES(interp); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *modules = get_modules_dict(tstate, true); return PyObject_SetItem(modules, name, m); } int _PyImport_SetModuleString(const char *name, PyObject *m) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyObject *modules = MODULES(interp); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *modules = get_modules_dict(tstate, true); return PyMapping_SetItemString(modules, name, m); } static PyObject * import_get_module(PyThreadState *tstate, PyObject *name) { - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = get_modules_dict(tstate, false); if (modules == NULL) { - _PyErr_SetString(tstate, PyExc_RuntimeError, - "unable to get sys.modules"); return NULL; } PyObject *m; Py_INCREF(modules); - if (PyDict_CheckExact(modules)) { - m = PyDict_GetItemWithError(modules, name); /* borrowed */ - Py_XINCREF(m); - } - else { - m = PyObject_GetItem(modules, name); - if (m == NULL && _PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { - _PyErr_Clear(tstate); - } - } + (void)PyMapping_GetOptionalItem(modules, name, &m); Py_DECREF(modules); return m; } @@ -274,18 +223,21 @@ import_ensure_initialized(PyInterpreterState *interp, PyObject *mod, PyObject *n NOTE: because of this, initializing must be set *before* stuffing the new module in sys.modules. */ - spec = PyObject_GetAttr(mod, &_Py_ID(__spec__)); - int busy = _PyModuleSpec_IsInitializing(spec); - Py_XDECREF(spec); - if (busy) { - /* Wait until module is done importing. 
*/ - PyObject *value = _PyObject_CallMethodOneArg( - IMPORTLIB(interp), &_Py_ID(_lock_unlock_module), name); - if (value == NULL) { - return -1; - } - Py_DECREF(value); + int rc = PyObject_GetOptionalAttr(mod, &_Py_ID(__spec__), &spec); + if (rc > 0) { + rc = _PyModuleSpec_IsInitializing(spec); + Py_DECREF(spec); + } + if (rc <= 0) { + return rc; + } + /* Wait until module is done importing. */ + PyObject *value = PyObject_CallMethodOneArg( + IMPORTLIB(interp), &_Py_ID(_lock_unlock_module), name); + if (value == NULL) { + return -1; } + Py_DECREF(value); return 0; } @@ -315,26 +267,13 @@ PyImport_GetModule(PyObject *name) static PyObject * import_add_module(PyThreadState *tstate, PyObject *name) { - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = get_modules_dict(tstate, false); if (modules == NULL) { - _PyErr_SetString(tstate, PyExc_RuntimeError, - "no import module dictionary"); return NULL; } PyObject *m; - if (PyDict_CheckExact(modules)) { - m = Py_XNewRef(PyDict_GetItemWithError(modules, name)); - } - else { - m = PyObject_GetItem(modules, name); - // For backward-compatibility we copy the behavior - // of PyDict_GetItemWithError(). 
- if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { - _PyErr_Clear(tstate); - } - } - if (_PyErr_Occurred(tstate)) { + if (PyMapping_GetOptionalItem(modules, name, &m) < 0) { return NULL; } if (m != NULL && PyModule_Check(m)) { @@ -353,18 +292,51 @@ import_add_module(PyThreadState *tstate, PyObject *name) } PyObject * +PyImport_AddModuleRef(const char *name) +{ + PyObject *name_obj = PyUnicode_FromString(name); + if (name_obj == NULL) { + return NULL; + } + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *module = import_add_module(tstate, name_obj); + Py_DECREF(name_obj); + return module; +} + + +PyObject * PyImport_AddModuleObject(PyObject *name) { PyThreadState *tstate = _PyThreadState_GET(); PyObject *mod = import_add_module(tstate, name); - if (mod) { - PyObject *ref = PyWeakref_NewRef(mod, NULL); - Py_DECREF(mod); - if (ref == NULL) { - return NULL; - } - mod = PyWeakref_GetObject(ref); - Py_DECREF(ref); + if (!mod) { + return NULL; + } + + // gh-86160: PyImport_AddModuleObject() returns a borrowed reference. + // Create a weak reference to produce a borrowed reference, since it can + // become NULL. sys.modules type can be different than dict and it is not + // guaranteed that it keeps a strong reference to the module. It can be a + // custom mapping with __getitem__() which returns a new object or removes + // returned object, or __setitem__ which does nothing. There is so much + // unknown. With weakref we can be sure that we get either a reference to + // live object or NULL. + // + // Use PyImport_AddModuleRef() to avoid these issues. 
+ PyObject *ref = PyWeakref_NewRef(mod, NULL); + Py_DECREF(mod); + if (ref == NULL) { + return NULL; + } + mod = _PyWeakref_GET_REF(ref); + Py_DECREF(ref); + Py_XDECREF(mod); + + if (mod == NULL && !PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, + "sys.modules does not hold a strong reference " + "to the module"); } return mod; /* borrowed reference */ } @@ -393,10 +365,10 @@ remove_module(PyThreadState *tstate, PyObject *name) { PyObject *exc = _PyErr_GetRaisedException(tstate); - PyObject *modules = MODULES(tstate->interp); + PyObject *modules = get_modules_dict(tstate, true); if (PyDict_CheckExact(modules)) { - PyObject *mod = _PyDict_Pop(modules, name, Py_None); - Py_XDECREF(mod); + // Error is reported to the caller + (void)PyDict_Pop(modules, name, NULL); } else if (PyMapping_DelItem(modules, name) < 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_KeyError)) { @@ -415,32 +387,65 @@ remove_module(PyThreadState *tstate, PyObject *name) Py_ssize_t _PyImport_GetNextModuleIndex(void) { - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); - LAST_MODULE_INDEX++; - Py_ssize_t index = LAST_MODULE_INDEX; - PyThread_release_lock(EXTENSIONS.mutex); + return _Py_atomic_add_ssize(&LAST_MODULE_INDEX, 1) + 1; +} + +#ifndef NDEBUG +struct extensions_cache_value; +static struct extensions_cache_value * _find_cached_def(PyModuleDef *); +static Py_ssize_t _get_cached_module_index(struct extensions_cache_value *); +#endif + +static Py_ssize_t +_get_module_index_from_def(PyModuleDef *def) +{ + Py_ssize_t index = def->m_base.m_index; +#ifndef NDEBUG + struct extensions_cache_value *cached = _find_cached_def(def); + assert(cached == NULL || index == _get_cached_module_index(cached)); +#endif return index; } +static void +_set_module_index(PyModuleDef *def, Py_ssize_t index) +{ + assert(index > 0); + if (index == def->m_base.m_index) { + /* There's nothing to do. */ + } + else if (def->m_base.m_index == 0) { + /* It should have been initialized by PyModuleDef_Init(). 
+ * We assert here to catch this in dev, but keep going otherwise. */ + assert(def->m_base.m_index != 0); + def->m_base.m_index = index; + } + else { + /* It was already set for a different module. + * We replace the old value. */ + assert(def->m_base.m_index > 0); + def->m_base.m_index = index; + } +} + static const char * _modules_by_index_check(PyInterpreterState *interp, Py_ssize_t index) { - if (index == 0) { + if (index <= 0) { return "invalid module index"; } if (MODULES_BY_INDEX(interp) == NULL) { return "Interpreters module-list not accessible."; } - if (index > PyList_GET_SIZE(MODULES_BY_INDEX(interp))) { + if (index >= PyList_GET_SIZE(MODULES_BY_INDEX(interp))) { return "Module index out of bounds."; } return NULL; } static PyObject * -_modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_get(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; if (_modules_by_index_check(interp, index) != NULL) { return NULL; } @@ -450,11 +455,9 @@ _modules_by_index_get(PyInterpreterState *interp, PyModuleDef *def) static int _modules_by_index_set(PyInterpreterState *interp, - PyModuleDef *def, PyObject *module) + Py_ssize_t index, PyObject *module) { - assert(def != NULL); - assert(def->m_slots == NULL); - assert(def->m_base.m_index > 0); + assert(index > 0); if (MODULES_BY_INDEX(interp) == NULL) { MODULES_BY_INDEX(interp) = PyList_New(0); @@ -463,7 +466,6 @@ _modules_by_index_set(PyInterpreterState *interp, } } - Py_ssize_t index = def->m_base.m_index; while (PyList_GET_SIZE(MODULES_BY_INDEX(interp)) <= index) { if (PyList_Append(MODULES_BY_INDEX(interp), Py_None) < 0) { return -1; @@ -474,9 +476,8 @@ _modules_by_index_set(PyInterpreterState *interp, } static int -_modules_by_index_clear_one(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_clear_one(PyInterpreterState *interp, Py_ssize_t index) { - Py_ssize_t index = def->m_base.m_index; const char *err = 
_modules_by_index_check(interp, index); if (err != NULL) { Py_FatalError(err); @@ -493,7 +494,8 @@ PyState_FindModule(PyModuleDef* module) if (module->m_slots) { return NULL; } - return _modules_by_index_get(interp, module); + Py_ssize_t index = _get_module_index_from_def(module); + return _modules_by_index_get(interp, index); } /* _PyState_AddModule() has been completely removed from the C-API @@ -513,7 +515,9 @@ _PyState_AddModule(PyThreadState *tstate, PyObject* module, PyModuleDef* def) "PyState_AddModule called on module with slots"); return -1; } - return _modules_by_index_set(tstate->interp, def, module); + assert(def->m_slots == NULL); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_set(tstate->interp, index, module); } int @@ -533,7 +537,7 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) } PyInterpreterState *interp = tstate->interp; - Py_ssize_t index = def->m_base.m_index; + Py_ssize_t index = _get_module_index_from_def(def); if (MODULES_BY_INDEX(interp) && index < PyList_GET_SIZE(MODULES_BY_INDEX(interp)) && module == PyList_GET_ITEM(MODULES_BY_INDEX(interp), index)) @@ -542,7 +546,8 @@ PyState_AddModule(PyObject* module, PyModuleDef* def) return -1; } - return _modules_by_index_set(interp, def, module); + assert(def->m_slots == NULL); + return _modules_by_index_set(interp, index, module); } int @@ -555,7 +560,8 @@ PyState_RemoveModule(PyModuleDef* def) "PyState_RemoveModule called on module with slots"); return -1; } - return _modules_by_index_clear_one(tstate->interp, def); + Py_ssize_t index = _get_module_index_from_def(def); + return _modules_by_index_clear_one(tstate->interp, index); } @@ -574,6 +580,8 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) /* cleanup the saved copy of module dicts */ PyModuleDef *md = PyModule_GetDef(m); if (md) { + // XXX Do this more carefully. The dict might be owned + // by another interpreter. 
Py_CLEAR(md->m_base.m_copy); } } @@ -584,7 +592,7 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) if (PyList_SetSlice(MODULES_BY_INDEX(interp), 0, PyList_GET_SIZE(MODULES_BY_INDEX(interp)), NULL)) { - PyErr_WriteUnraisable(MODULES_BY_INDEX(interp)); + PyErr_FormatUnraisable("Exception ignored on clearing interpreters module list"); } } @@ -598,7 +606,7 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) when an extension is loaded. This includes when it is imported for the first time. - Here's a summary, using importlib._boostrap._load() as a starting point. + Here's a summary, using importlib._bootstrap._load() as a starting point. 1. importlib._bootstrap._load() 2. _load(): acquire import lock @@ -618,77 +626,100 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) ...for single-phase init modules, where m_size == -1: (6). first time (not found in _PyRuntime.imports.extensions): - 1. _imp_create_dynamic_impl() -> import_find_extension() - 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() - 3. _PyImport_LoadDynamicModuleWithSpec(): load <module init func> - 4. _PyImport_LoadDynamicModuleWithSpec(): call <module init func> - 5. <module init func> -> PyModule_Create() -> PyModule_Create2() -> PyModule_CreateInitialized() - 6. PyModule_CreateInitialized() -> PyModule_New() - 7. PyModule_CreateInitialized(): allocate mod->md_state - 8. PyModule_CreateInitialized() -> PyModule_AddFunctions() - 9. PyModule_CreateInitialized() -> PyModule_SetDocString() - 10. PyModule_CreateInitialized(): set mod->md_def - 11. <module init func>: initialize the module - 12. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() - 13. _PyImport_LoadDynamicModuleWithSpec(): set def->m_base.m_init - 14. _PyImport_LoadDynamicModuleWithSpec(): set __file__ - 15. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_FixupExtensionObject() - 16. 
_PyImport_FixupExtensionObject(): add it to interp->imports.modules_by_index - 17. _PyImport_FixupExtensionObject(): copy __dict__ into def->m_base.m_copy - 18. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + A. _imp_create_dynamic_impl() -> import_find_extension() + B. _imp_create_dynamic_impl() -> _PyImport_GetModInitFunc() + C. _PyImport_GetModInitFunc(): load <module init func> + D. _imp_create_dynamic_impl() -> import_run_extension() + E. import_run_extension() -> _PyImport_RunModInitFunc() + F. _PyImport_RunModInitFunc(): call <module init func> + G. <module init func> -> PyModule_Create() -> PyModule_Create2() + -> PyModule_CreateInitialized() + H. PyModule_CreateInitialized() -> PyModule_New() + I. PyModule_CreateInitialized(): allocate mod->md_state + J. PyModule_CreateInitialized() -> PyModule_AddFunctions() + K. PyModule_CreateInitialized() -> PyModule_SetDocString() + L. PyModule_CreateInitialized(): set mod->md_def + M. <module init func>: initialize the module, etc. + N. import_run_extension() + -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + O. import_run_extension(): set __file__ + P. import_run_extension() -> update_global_state_for_extension() + Q. update_global_state_for_extension(): + copy __dict__ into def->m_base.m_copy + R. update_global_state_for_extension(): + add it to _PyRuntime.imports.extensions + S. import_run_extension() -> finish_singlephase_extension() + T. finish_singlephase_extension(): + add it to interp->imports.modules_by_index + U. finish_singlephase_extension(): add it to sys.modules + + Step (Q) is skipped for core modules (sys/builtins). (6). subsequent times (found in _PyRuntime.imports.extensions): - 1. _imp_create_dynamic_impl() -> import_find_extension() - 2. import_find_extension() -> import_add_module() - 3. if name in sys.modules: use that module - 4. else: - 1. import_add_module() -> PyModule_NewObject() - 2. import_add_module(): set it on sys.modules - 5. 
import_find_extension(): copy the "m_copy" dict into __dict__ - 6. _imp_create_dynamic_impl() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + A. _imp_create_dynamic_impl() -> import_find_extension() + B. import_find_extension() -> reload_singlephase_extension() + C. reload_singlephase_extension() + -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + D. reload_singlephase_extension() -> import_add_module() + E. if name in sys.modules: use that module + F. else: + 1. import_add_module() -> PyModule_NewObject() + 2. import_add_module(): set it on sys.modules + G. reload_singlephase_extension(): copy the "m_copy" dict into __dict__ + H. reload_singlephase_extension(): add to modules_by_index (10). (every time): - 1. noop + A. noop ...for single-phase init modules, where m_size >= 0: (6). not main interpreter and never loaded there - every time (not found in _PyRuntime.imports.extensions): - 1-16. (same as for m_size == -1) + A-P. (same as for m_size == -1) + Q. _PyImport_RunModInitFunc(): set def->m_base.m_init + R. (skipped) + S-U. (same as for m_size == -1) (6). main interpreter - first time (not found in _PyRuntime.imports.extensions): - 1-16. (same as for m_size == -1) - 17. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + A-P. (same as for m_size == -1) + Q. _PyImport_RunModInitFunc(): set def->m_base.m_init + R-U. (same as for m_size == -1) - (6). previously loaded in main interpreter (found in _PyRuntime.imports.extensions): - 1. _imp_create_dynamic_impl() -> import_find_extension() - 2. import_find_extension(): call def->m_base.m_init - 3. import_find_extension(): add the module to sys.modules + (6). subsequent times (found in _PyRuntime.imports.extensions): + A. _imp_create_dynamic_impl() -> import_find_extension() + B. import_find_extension() -> reload_singlephase_extension() + C. reload_singlephase_extension() + -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + D. 
reload_singlephase_extension(): call def->m_base.m_init (see above) + E. reload_singlephase_extension(): add the module to sys.modules + F. reload_singlephase_extension(): add to modules_by_index (10). every time: - 1. noop + A. noop ...for multi-phase init modules: (6). every time: - 1. _imp_create_dynamic_impl() -> import_find_extension() (not found) - 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() - 3. _PyImport_LoadDynamicModuleWithSpec(): load module init func - 4. _PyImport_LoadDynamicModuleWithSpec(): call module init func - 5. _PyImport_LoadDynamicModuleWithSpec() -> PyModule_FromDefAndSpec() - 6. PyModule_FromDefAndSpec(): gather/check moduledef slots - 7. if there's a Py_mod_create slot: + A. _imp_create_dynamic_impl() -> import_find_extension() (not found) + B. _imp_create_dynamic_impl() -> _PyImport_GetModInitFunc() + C. _PyImport_GetModInitFunc(): load <module init func> + D. _imp_create_dynamic_impl() -> import_run_extension() + E. import_run_extension() -> _PyImport_RunModInitFunc() + F. _PyImport_RunModInitFunc(): call <module init func> + G. import_run_extension() -> PyModule_FromDefAndSpec() + H. PyModule_FromDefAndSpec(): gather/check moduledef slots + I. if there's a Py_mod_create slot: 1. PyModule_FromDefAndSpec(): call its function - 8. else: + J. else: 1. PyModule_FromDefAndSpec() -> PyModule_NewObject() - 9: PyModule_FromDefAndSpec(): set mod->md_def - 10. PyModule_FromDefAndSpec() -> _add_methods_to_object() - 11. PyModule_FromDefAndSpec() -> PyModule_SetDocString() + K: PyModule_FromDefAndSpec(): set mod->md_def + L. PyModule_FromDefAndSpec() -> _add_methods_to_object() + M. PyModule_FromDefAndSpec() -> PyModule_SetDocString() (10). every time: - 1. _imp_exec_dynamic_impl() -> exec_builtin_or_dynamic() - 2. if mod->md_state == NULL (including if m_size == 0): + A. _imp_exec_dynamic_impl() -> exec_builtin_or_dynamic() + B. if mod->md_state == NULL (including if m_size == 0): 1. 
exec_builtin_or_dynamic() -> PyModule_ExecDef() 2. PyModule_ExecDef(): allocate mod->md_state 3. if there's a Py_mod_exec slot: @@ -716,7 +747,7 @@ const char * _PyImport_ResolveNameWithPackageContext(const char *name) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif if (PKGCONTEXT != NULL) { const char *p = strrchr(PKGCONTEXT, '.'); @@ -726,7 +757,7 @@ _PyImport_ResolveNameWithPackageContext(const char *name) } } #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return name; } @@ -735,12 +766,12 @@ const char * _PyImport_SwapPackageContext(const char *newcontext) { #ifndef HAVE_THREAD_LOCAL - PyThread_acquire_lock(EXTENSIONS.mutex, WAIT_LOCK); + PyMutex_Lock(&EXTENSIONS.mutex); #endif const char *oldcontext = PKGCONTEXT; PKGCONTEXT = newcontext; #ifndef HAVE_THREAD_LOCAL - PyThread_release_lock(EXTENSIONS.mutex); + PyMutex_Unlock(&EXTENSIONS.mutex); #endif return oldcontext; } @@ -790,6 +821,8 @@ static int clear_singlephase_extension(PyInterpreterState *interp, // Currently, this is only used for testing. // (See _testinternalcapi.clear_extension().) +// If adding another use, be careful about modules that import themselves +// recursively (see gh-123880). int _PyImport_ClearExtension(PyObject *name, PyObject *filename) { @@ -807,16 +840,6 @@ _PyImport_ClearExtension(PyObject *name, PyObject *filename) } -/*******************/ - -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) -#error #include <emscripten.h> -EM_JS(PyObject*, _PyImport_InitFunc_TrampolineCall, (PyModInitFunction func), { - return wasmTable.get(func)(); -}); -#endif // __EMSCRIPTEN__ && PY_CALL_TRAMPOLINE - - /*****************************/ /* single-phase init modules */ /*****************************/ @@ -889,22 +912,23 @@ gets even messier. 
static inline void extensions_lock_acquire(void) { - PyThread_acquire_lock(_PyRuntime.imports.extensions.mutex, WAIT_LOCK); + PyMutex_Lock(&_PyRuntime.imports.extensions.mutex); } static inline void extensions_lock_release(void) { - PyThread_release_lock(_PyRuntime.imports.extensions.mutex); + PyMutex_Unlock(&_PyRuntime.imports.extensions.mutex); } + /* Magic for extension modules (built-in as well as dynamically loaded). To prevent initializing an extension module more than once, we keep a static dictionary 'extensions' keyed by the tuple (module name, module name) (for built-in modules) or by (filename, module name) (for dynamically loaded modules), containing these modules. A copy of the module's dictionary is stored by calling - _PyImport_FixupExtensionObject() immediately after the module initialization + fix_up_extension() immediately after the module initialization function succeeds. A copy can be retrieved from there by calling import_find_extension(). @@ -914,6 +938,220 @@ extensions_lock_release(void) dictionary, to avoid loading shared libraries twice. */ +typedef struct cached_m_dict { + /* A shallow copy of the original module's __dict__. */ + PyObject *copied; + /* The interpreter that owns the copy. */ + int64_t interpid; +} *cached_m_dict_t; + +struct extensions_cache_value { + PyModuleDef *def; + + /* The function used to re-initialize the module. + This is only set for legacy (single-phase init) extension modules + and only used for those that support multiple initializations + (m_size >= 0). + It is set by update_global_state_for_extension(). */ + PyModInitFunction m_init; + + /* The module's index into its interpreter's modules_by_index cache. + This is set for all extension modules but only used for legacy ones. + (See PyInterpreterState.modules_by_index for more info.) */ + Py_ssize_t m_index; + + /* A copy of the module's __dict__ after the first time it was loaded. 
+ This is only set/used for legacy modules that do not support + multiple initializations. + It is set exclusively by fixup_cached_def(). */ + cached_m_dict_t m_dict; + struct cached_m_dict _m_dict; + + _Py_ext_module_origin origin; + +#ifdef Py_GIL_DISABLED + /* The module's md_gil slot, for legacy modules that are reinitialized from + m_dict rather than calling their initialization function again. */ + void *md_gil; +#endif +}; + +static struct extensions_cache_value * +alloc_extensions_cache_value(void) +{ + struct extensions_cache_value *value + = PyMem_RawMalloc(sizeof(struct extensions_cache_value)); + if (value == NULL) { + PyErr_NoMemory(); + return NULL; + } + *value = (struct extensions_cache_value){0}; + return value; +} + +static void +free_extensions_cache_value(struct extensions_cache_value *value) +{ + PyMem_RawFree(value); +} + +static Py_ssize_t +_get_cached_module_index(struct extensions_cache_value *cached) +{ + assert(cached->m_index > 0); + return cached->m_index; +} + +static void +fixup_cached_def(struct extensions_cache_value *value) +{ + /* For the moment, the values in the def's m_base may belong + * to another module, and we're replacing them here. This can + * cause problems later if the old module is reloaded. + * + * Also, we don't decref any old cached values first when we + * replace them here, in case we need to restore them in the + * near future. Instead, the caller is responsible for wrapping + * this up by calling cleanup_old_cached_def() or + * restore_old_cached_def() if there was an error. */ + PyModuleDef *def = value->def; + assert(def != NULL); + + /* We assume that all module defs are statically allocated + and will never be freed. Otherwise, we would incref here. */ + _Py_SetImmortalUntracked((PyObject *)def); + + def->m_base.m_init = value->m_init; + + assert(value->m_index > 0); + _set_module_index(def, value->m_index); + + /* Different modules can share the same def, so we can't just + * expect m_copy to be NULL. 
*/ + assert(def->m_base.m_copy == NULL + || def->m_base.m_init == NULL + || value->m_dict != NULL); + if (value->m_dict != NULL) { + assert(value->m_dict->copied != NULL); + /* As noted above, we don't first decref the old value, if any. */ + def->m_base.m_copy = Py_NewRef(value->m_dict->copied); + } +} + +static void +restore_old_cached_def(PyModuleDef *def, PyModuleDef_Base *oldbase) +{ + def->m_base = *oldbase; +} + +static void +cleanup_old_cached_def(PyModuleDef_Base *oldbase) +{ + Py_XDECREF(oldbase->m_copy); +} + +static void +del_cached_def(struct extensions_cache_value *value) +{ + /* If we hadn't made the stored defs immortal, we would decref here. + However, this decref would be problematic if the module def were + dynamically allocated, it were the last ref, and this function + were called with an interpreter other than the def's owner. */ + assert(value->def == NULL || _Py_IsImmortalLoose(value->def)); + + Py_XDECREF(value->def->m_base.m_copy); + value->def->m_base.m_copy = NULL; +} + +static int +init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) +{ + assert(value != NULL); + /* This should only have been called without an m_dict already set. */ + assert(value->m_dict == NULL); + if (m_dict == NULL) { + return 0; + } + assert(PyDict_Check(m_dict)); + assert(value->origin != _Py_ext_module_origin_CORE); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!is_interpreter_isolated(interp)); + + /* XXX gh-88216: The copied dict is owned by the current + * interpreter. That's a problem if the interpreter has + * its own obmalloc state or if the module is successfully + * imported into such an interpreter. If the interpreter + * has its own GIL then there may be data races and + * PyImport_ClearModulesByIndex() can crash. Normally, + * a single-phase init module cannot be imported in an + * isolated interpreter, but there are ways around that. + * Hence, here be dragons!
Ideally we would instead do + * something like make a read-only, immortal copy of the + * dict using PyMem_RawMalloc() and store *that* in m_copy. + * Then we'd need to make sure to clear that when the + * runtime is finalized, rather than in + * PyImport_ClearModulesByIndex(). */ + PyObject *copied = PyDict_Copy(m_dict); + if (copied == NULL) { + /* We expect this can only be "out of memory". */ + return -1; + } + // XXX We may want to make the copy immortal. + + value->_m_dict = (struct cached_m_dict){ + .copied=copied, + .interpid=PyInterpreterState_GetID(interp), + }; + + value->m_dict = &value->_m_dict; + return 0; +} + +static void +del_cached_m_dict(struct extensions_cache_value *value) +{ + if (value->m_dict != NULL) { + assert(value->m_dict == &value->_m_dict); + assert(value->m_dict->copied != NULL); + /* In the future we can take advantage of m_dict->interpid + * to decref the dict using the owning interpreter. */ + Py_XDECREF(value->m_dict->copied); + value->m_dict = NULL; + } +} + +static PyObject * get_core_module_dict( + PyInterpreterState *interp, PyObject *name, PyObject *path); + +static PyObject * +get_cached_m_dict(struct extensions_cache_value *value, + PyObject *name, PyObject *path) +{ + assert(value != NULL); + PyInterpreterState *interp = _PyInterpreterState_GET(); + /* It might be a core module (e.g. sys & builtins), + for which we don't cache m_dict. */ + if (value->origin == _Py_ext_module_origin_CORE) { + return get_core_module_dict(interp, name, path); + } + assert(value->def != NULL); + // XXX Switch to value->m_dict. 
+ PyObject *m_dict = value->def->m_base.m_copy; + Py_XINCREF(m_dict); + return m_dict; +} + +static void +del_extensions_cache_value(struct extensions_cache_value *value) +{ + if (value != NULL) { + del_cached_m_dict(value); + del_cached_def(value); + free_extensions_cache_value(value); + } +} + static void * hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) { @@ -929,6 +1167,7 @@ hashtable_key_from_2_strings(PyObject *str1, PyObject *str2, const char sep) assert(SIZE_MAX - str1_len - str2_len > 2); size_t size = str1_len + 1 + str2_len + 1; + // XXX Use a buffer if it's a temp value (every case but "set"). char *key = PyMem_RawMalloc(size); if (key == NULL) { PyErr_NoMemory(); @@ -960,102 +1199,225 @@ hashtable_destroy_str(void *ptr) PyMem_RawFree(ptr); } -#define HTSEP ':' +#ifndef NDEBUG +struct hashtable_next_match_def_data { + PyModuleDef *def; + struct extensions_cache_value *matched; +}; -static PyModuleDef * -_extensions_cache_get(PyObject *filename, PyObject *name) +static int +hashtable_next_match_def(_Py_hashtable_t *ht, + const void *key, const void *value, void *user_data) { - PyModuleDef *def = NULL; - void *key = NULL; - extensions_lock_acquire(); + if (value == NULL) { + /* It was previously deleted. 
*/ + return 0; + } + struct hashtable_next_match_def_data *data + = (struct hashtable_next_match_def_data *)user_data; + struct extensions_cache_value *cur + = (struct extensions_cache_value *)value; + if (cur->def == data->def) { + data->matched = cur; + return 1; + } + return 0; +} + +static struct extensions_cache_value * +_find_cached_def(PyModuleDef *def) +{ + struct hashtable_next_match_def_data data = {0}; + (void)_Py_hashtable_foreach( + EXTENSIONS.hashtable, hashtable_next_match_def, &data); + return data.matched; +} +#endif + +#define HTSEP ':' +static int +_extensions_cache_init(void) +{ + _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; + EXTENSIONS.hashtable = _Py_hashtable_new_full( + hashtable_hash_str, + hashtable_compare_str, + hashtable_destroy_str, // key + (_Py_hashtable_destroy_func)del_extensions_cache_value, // value + &alloc + ); if (EXTENSIONS.hashtable == NULL) { - goto finally; + PyErr_NoMemory(); + return -1; } + return 0; +} - key = hashtable_key_from_2_strings(filename, name, HTSEP); +static _Py_hashtable_entry_t * +_extensions_cache_find_unlocked(PyObject *path, PyObject *name, + void **p_key) +{ + if (EXTENSIONS.hashtable == NULL) { + return NULL; + } + void *key = hashtable_key_from_2_strings(path, name, HTSEP); if (key == NULL) { - goto finally; + return NULL; + } + _Py_hashtable_entry_t *entry = + _Py_hashtable_get_entry(EXTENSIONS.hashtable, key); + if (p_key != NULL) { + *p_key = key; + } + else { + hashtable_destroy_str(key); } - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + return entry; +} + +/* This can only fail with "out of memory". */ +static struct extensions_cache_value * +_extensions_cache_get(PyObject *path, PyObject *name) +{ + struct extensions_cache_value *value = NULL; + extensions_lock_acquire(); + + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(path, name, NULL); if (entry == NULL) { + /* It was never added. 
*/ goto finally; } - def = (PyModuleDef *)entry->value; + value = (struct extensions_cache_value *)entry->value; finally: extensions_lock_release(); - if (key != NULL) { - PyMem_RawFree(key); - } - return def; + return value; } -static int -_extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) +/* This can only fail with "out of memory". */ +static struct extensions_cache_value * +_extensions_cache_set(PyObject *path, PyObject *name, + PyModuleDef *def, PyModInitFunction m_init, + Py_ssize_t m_index, PyObject *m_dict, + _Py_ext_module_origin origin, void *md_gil) { - int res = -1; + struct extensions_cache_value *value = NULL; + void *key = NULL; + struct extensions_cache_value *newvalue = NULL; + PyModuleDef_Base olddefbase = def->m_base; + + assert(def != NULL); + assert(m_init == NULL || m_dict == NULL); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(def->m_base.m_init == NULL || def->m_base.m_init == m_init); + /* For now we don't worry about comparing value->m_copy. */ + assert(def->m_base.m_copy == NULL || m_dict != NULL); + assert((origin == _Py_ext_module_origin_DYNAMIC) == (name != path)); + assert(origin != _Py_ext_module_origin_CORE || m_dict == NULL); + extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { - _Py_hashtable_allocator_t alloc = {PyMem_RawMalloc, PyMem_RawFree}; - EXTENSIONS.hashtable = _Py_hashtable_new_full( - hashtable_hash_str, - hashtable_compare_str, - hashtable_destroy_str, // key - /* There's no need to decref the def since it's immortal. */ - NULL, // value - &alloc - ); - if (EXTENSIONS.hashtable == NULL) { - PyErr_NoMemory(); + if (_extensions_cache_init() < 0) { goto finally; } } - void *key = hashtable_key_from_2_strings(filename, name, HTSEP); - if (key == NULL) { + /* Create a cached value to populate for the module. 
*/ + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(path, name, &key); + value = entry == NULL + ? NULL + : (struct extensions_cache_value *)entry->value; + if (value != NULL) { + /* gh-123880: If there's an existing cache value, it means a module is + * being imported recursively from its PyInit_* or Py_mod_* function. + * (That function presumably handles returning a partially + * constructed module in such a case.) + * We can reuse the existing cache value; it is owned by the cache. + * (Entries get removed from it in exceptional circumstances, + * after interpreter shutdown, and in runtime shutdown.) + */ + goto finally_oldvalue; + } + newvalue = alloc_extensions_cache_value(); + if (newvalue == NULL) { goto finally; } - int already_set = 0; - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + /* Populate the new cache value data. */ + *newvalue = (struct extensions_cache_value){ + .def=def, + .m_init=m_init, + .m_index=m_index, + /* m_dict is set by set_cached_m_dict(). */ + .origin=origin, +#ifdef Py_GIL_DISABLED + .md_gil=md_gil, +#endif + }; +#ifndef Py_GIL_DISABLED + (void)md_gil; +#endif + if (init_cached_m_dict(newvalue, m_dict) < 0) { + goto finally; + } + fixup_cached_def(newvalue); + if (entry == NULL) { - if (_Py_hashtable_set(EXTENSIONS.hashtable, key, def) < 0) { - PyMem_RawFree(key); + /* It was never added. */ + if (_Py_hashtable_set(EXTENSIONS.hashtable, key, newvalue) < 0) { PyErr_NoMemory(); goto finally; } + /* The hashtable owns the key now. */ + key = NULL; + } + else if (value == NULL) { + /* It was previously deleted. */ + entry->value = newvalue; } else { - if (entry->value == NULL) { - entry->value = def; - } - else { - /* We expect it to be static, so it must be the same pointer. */ - assert((PyModuleDef *)entry->value == def); - already_set = 1; + /* We are updating the entry for an existing module. */ + /* We expect def to be static, so it must be the same pointer. 
*/ + assert(value->def == def); + /* We expect the same symbol to be used and the shared object file + * to have remained loaded, so it must be the same pointer. */ + assert(value->m_init == m_init); + /* The same module can't switch between caching __dict__ and not. */ + assert((value->m_dict == NULL) == (m_dict == NULL)); + /* This shouldn't ever happen. */ + Py_UNREACHABLE(); + } + + value = newvalue; + +finally: + if (value == NULL) { + restore_old_cached_def(def, &olddefbase); + if (newvalue != NULL) { + del_extensions_cache_value(newvalue); } - PyMem_RawFree(key); } - if (!already_set) { - /* We assume that all module defs are statically allocated - and will never be freed. Otherwise, we would incref here. */ - _Py_SetImmortal(def); + else { + cleanup_old_cached_def(&olddefbase); } - res = 0; -finally: +finally_oldvalue: extensions_lock_release(); - return res; + if (key != NULL) { + hashtable_destroy_str(key); + } + + return value; } static void -_extensions_cache_delete(PyObject *filename, PyObject *name) +_extensions_cache_delete(PyObject *path, PyObject *name) { - void *key = NULL; extensions_lock_acquire(); if (EXTENSIONS.hashtable == NULL) { @@ -1063,13 +1425,8 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) goto finally; } - key = hashtable_key_from_2_strings(filename, name, HTSEP); - if (key == NULL) { - goto finally; - } - - _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry( - EXTENSIONS.hashtable, key); + _Py_hashtable_entry_t *entry = + _extensions_cache_find_unlocked(path, name, NULL); if (entry == NULL) { /* It was never added. */ goto finally; @@ -1078,18 +1435,13 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) /* It was already removed. */ goto finally; } - /* If we hadn't made the stored defs immortal, we would decref here. 
- However, this decref would be problematic if the module def were - dynamically allocated, it were the last ref, and this function - were called with an interpreter other than the def's owner. */ - assert(_Py_IsImmortal(entry->value)); + struct extensions_cache_value *value = entry->value; entry->value = NULL; + del_extensions_cache_value(value); + finally: extensions_lock_release(); - if (key != NULL) { - PyMem_RawFree(key); - } } static void @@ -1124,7 +1476,7 @@ check_multi_interp_extensions(PyInterpreterState *interp) int _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name) { - PyInterpreterState *interp = _PyInterpreterState_Get(); + PyInterpreterState *interp = _PyInterpreterState_GET(); if (check_multi_interp_extensions(interp)) { assert(!_Py_IsMainInterpreter(interp)); PyErr_Format(PyExc_ImportError, @@ -1135,31 +1487,122 @@ _PyImport_CheckSubinterpIncompatibleExtensionAllowed(const char *name) return 0; } +#ifdef Py_GIL_DISABLED +int +_PyImport_CheckGILForModule(PyObject* module, PyObject *module_name) +{ + PyThreadState *tstate = _PyThreadState_GET(); + if (module == NULL) { + _PyEval_DisableGIL(tstate); + return 0; + } + + if (!PyModule_Check(module) || + ((PyModuleObject *)module)->md_gil == Py_MOD_GIL_USED) { + if (_PyEval_EnableGILPermanent(tstate)) { + int warn_result = PyErr_WarnFormat( + PyExc_RuntimeWarning, + 1, + "The global interpreter lock (GIL) has been enabled to load " + "module '%U', which has not declared that it can run safely " + "without the GIL. 
To override this behavior and keep the GIL " + "disabled (at your own risk), run with PYTHON_GIL=0 or -Xgil=0.", + module_name + ); + if (warn_result < 0) { + return warn_result; + } + } + + const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); + if (config->enable_gil == _PyConfig_GIL_DEFAULT && config->verbose) { + PySys_FormatStderr("# loading module '%U', which requires the GIL\n", + module_name); + } + } + else { + _PyEval_DisableGIL(tstate); + } + + return 0; +} +#endif + +static PyThreadState * +switch_to_main_interpreter(PyThreadState *tstate) +{ + if (_Py_IsMainInterpreter(tstate->interp)) { + return tstate; + } + PyThreadState *main_tstate = _PyThreadState_NewBound( + _PyInterpreterState_Main(), _PyThreadState_WHENCE_EXEC); + if (main_tstate == NULL) { + return NULL; + } +#ifndef NDEBUG + PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); + assert(old_tstate == tstate); +#else + (void)PyThreadState_Swap(main_tstate); +#endif + return main_tstate; +} + +static void +switch_back_from_main_interpreter(PyThreadState *tstate, + PyThreadState *main_tstate, + PyObject *tempobj) +{ + assert(main_tstate == PyThreadState_GET()); + assert(_Py_IsMainInterpreter(main_tstate->interp)); + assert(tstate->interp != main_tstate->interp); + + /* Handle any exceptions, which we cannot propagate directly + * to the subinterpreter. */ + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_MemoryError)) { + /* We trust it will be caught again soon. */ + PyErr_Clear(); + } + else { + /* Printing the exception should be sufficient. */ + PyErr_PrintEx(0); + } + } + + Py_XDECREF(tempobj); + + PyThreadState_Clear(main_tstate); + (void)PyThreadState_Swap(tstate); + PyThreadState_Delete(main_tstate); +} + static PyObject * get_core_module_dict(PyInterpreterState *interp, - PyObject *name, PyObject *filename) + PyObject *name, PyObject *path) { /* Only builtin modules are core. 
*/ - if (filename == name) { + if (path == name) { assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "sys") == 0) { - return interp->sysdict_copy; + return Py_NewRef(interp->sysdict_copy); } assert(!PyErr_Occurred()); if (PyUnicode_CompareWithASCIIString(name, "builtins") == 0) { - return interp->builtins_copy; + return Py_NewRef(interp->builtins_copy); } assert(!PyErr_Occurred()); } return NULL; } +#ifndef NDEBUG static inline int -is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *filename) +is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *path) { /* This might be called before the core dict copies are in place, so we can't rely on get_core_module_dict() here. */ - if (filename == name) { + if (path == name) { if (PyUnicode_CompareWithASCIIString(name, "sys") == 0) { return 1; } @@ -1170,171 +1613,577 @@ is_core_module(PyInterpreterState *interp, PyObject *name, PyObject *filename) return 0; } -static int -fix_up_extension(PyObject *mod, PyObject *name, PyObject *filename) + +static _Py_ext_module_kind +_get_extension_kind(PyModuleDef *def, bool check_size) { - if (mod == NULL || !PyModule_Check(mod)) { - PyErr_BadInternalCall(); - return -1; + _Py_ext_module_kind kind; + if (def == NULL) { + /* It must be a module created by reload_singlephase_extension() + * from m_copy. Ideally we'd do away with this case. 
*/ + kind = _Py_ext_module_kind_SINGLEPHASE; } - - struct PyModuleDef *def = PyModule_GetDef(mod); - if (!def) { - PyErr_BadInternalCall(); - return -1; + else if (def->m_slots != NULL) { + kind = _Py_ext_module_kind_MULTIPHASE; } - - PyThreadState *tstate = _PyThreadState_GET(); - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { - return -1; + else if (check_size && def->m_size == -1) { + kind = _Py_ext_module_kind_SINGLEPHASE; + } + else if (def->m_base.m_init != NULL) { + kind = _Py_ext_module_kind_SINGLEPHASE; } + else { + // This is probably single-phase init, but a multi-phase + // module *can* have NULL m_slots. + kind = _Py_ext_module_kind_UNKNOWN; + } + return kind; +} - // bpo-44050: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - if (!is_core_module(tstate->interp, name, filename)) { +/* The module might not be fully initialized yet + * and PyModule_FromDefAndSpec() checks m_size + * so we skip m_size. */ +#define assert_multiphase_def(def) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(def, false); \ + assert(kind == _Py_ext_module_kind_MULTIPHASE \ + /* m_slots can be NULL. 
*/ \ + || kind == _Py_ext_module_kind_UNKNOWN); \ + } while (0) + +#define assert_singlephase_def(def) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(def, true); \ + assert(kind == _Py_ext_module_kind_SINGLEPHASE \ + || kind == _Py_ext_module_kind_UNKNOWN); \ + } while (0) + +#define assert_singlephase(cached) \ + do { \ + _Py_ext_module_kind kind = _get_extension_kind(cached->def, true); \ + assert(kind == _Py_ext_module_kind_SINGLEPHASE); \ + } while (0) + +#else /* defined(NDEBUG) */ +#define assert_multiphase_def(def) +#define assert_singlephase_def(def) +#define assert_singlephase(cached) +#endif + + +struct singlephase_global_update { + PyModInitFunction m_init; + Py_ssize_t m_index; + PyObject *m_dict; + _Py_ext_module_origin origin; + void *md_gil; +}; + +static struct extensions_cache_value * +update_global_state_for_extension(PyThreadState *tstate, + PyObject *path, PyObject *name, + PyModuleDef *def, + struct singlephase_global_update *singlephase) +{ + struct extensions_cache_value *cached = NULL; + PyModInitFunction m_init = NULL; + PyObject *m_dict = NULL; + + /* Set up for _extensions_cache_set(). */ + if (singlephase == NULL) { + assert(def->m_base.m_init == NULL); + assert(def->m_base.m_copy == NULL); + } + else { + if (singlephase->m_init != NULL) { + assert(singlephase->m_dict == NULL); + assert(def->m_base.m_copy == NULL); + assert(def->m_size >= 0); + /* Remember pointer to module init function. */ + // XXX If two modules share a def then def->m_base will + // reflect the last one added (here) to the global cache. + // We should prevent this somehow. The simplest solution + // is probably to store m_copy/m_init in the cache along + // with the def, rather than within the def. + m_init = singlephase->m_init; + } + else if (singlephase->m_dict == NULL) { + /* It must be a core builtin module. 
*/ + assert(is_core_module(tstate->interp, name, path)); + assert(def->m_size == -1); + assert(def->m_base.m_copy == NULL); + assert(def->m_base.m_init == NULL); + } + else { + assert(PyDict_Check(singlephase->m_dict)); + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + assert(def->m_size == -1); + assert(!is_core_module(tstate->interp, name, path)); assert(PyUnicode_CompareWithASCIIString(name, "sys") != 0); assert(PyUnicode_CompareWithASCIIString(name, "builtins") != 0); - if (def->m_base.m_copy) { - /* Somebody already imported the module, - likely under a different name. - XXX this should really not happen. */ - Py_CLEAR(def->m_base.m_copy); - } - PyObject *dict = PyModule_GetDict(mod); - if (dict == NULL) { - return -1; - } - def->m_base.m_copy = PyDict_Copy(dict); - if (def->m_base.m_copy == NULL) { - return -1; - } + m_dict = singlephase->m_dict; } } + /* Add the module's def to the global cache. */ // XXX Why special-case the main interpreter? if (_Py_IsMainInterpreter(tstate->interp) || def->m_size == -1) { - if (_extensions_cache_set(filename, name, def) < 0) { - return -1; +#ifndef NDEBUG + cached = _extensions_cache_get(path, name); + assert(cached == NULL || cached->def == def); +#endif + cached = _extensions_cache_set( + path, name, def, m_init, singlephase->m_index, m_dict, + singlephase->origin, singlephase->md_gil); + if (cached == NULL) { + // XXX Ignore this error? Doing so would effectively + // mark the module as not loadable. + return NULL; } } - return 0; + return cached; } -int -_PyImport_FixupExtensionObject(PyObject *mod, PyObject *name, - PyObject *filename, PyObject *modules) +/* For multi-phase init modules, the module is finished + * by PyModule_FromDefAndSpec(). 
*/ +static int +finish_singlephase_extension(PyThreadState *tstate, PyObject *mod, + struct extensions_cache_value *cached, + PyObject *name, PyObject *modules) { - if (PyObject_SetItem(modules, name, mod) < 0) { + assert(mod != NULL && PyModule_Check(mod)); + assert(cached->def == _PyModule_GetDef(mod)); + + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { return -1; } - if (fix_up_extension(mod, name, filename) < 0) { - PyMapping_DelItem(modules, name); - return -1; + + if (modules != NULL) { + if (PyObject_SetItem(modules, name, mod) < 0) { + return -1; + } } + return 0; } static PyObject * -import_find_extension(PyThreadState *tstate, PyObject *name, - PyObject *filename) +reload_singlephase_extension(PyThreadState *tstate, + struct extensions_cache_value *cached, + struct _Py_ext_module_loader_info *info) { - /* Only single-phase init modules will be in the cache. */ - PyModuleDef *def = _extensions_cache_get(filename, name); - if (def == NULL) { - return NULL; - } + PyModuleDef *def = cached->def; + assert(def != NULL); + assert_singlephase(cached); + PyObject *mod = NULL; /* It may have been successfully imported previously in an interpreter that allows legacy modules but is not allowed in the current interpreter. */ - const char *name_buf = PyUnicode_AsUTF8(name); + const char *name_buf = PyUnicode_AsUTF8(info->name); assert(name_buf != NULL); if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { return NULL; } - PyObject *mod, *mdict; - PyObject *modules = MODULES(tstate->interp); - + PyObject *modules = get_modules_dict(tstate, true); if (def->m_size == -1) { - PyObject *m_copy = def->m_base.m_copy; /* Module does not support repeated initialization */ + assert(cached->m_init == NULL); + assert(def->m_base.m_init == NULL); + // XXX Copying the cached dict may break interpreter isolation. 
+ // We could solve this by temporarily acquiring the original + // interpreter's GIL. + PyObject *m_copy = get_cached_m_dict(cached, info->name, info->path); if (m_copy == NULL) { - /* It might be a core module (e.g. sys & builtins), - for which we don't set m_copy. */ - m_copy = get_core_module_dict(tstate->interp, name, filename); - if (m_copy == NULL) { - return NULL; - } + assert(!PyErr_Occurred()); + return NULL; } - mod = import_add_module(tstate, name); + mod = import_add_module(tstate, info->name); if (mod == NULL) { + Py_DECREF(m_copy); return NULL; } - mdict = PyModule_GetDict(mod); + PyObject *mdict = PyModule_GetDict(mod); if (mdict == NULL) { + Py_DECREF(m_copy); Py_DECREF(mod); return NULL; } - if (PyDict_Update(mdict, m_copy)) { + int rc = PyDict_Update(mdict, m_copy); + Py_DECREF(m_copy); + if (rc < 0) { Py_DECREF(mod); return NULL; } +#ifdef Py_GIL_DISABLED + if (def->m_base.m_copy != NULL) { + // For non-core modules, fetch the GIL slot that was stored by + // import_run_extension(). + ((PyModuleObject *)mod)->md_gil = cached->md_gil; + } +#endif + /* We can't set mod->md_def if it's missing, + * because _PyImport_ClearModulesByIndex() might break + * due to violating interpreter isolation. + * See the note in set_cached_m_dict(). + * Until that is solved, we leave md_def set to NULL. */ + assert(_PyModule_GetDef(mod) == NULL + || _PyModule_GetDef(mod) == def); } else { - if (def->m_base.m_init == NULL) + assert(cached->m_dict == NULL); + assert(def->m_base.m_copy == NULL); + // XXX Use cached->m_init. 
+ PyModInitFunction p0 = def->m_base.m_init; + if (p0 == NULL) { + assert(!PyErr_Occurred()); return NULL; - mod = _PyImport_InitFunc_TrampolineCall(def->m_base.m_init); - if (mod == NULL) + } + struct _Py_ext_module_loader_result res; + if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); return NULL; - if (PyObject_SetItem(modules, name, mod) == -1) { + } + assert(!PyErr_Occurred()); + assert(res.err == NULL); + assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); + mod = res.module; + /* Technically, the init function could return a different module def. + * Then we would probably need to update the global cache. + * However, we don't expect anyone to change the def. */ + assert(res.def == def); + _Py_ext_module_loader_result_clear(&res); + + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + + if (PyObject_SetItem(modules, info->name, mod) == -1) { Py_DECREF(mod); return NULL; } } - if (_modules_by_index_set(tstate->interp, def, mod) < 0) { - PyMapping_DelItem(modules, name); + + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_set(tstate->interp, index, mod) < 0) { + PyMapping_DelItem(modules, info->name); Py_DECREF(mod); return NULL; } + return mod; +} + +static PyObject * +import_find_extension(PyThreadState *tstate, + struct _Py_ext_module_loader_info *info, + struct extensions_cache_value **p_cached) +{ + /* Only single-phase init modules will be in the cache.
*/ + struct extensions_cache_value *cached + = _extensions_cache_get(info->path, info->name); + if (cached == NULL) { + return NULL; + } + assert(cached->def != NULL); + assert_singlephase(cached); + *p_cached = cached; + + /* It may have been successfully imported previously + in an interpreter that allows legacy modules + but is not allowed in the current interpreter. */ + const char *name_buf = PyUnicode_AsUTF8(info->name); + assert(name_buf != NULL); + if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { + return NULL; + } + + PyObject *mod = reload_singlephase_extension(tstate, cached, info); + if (mod == NULL) { + return NULL; + } + int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose; if (verbose) { PySys_FormatStderr("import %U # previously loaded (%R)\n", - name, filename); + info->name, info->path); + } + + return mod; +} + +static PyObject * +import_run_extension(PyThreadState *tstate, PyModInitFunction p0, + struct _Py_ext_module_loader_info *info, + PyObject *spec, PyObject *modules) +{ + /* Core modules go through _PyImport_FixupBuiltin(). */ + assert(!is_core_module(tstate->interp, info->name, info->path)); + + PyObject *mod = NULL; + PyModuleDef *def = NULL; + struct extensions_cache_value *cached = NULL; + const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + + /* We cannot know if the module is single-phase init or + * multi-phase init until after we call its init function. Even + * in isolated interpreters (that do not support single-phase init), + * the init function will run without restriction. For multi-phase + * init modules that isn't a problem because the init function only + * runs PyModuleDef_Init() on the module's def and then returns it. + * + * However, for single-phase init the module's init function will + * create the module, create other objects (and allocate other + * memory), populate it and its module state, and initialize static + * types.
Some modules store other objects and data in global C + * variables and register callbacks with the runtime/stdlib or + * even external libraries (which is part of why we can't just + * dlclose() the module in the error case). That's a problem + * for isolated interpreters since all of the above happens + * and only then will the import fail. Memory will leak, + * callbacks will still get used, and sometimes there + * will be crashes (memory access violations + * and use-after-free). + * + * To put it another way, if the module is single-phase init + * then the import will probably break interpreter isolation + * and should fail ASAP. However, the module's init function + * will still get run. That means it may still store state + * in the shared-object/DLL address space (which never gets + * closed/cleared), including objects (e.g. static types). + * This is a problem for isolated subinterpreters since each + * has its own object allocator. If the loaded shared-object + * still holds a reference to an object after the corresponding + * interpreter has finalized then either we must let it leak + * or else any later use of that object by another interpreter + * (or across multiple init-fini cycles) will crash the process. + * + * To avoid all of that, we make sure the module's init function + * is always run first with the main interpreter active. If it was + * already the main interpreter then we can continue loading the + * module like normal. Otherwise, right after the init function, + * we take care of some import state bookkeeping, switch back + * to the subinterpreter, check for single-phase init, + * and then continue loading like normal. 
*/ + + bool switched = false; + /* We *could* leave in place a legacy interpreter here + * (one that shares obmalloc/GIL with main interp), + * but there isn't a big advantage, we anticipate + * such interpreters will be increasingly uncommon, + * and the code is a bit simpler if we always switch + * to the main interpreter. */ + PyThreadState *main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return NULL; + } + else if (main_tstate != tstate) { + switched = true; + /* In the switched case, we could play it safe + * by getting the main interpreter's import lock here. + * It's unlikely to matter though. */ + } + + struct _Py_ext_module_loader_result res; + int rc = _PyImport_RunModInitFunc(p0, info, &res); + if (rc < 0) { + /* We discard res.def. */ + assert(res.module == NULL); + } + else { + assert(!PyErr_Occurred()); + assert(res.err == NULL); + + mod = res.module; + res.module = NULL; + def = res.def; + assert(def != NULL); + + /* Do anything else that should be done + * while still using the main interpreter. */ + if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + PyObject *filename = NULL; + if (switched) { + // The original filename may be allocated by subinterpreter's + // obmalloc, so we create a copy here. + filename = _PyUnicode_Copy(info->filename); + if (filename == NULL) { + return NULL; + } + } else { + filename = Py_NewRef(info->filename); + } + // XXX There's a refleak somewhere with the filename. + // Until we can track it down, we immortalize it. + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyUnicode_InternImmortal(interp, &filename); + + if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + + /* Update global import state. 
*/ + assert(def->m_base.m_index != 0); + struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, + .origin=info->origin, +#ifdef Py_GIL_DISABLED + .md_gil=((PyModuleObject *)mod)->md_gil, +#endif + }; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); + singlephase.m_init = p0; + } + cached = update_global_state_for_extension( + main_tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { + assert(PyErr_Occurred()); + goto main_finally; + } + } + } + +main_finally: + /* Switch back to the subinterpreter. */ + if (switched) { + assert(main_tstate != tstate); + switch_back_from_main_interpreter(tstate, main_tstate, mod); + /* Any module we got from the init function will have to be + * reloaded in the subinterpreter. */ + mod = NULL; + } + + /*****************************************************************/ + /* At this point we are back to the interpreter we started with. */ + /*****************************************************************/ + + /* Finally we handle the error return from _PyImport_RunModInitFunc(). */ + if (rc < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); + goto error; + } + + if (res.kind == _Py_ext_module_kind_MULTIPHASE) { + assert_multiphase_def(def); + assert(mod == NULL); + /* Note that we cheat a little by not repeating the calls + * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). 
*/ + mod = PyModule_FromDefAndSpec(def, spec); + if (mod == NULL) { + goto error; + } + } + else { + assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); + assert_singlephase_def(def); + + if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { + goto error; + } + assert(!PyErr_Occurred()); + + if (switched) { + /* We switched to the main interpreter to run the init + * function, so now we will "reload" the module from the + * cached data using the original subinterpreter. */ + assert(mod == NULL); + mod = reload_singlephase_extension(tstate, cached, info); + if (mod == NULL) { + goto error; + } + assert(!PyErr_Occurred()); + assert(PyModule_Check(mod)); + } + else { + assert(mod != NULL); + assert(PyModule_Check(mod)); + + /* Update per-interpreter import state. */ + PyObject *modules = get_modules_dict(tstate, true); + if (finish_singlephase_extension( + tstate, mod, cached, info->name, modules) < 0) + { + goto error; + } + } } + + _Py_ext_module_loader_result_clear(&res); return mod; + +error: + Py_XDECREF(mod); + _Py_ext_module_loader_result_clear(&res); + return NULL; } + +// Used in _PyImport_ClearExtension; see notes there. static int clear_singlephase_extension(PyInterpreterState *interp, - PyObject *name, PyObject *filename) + PyObject *name, PyObject *path) { - PyModuleDef *def = _extensions_cache_get(filename, name); - if (def == NULL) { + struct extensions_cache_value *cached = _extensions_cache_get(path, name); + if (cached == NULL) { if (PyErr_Occurred()) { return -1; } return 0; } + PyModuleDef *def = cached->def; /* Clear data set when the module was initially loaded. */ def->m_base.m_init = NULL; Py_CLEAR(def->m_base.m_copy); - // We leave m_index alone since there's no reason to reset it. + def->m_base.m_index = 0; /* Clear the PyState_*Module() cache entry. 
*/ - if (_modules_by_index_check(interp, def->m_base.m_index) == NULL) { - if (_modules_by_index_clear_one(interp, def) < 0) { + Py_ssize_t index = _get_cached_module_index(cached); + if (_modules_by_index_check(interp, index) == NULL) { + if (_modules_by_index_clear_one(interp, index) < 0) { return -1; } } + /* We must use the main interpreter to clean up the cache. + * See the note in import_run_extension(). */ + PyThreadState *tstate = PyThreadState_GET(); + PyThreadState *main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return -1; + } + /* Clear the cached module def. */ - _extensions_cache_delete(filename, name); + _extensions_cache_delete(path, name); + + if (main_tstate != tstate) { + switch_back_from_main_interpreter(tstate, main_tstate, NULL); + } return 0; } @@ -1345,21 +2194,62 @@ clear_singlephase_extension(PyInterpreterState *interp, /*******************/ int -_PyImport_FixupBuiltin(PyObject *mod, const char *name, PyObject *modules) +_PyImport_FixupBuiltin(PyThreadState *tstate, PyObject *mod, const char *name, + PyObject *modules) { int res = -1; + assert(mod != NULL && PyModule_Check(mod)); + PyObject *nameobj; nameobj = PyUnicode_InternFromString(name); if (nameobj == NULL) { return -1; } - if (PyObject_SetItem(modules, nameobj, mod) < 0) { + + PyModuleDef *def = PyModule_GetDef(mod); + if (def == NULL) { + PyErr_BadInternalCall(); goto finally; } - if (fix_up_extension(mod, nameobj, nameobj) < 0) { - PyMapping_DelItem(modules, nameobj); + + /* We only use _PyImport_FixupBuiltin() for the core builtin modules + * (sys and builtins). These modules are single-phase init with no + * module state, but we also don't populate def->m_base.m_copy + * for them. 
*/ + assert(is_core_module(tstate->interp, nameobj, nameobj)); + assert_singlephase_def(def); + assert(def->m_size == -1); + assert(def->m_base.m_copy == NULL); + assert(def->m_base.m_index >= 0); + + /* We aren't using import_find_extension() for core modules, + * so we have to do the extra check to make sure the module + * isn't already in the global cache before calling + * update_global_state_for_extension(). */ + struct extensions_cache_value *cached + = _extensions_cache_get(nameobj, nameobj); + if (cached == NULL) { + struct singlephase_global_update singlephase = { + .m_index=def->m_base.m_index, + /* We don't want def->m_base.m_copy populated. */ + .m_dict=NULL, + .origin=_Py_ext_module_origin_CORE, +#ifdef Py_GIL_DISABLED + /* Unused when m_dict == NULL. */ + .md_gil=NULL, +#endif + }; + cached = update_global_state_for_extension( + tstate, nameobj, nameobj, def, &singlephase); + if (cached == NULL) { + goto finally; + } + } + + if (finish_singlephase_extension(tstate, mod, cached, nameobj, modules) < 0) { goto finally; } + res = 0; finally: @@ -1388,53 +2278,76 @@ is_builtin(PyObject *name) static PyObject* create_builtin(PyThreadState *tstate, PyObject *name, PyObject *spec) { - const char *oldcontext, *newcontext; - PyObject *mod = import_find_extension(tstate, name, name); - if (mod || _PyErr_Occurred(tstate)) { - return mod; + struct _Py_ext_module_loader_info info; + if (_Py_ext_module_loader_info_init_for_builtin(&info, name) < 0) { + return NULL; } - PyObject *modules = MODULES(tstate->interp); - for (struct _inittab *p = INITTAB; p->name != NULL; p++) { - if (_PyUnicode_EqualToASCIIString(name, p->name)) { - if (p->initfunc == NULL) { - /* Cannot re-init internal module ("sys" or "builtins") */ - mod = PyImport_AddModuleObject(name); - return Py_XNewRef(mod); - } - newcontext = PyUnicode_AsUTF8(name); - if (newcontext == NULL) { - Py_RETURN_NONE; - } - oldcontext = _PyImport_SwapPackageContext(newcontext); - mod = 
_PyImport_InitFunc_TrampolineCall(*p->initfunc); - _PyImport_SwapPackageContext(oldcontext); - if (mod == NULL) { - return NULL; - } + struct extensions_cache_value *cached = NULL; + PyObject *mod = import_find_extension(tstate, &info, &cached); + if (mod != NULL) { + assert(!_PyErr_Occurred(tstate)); + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached); + goto finally; + } + else if (_PyErr_Occurred(tstate)) { + goto finally; + } - if (PyObject_TypeCheck(mod, &PyModuleDef_Type)) { - return PyModule_FromDefAndSpec((PyModuleDef*)mod, spec); - } - else { - /* Remember pointer to module init function. */ - PyModuleDef *def = PyModule_GetDef(mod); - if (def == NULL) { - return NULL; - } + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. 
*/ + _extensions_cache_delete(info.path, info.name); + } - def->m_base.m_init = p->initfunc; - if (_PyImport_FixupExtensionObject(mod, name, name, - modules) < 0) { - return NULL; - } - return mod; - } + struct _inittab *found = NULL; + for (struct _inittab *p = INITTAB; p->name != NULL; p++) { + if (_PyUnicode_EqualToASCIIString(info.name, p->name)) { + found = p; } } + if (found == NULL) { + // not found + mod = Py_NewRef(Py_None); + goto finally; + } - // not found - Py_RETURN_NONE; + PyModInitFunction p0 = (PyModInitFunction)found->initfunc; + if (p0 == NULL) { + /* Cannot re-init internal module ("sys" or "builtins") */ + assert(is_core_module(tstate->interp, info.name, info.path)); + mod = import_add_module(tstate, info.name); + goto finally; + } + +#ifdef Py_GIL_DISABLED + // This call (and the corresponding call to _PyImport_CheckGILForModule()) + // would ideally be inside import_run_extension(). They are kept in the + // callers for now because that would complicate the control flow inside + // import_run_extension(). It should be possible to restructure + // import_run_extension() to address this. + _PyEval_EnableGILTransient(tstate); +#endif + /* Now load it. 
*/ + mod = import_run_extension( + tstate, p0, &info, spec, get_modules_dict(tstate, true)); +#ifdef Py_GIL_DISABLED + if (_PyImport_CheckGILForModule(mod, info.name) < 0) { + Py_CLEAR(mod); + goto finally; + } +#endif + +finally: + _Py_ext_module_loader_info_clear(&info); + return mod; } @@ -1668,7 +2581,7 @@ PyImport_ExecCodeModuleWithPathnames(const char *name, PyObject *co, external= PyObject_GetAttrString(IMPORTLIB(interp), "_bootstrap_external"); if (external != NULL) { - pathobj = _PyObject_CallMethodOneArg( + pathobj = PyObject_CallMethodOneArg( external, &_Py_ID(_get_sourcefile), cpathobj); Py_DECREF(external); } @@ -2022,7 +2935,6 @@ look_up_frozen(const char *name) struct frozen_info { PyObject *nameobj; const char *data; - PyObject *(*get_code)(void); Py_ssize_t size; bool is_package; bool is_alias; @@ -2056,7 +2968,6 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) if (info != NULL) { info->nameobj = nameobj; // borrowed info->data = (const char *)p->code; - info->get_code = p->get_code; info->size = p->size; info->is_package = p->is_package; if (p->size < 0) { @@ -2068,10 +2979,6 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases, &info->origname); } - if (p->code == NULL && p->size == 0 && p->get_code != NULL) { - /* It is only deepfrozen. */ - return FROZEN_OKAY; - } if (p->code == NULL) { /* It is frozen but marked as un-importable. */ return FROZEN_EXCLUDED; @@ -2086,11 +2993,6 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) static PyObject * unmarshal_frozen_code(PyInterpreterState *interp, struct frozen_info *info) { - if (info->get_code && _Py_IsMainInterpreter(interp)) { - PyObject *code = info->get_code(); - assert(code != NULL); - return code; - } PyObject *co = PyMarshal_ReadObjectFromString(info->data, info->size); if (co == NULL) { /* Does not contain executable code. 
*/ @@ -2282,11 +3184,12 @@ init_importlib(PyThreadState *tstate, PyObject *sysmod) if (PyImport_ImportFrozenModule("_frozen_importlib") <= 0) { return -1; } - PyObject *importlib = PyImport_AddModule("_frozen_importlib"); // borrowed + + PyObject *importlib = PyImport_AddModuleRef("_frozen_importlib"); if (importlib == NULL) { return -1; } - IMPORTLIB(interp) = Py_NewRef(importlib); + IMPORTLIB(interp) = importlib; // Import the _imp module if (verbose) { @@ -2409,11 +3312,11 @@ get_path_importer(PyThreadState *tstate, PyObject *path_importer_cache, if (nhooks < 0) return NULL; /* Shouldn't happen */ - importer = PyDict_GetItemWithError(path_importer_cache, p); - if (importer != NULL || _PyErr_Occurred(tstate)) { - return Py_XNewRef(importer); + if (PyDict_GetItemRef(path_importer_cache, p, &importer) != 0) { + // found or error + return importer; } - + // not found /* set path_importer_cache[p] to None to avoid recursion */ if (PyDict_SetItem(path_importer_cache, p, Py_None) != 0) return NULL; @@ -2469,12 +3372,11 @@ int _PyImport_InitDefaultImportFunc(PyInterpreterState *interp) { // Get the __import__ function - PyObject *import_func = _PyDict_GetItemStringWithError(interp->builtins, - "__import__"); - if (import_func == NULL) { + PyObject *import_func; + if (PyDict_GetItemStringRef(interp->builtins, "__import__", &import_func) <= 0) { return -1; } - IMPORT_FUNC(interp) = Py_NewRef(import_func); + IMPORT_FUNC(interp) = import_func; return 0; } @@ -2515,6 +3417,12 @@ PyImport_ImportModule(const char *name) PyObject * PyImport_ImportModuleNoBlock(const char *name) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyImport_ImportModuleNoBlock() is deprecated and scheduled for " + "removal in Python 3.15. 
Use PyImport_ImportModule() instead.", 1)) + { + return NULL; + } return PyImport_ImportModule(name); } @@ -2595,7 +3503,7 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level { PyObject *abs_name; PyObject *package = NULL; - PyObject *spec; + PyObject *spec = NULL; Py_ssize_t last_dot; PyObject *base; int level_up; @@ -2608,20 +3516,18 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level _PyErr_SetString(tstate, PyExc_TypeError, "globals must be a dict"); goto error; } - package = PyDict_GetItemWithError(globals, &_Py_ID(__package__)); + if (PyDict_GetItemRef(globals, &_Py_ID(__package__), &package) < 0) { + goto error; + } if (package == Py_None) { + Py_DECREF(package); package = NULL; } - else if (package == NULL && _PyErr_Occurred(tstate)) { - goto error; - } - spec = PyDict_GetItemWithError(globals, &_Py_ID(__spec__)); - if (spec == NULL && _PyErr_Occurred(tstate)) { + if (PyDict_GetItemRef(globals, &_Py_ID(__spec__), &spec) < 0) { goto error; } if (package != NULL) { - Py_INCREF(package); if (!PyUnicode_Check(package)) { _PyErr_SetString(tstate, PyExc_TypeError, "package must be a string"); @@ -2665,16 +3571,15 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level goto error; } - package = PyDict_GetItemWithError(globals, &_Py_ID(__name__)); + if (PyDict_GetItemRef(globals, &_Py_ID(__name__), &package) < 0) { + goto error; + } if (package == NULL) { - if (!_PyErr_Occurred(tstate)) { - _PyErr_SetString(tstate, PyExc_KeyError, - "'__name__' not in globals"); - } + _PyErr_SetString(tstate, PyExc_KeyError, + "'__name__' not in globals"); goto error; } - Py_INCREF(package); if (!PyUnicode_Check(package)) { _PyErr_SetString(tstate, PyExc_TypeError, "__name__ must be a string"); @@ -2688,10 +3593,6 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level if (!haspath) { Py_ssize_t dot; - if (PyUnicode_READY(package) < 0) { - goto error; - } - dot = 
PyUnicode_FindChar(package, '.', 0, PyUnicode_GET_LENGTH(package), -1); if (dot == -2) { @@ -2726,6 +3627,7 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level } } + Py_XDECREF(spec); base = PyUnicode_Substring(package, 0, last_dot); Py_DECREF(package); if (base == NULL || PyUnicode_GET_LENGTH(name) == 0) { @@ -2742,6 +3644,7 @@ resolve_name(PyThreadState *tstate, PyObject *name, PyObject *globals, int level "with no known parent package"); error: + Py_XDECREF(spec); Py_XDECREF(package); return NULL; } @@ -2755,7 +3658,7 @@ import_find_and_load(PyThreadState *tstate, PyObject *abs_name) #define import_level FIND_AND_LOAD(interp).import_level #define accumulated FIND_AND_LOAD(interp).accumulated - _PyTime_t t1 = 0, accumulated_copy = accumulated; + PyTime_t t1 = 0, accumulated_copy = accumulated; PyObject *sys_path, *sys_meta_path, *sys_path_hooks; if (_PySys_GetOptionalAttrString("path", &sys_path) < 0) { @@ -2799,7 +3702,8 @@ import_find_and_load(PyThreadState *tstate, PyObject *abs_name) #undef header import_level++; - t1 = _PyTime_GetPerfCounter(); + // ignore error: don't block import if reading the clock fails + (void)PyTime_PerfCounterRaw(&t1); accumulated = 0; } @@ -2814,7 +3718,9 @@ import_find_and_load(PyThreadState *tstate, PyObject *abs_name) mod != NULL); if (import_time) { - _PyTime_t cum = _PyTime_GetPerfCounter() - t1; + PyTime_t t2; + (void)PyTime_PerfCounterRaw(&t2); + PyTime_t cum = t2 - t1; import_level--; fprintf(stderr, "import time: %9ld | %10ld | %*s%s\n", @@ -2856,9 +3762,6 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, "module name must be a string"); goto error; } - if (PyUnicode_READY(name) < 0) { - goto error; - } if (level < 0) { _PyErr_SetString(tstate, PyExc_ValueError, "level must be >= 0"); goto error; @@ -2936,15 +3839,17 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, } final_mod = import_get_module(tstate, to_return); - Py_DECREF(to_return); if 
(final_mod == NULL) { if (!_PyErr_Occurred(tstate)) { _PyErr_Format(tstate, PyExc_KeyError, "%R not in sys.modules as expected", to_return); } + Py_DECREF(to_return); goto error; } + + Py_DECREF(to_return); } } else { @@ -2952,12 +3857,11 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *globals, } } else { - PyObject *path; - if (_PyObject_LookupAttr(mod, &_Py_ID(__path__), &path) < 0) { + int has_path = PyObject_HasAttrWithError(mod, &_Py_ID(__path__)); + if (has_path < 0) { goto error; } - if (path) { - Py_DECREF(path); + if (has_path) { final_mod = PyObject_CallMethodObjArgs( IMPORTLIB(interp), &_Py_ID(_handle_fromlist), mod, fromlist, IMPORT_FUNC(interp), NULL); @@ -3210,18 +4114,13 @@ _PyImport_FiniCore(PyInterpreterState *interp) int verbose = _PyInterpreterState_GetConfig(interp)->verbose; if (_PySys_ClearAttrString(interp, "meta_path", verbose) < 0) { - PyErr_WriteUnraisable(NULL); + PyErr_FormatUnraisable("Exception ignored on clearing sys.meta_path"); } // XXX Pull in most of finalize_modules() in pylifecycle.c. if (_PySys_ClearAttrString(interp, "modules", verbose) < 0) { - PyErr_WriteUnraisable(NULL); - } - - if (IMPORT_LOCK(interp) != NULL) { - PyThread_free_lock(IMPORT_LOCK(interp)); - IMPORT_LOCK(interp) = NULL; + PyErr_FormatUnraisable("Exception ignored on clearing sys.modules"); } _PyImport_ClearCore(interp); @@ -3296,10 +4195,10 @@ _PyImport_FiniExternal(PyInterpreterState *interp) // XXX Uninstall importlib metapath importers here? 
if (_PySys_ClearAttrString(interp, "path_importer_cache", verbose) < 0) { - PyErr_WriteUnraisable(NULL); + PyErr_FormatUnraisable("Exception ignored on clearing sys.path_importer_cache"); } if (_PySys_ClearAttrString(interp, "path_hooks", verbose) < 0) { - PyErr_WriteUnraisable(NULL); + PyErr_FormatUnraisable("Exception ignored on clearing sys.path_hooks"); } } @@ -3356,8 +4255,7 @@ _imp_lock_held_impl(PyObject *module) /*[clinic end generated code: output=8b89384b5e1963fc input=9b088f9b217d9bdf]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - return PyBool_FromLong( - IMPORT_LOCK_THREAD(interp) != PYTHREAD_INVALID_THREAD_ID); + return PyBool_FromLong(PyMutex_IsLocked(&IMPORT_LOCK(interp).mutex)); } /*[clinic input] @@ -3391,11 +4289,12 @@ _imp_release_lock_impl(PyObject *module) /*[clinic end generated code: output=7faab6d0be178b0a input=934fb11516dd778b]*/ { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyImport_ReleaseLock(interp) < 0) { + if (!_PyRecursiveMutex_IsLockedByCurrentThread(&IMPORT_LOCK(interp))) { PyErr_SetString(PyExc_RuntimeError, "not holding the import lock"); return NULL; } + _PyImport_ReleaseLock(interp); Py_RETURN_NONE; } @@ -3621,7 +4520,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, if (info.nameobj == NULL) { info.nameobj = name; } - if (info.size == 0 && info.get_code == NULL) { + if (info.size == 0) { /* Does not contain executable code. 
*/ set_frozen_error(FROZEN_INVALID, name); return NULL; @@ -3751,9 +4650,16 @@ _imp__override_multi_interp_extensions_check_impl(PyObject *module, "cannot be used in the main interpreter"); return NULL; } +#ifdef Py_GIL_DISABLED + PyErr_SetString(PyExc_RuntimeError, + "_imp._override_multi_interp_extensions_check() " + "cannot be used in the free-threaded build"); + return NULL; +#else int oldvalue = OVERRIDE_MULTI_INTERP_EXTENSIONS_CHECK(interp); OVERRIDE_MULTI_INTERP_EXTENSIONS_CHECK(interp) = override; return PyLong_FromLong(oldvalue); +#endif } #ifdef HAVE_DYNAMIC_LOADING @@ -3772,44 +4678,88 @@ static PyObject * _imp_create_dynamic_impl(PyObject *module, PyObject *spec, PyObject *file) /*[clinic end generated code: output=83249b827a4fde77 input=c31b954f4cf4e09d]*/ { - PyObject *mod, *name, *path; - FILE *fp; + PyObject *mod = NULL; + PyThreadState *tstate = _PyThreadState_GET(); - name = PyObject_GetAttrString(spec, "name"); - if (name == NULL) { + struct _Py_ext_module_loader_info info; + if (_Py_ext_module_loader_info_init_from_spec(&info, spec) < 0) { return NULL; } - path = PyObject_GetAttrString(spec, "origin"); - if (path == NULL) { - Py_DECREF(name); - return NULL; + struct extensions_cache_value *cached = NULL; + mod = import_find_extension(tstate, &info, &cached); + if (mod != NULL) { + assert(!_PyErr_Occurred(tstate)); + assert(cached != NULL); + /* The module might not have md_def set in certain reload cases. */ + assert(_PyModule_GetDef(mod) == NULL + || cached->def == _PyModule_GetDef(mod)); + assert_singlephase(cached); + goto finally; } + else if (_PyErr_Occurred(tstate)) { + goto finally; + } + /* Otherwise it must be multi-phase init or the first time it's loaded. 
*/ - PyThreadState *tstate = _PyThreadState_GET(); - mod = import_find_extension(tstate, name, path); - if (mod != NULL || _PyErr_Occurred(tstate)) { - assert(mod == NULL || !_PyErr_Occurred(tstate)); + /* If the module was added to the global cache + * but def->m_base.m_copy was cleared (e.g. subinterp fini) + * then we have to do a little dance here. */ + if (cached != NULL) { + assert(cached->def->m_base.m_copy == NULL); + /* For now we clear the cache and move on. */ + _extensions_cache_delete(info.path, info.name); + } + + if (PySys_Audit("import", "OOOOO", info.name, info.filename, + Py_None, Py_None, Py_None) < 0) + { goto finally; } + /* We would move this (and the fclose() below) into + * _PyImport_GetModInitFunc(), but it isn't clear if the intervening + * code relies on fp still being open. */ + FILE *fp; if (file != NULL) { - fp = _Py_fopen_obj(path, "r"); + fp = _Py_fopen_obj(info.filename, "r"); if (fp == NULL) { goto finally; } } - else + else { fp = NULL; + } - mod = _PyImport_LoadDynamicModuleWithSpec(spec, fp); + PyModInitFunction p0 = _PyImport_GetModInitFunc(&info, fp); + if (p0 == NULL) { + goto finally; + } + +#ifdef Py_GIL_DISABLED + // This call (and the corresponding call to _PyImport_CheckGILForModule()) + // would ideally be inside import_run_extension(). They are kept in the + // callers for now because that would complicate the control flow inside + // import_run_extension(). It should be possible to restructure + // import_run_extension() to address this. + _PyEval_EnableGILTransient(tstate); +#endif + mod = import_run_extension( + tstate, p0, &info, spec, get_modules_dict(tstate, true)); +#ifdef Py_GIL_DISABLED + if (_PyImport_CheckGILForModule(mod, info.name) < 0) { + Py_CLEAR(mod); + goto finally; + } +#endif - if (fp) + // XXX Shouldn't this happen in the error cases too (i.e. in "finally")? 
+ if (fp) { fclose(fp); + } finally: - Py_DECREF(name); - Py_DECREF(path); + _Py_ext_module_loader_info_clear(&info); return mod; } @@ -3909,14 +4859,9 @@ imp_module_exec(PyObject *module) { const wchar_t *mode = _Py_GetConfig()->check_hash_pycs_mode; PyObject *pyc_mode = PyUnicode_FromWideChar(mode, -1); - if (pyc_mode == NULL) { - return -1; - } - if (PyModule_AddObjectRef(module, "check_hash_based_pycs", pyc_mode) < 0) { - Py_DECREF(pyc_mode); + if (PyModule_Add(module, "check_hash_based_pycs", pyc_mode) < 0) { return -1; } - Py_DECREF(pyc_mode); return 0; } @@ -3925,6 +4870,7 @@ imp_module_exec(PyObject *module) static PyModuleDef_Slot imp_slots[] = { {Py_mod_exec, imp_module_exec}, {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; @@ -3942,8 +4888,3 @@ PyInit__imp(void) { return PyModuleDef_Init(&imp_module); } - - -#ifdef __cplusplus -} -#endif |
