diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index b2aae774a12205..64282d0f2d0bcf 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -2454,10 +2454,6 @@ def setUpClass(cls): # Start fresh. cls.clean_up() - @classmethod - def tearDownClass(cls): - restore__testsinglephase() - def tearDown(self): # Clean up the module. self.clean_up() @@ -3014,20 +3010,20 @@ def test_basic_multiple_interpreters_deleted_no_reset(self): # * alive in 0 interpreters # * module def in _PyRuntime.imports.extensions # * mod init func ran for the first time (since reset) - # * m_copy is NULL (claered when the interpreter was destroyed) + # * m_copy is still set (owned by main interpreter) # * module's global state was initialized, not reset # Use a subinterpreter that sticks around. loaded_interp1 = self.import_in_subinterp(interpid1) self.check_common(loaded_interp1) - self.check_semi_fresh(loaded_interp1, loaded_main, base) + self.check_copied(loaded_interp1, base) # At this point: # * alive in 1 interpreter (interp1) # * module def still in _PyRuntime.imports.extensions - # * mod init func ran for the second time (since reset) - # * m_copy was copied from interp1 (was NULL) - # * module's global state was updated, not reset + # * mod init func did not run again + # * m_copy was not changed + # * module's global state was not touched # Use a subinterpreter while the previous one is still alive. loaded_interp2 = self.import_in_subinterp(interpid2) @@ -3038,8 +3034,8 @@ def test_basic_multiple_interpreters_deleted_no_reset(self): # * alive in 2 interpreters (interp1, interp2) # * module def still in _PyRuntime.imports.extensions # * mod init func did not run again - # * m_copy was copied from interp2 (was from interp1) - # * module's global state was updated, not reset + # * m_copy was not changed + # * module's global state was not touched @requires_subinterpreters def test_basic_multiple_interpreters_reset_each(self): diff --git a/Lib/test/test_interpreters/__init__.py b/Lib/test/test_interpreters/__init__.py index 4b16ecc31156a5..52ff553f60d0d7 100644 --- a/Lib/test/test_interpreters/__init__.py +++ b/Lib/test/test_interpreters/__init__.py @@ -1,5 +1,6 @@ import os -from test.support import load_package_tests +from test.support import load_package_tests, Py_GIL_DISABLED -def load_tests(*args): - return load_package_tests(os.path.dirname(__file__), *args) +if not Py_GIL_DISABLED: + def load_tests(*args): + return load_package_tests(os.path.dirname(__file__), *args) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst new file mode 100644 index 00000000000000..6b69c9311013bb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-06-10-57-54.gh-issue-117953.DqCzIs.rst @@ -0,0 +1,8 @@ +When a builtin or extension module is imported for the first time, while a +subinterpreter is active, the module's init function is now run by the main +interpreter first before import continues in the subinterpreter. +Consequently, single-phase init modules now fail in an isolated +subinterpreter without the init function running under that interpreter, +whereas before it would run under the subinterpreter *before* failing, +potentially leaving behind global state and callbacks and otherwise leaving +the module in an inconsistent state. diff --git a/Python/import.c b/Python/import.c index 0b58567dde1fec..ba44477318d473 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1154,9 +1154,6 @@ init_cached_m_dict(struct extensions_cache_value *value, PyObject *m_dict) return -1; } // XXX We may want to make the copy immortal. - // XXX This incref shouldn't be necessary. We are decref'ing - // one to many times somewhere. - Py_INCREF(copied); value->_m_dict = (struct cached_m_dict){ .copied=copied, @@ -1580,6 +1577,26 @@ _PyImport_CheckGILForModule(PyObject* module, PyObject *module_name) } #endif +static PyThreadState * +switch_to_main_interpreter(PyThreadState *tstate) +{ + if (_Py_IsMainInterpreter(tstate->interp)) { + return tstate; + } + PyThreadState *main_tstate = PyThreadState_New(_PyInterpreterState_Main()); + if (main_tstate == NULL) { + return NULL; + } + main_tstate->_whence = _PyThreadState_WHENCE_EXEC; +#ifndef NDEBUG + PyThreadState *old_tstate = PyThreadState_Swap(main_tstate); + assert(old_tstate == tstate); +#else + (void)PyThreadState_Swap(main_tstate); +#endif + return main_tstate; +} + static PyObject * get_core_module_dict(PyInterpreterState *interp, PyObject *name, PyObject *path) @@ -1936,24 +1953,171 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, struct extensions_cache_value *cached = NULL; const char *name_buf = PyBytes_AS_STRING(info->name_encoded); + /* We cannot know if the module is single-phase init or + * multi-phase init until after we call its init function. Even + * in isolated interpreters (that do not support single-phase init), + * the init function will run without restriction. For multi-phase + * init modules that isn't a problem because the init function only + * runs PyModuleDef_Init() on the module's def and then returns it. + * + * However, for single-phase init the module's init function will + * create the module, create other objects (and allocate other + * memory), populate it and its module state, and initialze static + * types. Some modules store other objects and data in global C + * variables and register callbacks with the runtime/stdlib or + * even external libraries (which is part of why we can't just + * dlclose() the module in the error case). That's a problem + * for isolated interpreters since all of the above happens + * and only then * will the import fail. Memory will leak, + * callbacks will still get used, and sometimes there + * will be crashes (memory access violations + * and use-after-free). + * + * To put it another way, if the module is single-phase init + * then the import will probably break interpreter isolation + * and should fail ASAP. However, the module's init function + * will still get run. That means it may still store state + * in the shared-object/DLL address space (which never gets + * closed/cleared), including objects (e.g. static types). + * This is a problem for isolated subinterpreters since each + * has its own object allocator. If the loaded shared-object + * still holds a reference to an object after the corresponding + * interpreter has finalized then either we must let it leak + * or else any later use of that object by another interpreter + * (or across multiple init-fini cycles) will crash the process. + * + * To avoid all of that, we make sure the module's init function + * is always run first with the main interpreter active. If it was + * already the main interpreter then we can continue loading the + * module like normal. Otherwise, right after the init function, + * we take care of some import state bookkeeping, switch back + * to the subinterpreter, check for single-phase init, + * and then continue loading like normal. */ + + bool switched = false; + /* We *could* leave in place a legacy interpreter here + * (one that shares obmalloc/GIL with main interp), + * but there isn't a big advantage, we anticipate + * such interpreters will be increasingly uncommon, + * and the code is a bit simpler if we always switch + * to the main interpreter. */ + PyThreadState *main_tstate = switch_to_main_interpreter(tstate); + if (main_tstate == NULL) { + return NULL; + } + else if (main_tstate != tstate) { + switched = true; + /* In the switched case, we could play it safe + * by getting the main interpreter's import lock here. + * It's unlikely to matter though. */ + } + struct _Py_ext_module_loader_result res; - if (_PyImport_RunModInitFunc(p0, info, &res) < 0) { + int rc = _PyImport_RunModInitFunc(p0, info, &res); + if (rc < 0) { /* We discard res.def. */ assert(res.module == NULL); - _Py_ext_module_loader_result_apply_error(&res, name_buf); - return NULL; } - assert(!PyErr_Occurred()); - assert(res.err == NULL); + else { + assert(!PyErr_Occurred()); + assert(res.err == NULL); + + mod = res.module; + res.module = NULL; + def = res.def; + assert(def != NULL); + + /* Do anything else that should be done + * while still using the main interpreter. */ + if (res.kind == _Py_ext_module_kind_SINGLEPHASE) { + /* Remember the filename as the __file__ attribute */ + if (info->filename != NULL) { + // XXX There's a refleak somewhere with the filename. + // Until we can track it down, we intern it. + PyObject *filename = Py_NewRef(info->filename); + PyUnicode_InternInPlace(&filename); + if (PyModule_AddObjectRef(mod, "__file__", filename) < 0) { + PyErr_Clear(); /* Not important enough to report */ + } + } + + /* Update global import state. */ + assert(def->m_base.m_index != 0); + struct singlephase_global_update singlephase = { + // XXX Modules that share a def should each get their own index, + // whereas currently they share (which means the per-interpreter + // cache is less reliable than it should be). + .m_index=def->m_base.m_index, + .origin=info->origin, +#ifdef Py_GIL_DISABLED + .md_gil=((PyModuleObject *)mod)->md_gil, +#endif + }; + // gh-88216: Extensions and def->m_base.m_copy can be updated + // when the extension module doesn't support sub-interpreters. + if (def->m_size == -1) { + /* We will reload from m_copy. */ + assert(def->m_base.m_init == NULL); + singlephase.m_dict = PyModule_GetDict(mod); + assert(singlephase.m_dict != NULL); + } + else { + /* We will reload via the init function. */ + assert(def->m_size >= 0); + assert(def->m_base.m_copy == NULL); + singlephase.m_init = p0; + } + cached = update_global_state_for_extension( + tstate, info->path, info->name, def, &singlephase); + if (cached == NULL) { + assert(PyErr_Occurred()); + goto main_finally; + } + } + } - mod = res.module; - res.module = NULL; - def = res.def; - assert(def != NULL); +main_finally: + /* Switch back to the subinterpreter. */ + if (switched) { + assert(main_tstate != tstate); + + /* Handle any exceptions, which we cannot propagate directly + * to the subinterpreter. */ + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_MemoryError)) { + /* We trust it will be caught again soon. */ + PyErr_Clear(); + } + else { + /* Printing the exception should be sufficient. */ + PyErr_PrintEx(0); + } + } + + /* Any module we got from the init function will have to be + * reloaded in the subinterpreter. */ + Py_CLEAR(mod); + + PyThreadState_Clear(main_tstate); + (void)PyThreadState_Swap(tstate); + PyThreadState_Delete(main_tstate); + } + + /*****************************************************************/ + /* At this point we are back to the interpreter we started with. */ + /*****************************************************************/ + + /* Finally we handle the error return from _PyImport_RunModInitFunc(). */ + if (rc < 0) { + _Py_ext_module_loader_result_apply_error(&res, name_buf); + goto error; + } if (res.kind == _Py_ext_module_kind_MULTIPHASE) { assert_multiphase_def(def); assert(mod == NULL); + /* Note that we cheat a little by not repeating the calls + * to _PyImport_GetModInitFunc() and _PyImport_RunModInitFunc(). */ mod = PyModule_FromDefAndSpec(def, spec); if (mod == NULL) { goto error; @@ -1962,57 +2126,35 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, else { assert(res.kind == _Py_ext_module_kind_SINGLEPHASE); assert_singlephase_def(def); - assert(PyModule_Check(mod)); if (_PyImport_CheckSubinterpIncompatibleExtensionAllowed(name_buf) < 0) { goto error; } + assert(!PyErr_Occurred()); - /* Remember the filename as the __file__ attribute */ - if (info->filename != NULL) { - if (PyModule_AddObjectRef(mod, "__file__", info->filename) < 0) { - PyErr_Clear(); /* Not important enough to report */ + if (switched) { + /* We switched to the main interpreter to run the init + * function, so now we will "reload" the module from the + * cached data using the original subinterpreter. */ + assert(mod == NULL); + mod = reload_singlephase_extension(tstate, cached, info); + if (mod == NULL) { + goto error; } - } - - /* Update global import state. */ - assert(def->m_base.m_index != 0); - struct singlephase_global_update singlephase = { - // XXX Modules that share a def should each get their own index, - // whereas currently they share (which means the per-interpreter - // cache is less reliable than it should be). - .m_index=def->m_base.m_index, - .origin=info->origin, -#ifdef Py_GIL_DISABLED - .md_gil=((PyModuleObject *)mod)->md_gil, -#endif - }; - // gh-88216: Extensions and def->m_base.m_copy can be updated - // when the extension module doesn't support sub-interpreters. - if (def->m_size == -1) { - /* We will reload from m_copy. */ - assert(def->m_base.m_init == NULL); - singlephase.m_dict = PyModule_GetDict(mod); - assert(singlephase.m_dict != NULL); + assert(!PyErr_Occurred()); + assert(PyModule_Check(mod)); } else { - /* We will reload via the init function. */ - assert(def->m_size >= 0); - assert(def->m_base.m_copy == NULL); - singlephase.m_init = p0; - } - cached = update_global_state_for_extension( - tstate, info->path, info->name, def, &singlephase); - if (cached == NULL) { - goto error; - } - - /* Update per-interpreter import state. */ - PyObject *modules = get_modules_dict(tstate, true); - if (finish_singlephase_extension( - tstate, mod, cached, info->name, modules) < 0) - { - goto error; + assert(mod != NULL); + assert(PyModule_Check(mod)); + + /* Update per-interpreter import state. */ + PyObject *modules = get_modules_dict(tstate, true); + if (finish_singlephase_extension( + tstate, mod, cached, info->name, modules) < 0) + { + goto error; + } } } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy