diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 9957da1fc5f22a..cdb50a3c21b1b4 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -27,7 +27,21 @@ extern "C" { _PyRuntime.cached_objects.NAME struct _Py_cached_objects { + /* A thread state tied to the main interpreter, + used exclusively for when a global object (e.g. interned strings) + is resized (i.e. deallocated + allocated) from an arbitrary thread. */ + PyThreadState main_tstate; + + /* The dict of interned strings. */ PyObject *interned_strings; + + /* A dict mapping (filename, name) to PyModuleDef for modules. + Only legacy (single-phase init) extension modules are added + and only if they support multiple initialization (m_size >- 0) + or are imported in the main interpreter. + This is initialized lazily in _PyImport_FixupExtensionObject(). + Modules are added there and looked up in _imp.find_extension(). */ + PyObject *extensions; }; #define _Py_GLOBAL_OBJECT(NAME) \ diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index 69ed6273b7e609..ce004813f8178e 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -14,15 +14,10 @@ struct _import_runtime_state { which is just about every time an extension module is imported. See PyInterpreterState.modules_by_index for more info. */ Py_ssize_t last_module_index; - /* A dict mapping (filename, name) to PyModuleDef for modules. - Only legacy (single-phase init) extension modules are added - and only if they support multiple initialization (m_size >- 0) - or are imported in the main interpreter. - This is initialized lazily in _PyImport_FixupExtensionObject(). - Modules are added there and looked up in _imp.find_extension(). */ - PyObject *extensions; /* Package context -- the full module name for package imports */ const char * pkgcontext; + /* The dict of cached module defs is over at + _PyRuntime.cached_objects.extensions. */ }; struct _import_state { diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 7046ec8d9adaaf..9200743f775ba9 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -127,6 +127,13 @@ PyAPI_FUNC(void) _PyThreadState_Init( PyThreadState *tstate); PyAPI_FUNC(void) _PyThreadState_DeleteExcept(PyThreadState *tstate); +extern void _PyThreadState_InitDetached(PyThreadState *, PyInterpreterState *); +extern void _PyThreadState_ClearDetached(PyThreadState *); + +extern PyObject * _Py_GetFromGlobalDict(PyObject *, PyObject *); +extern PyObject * _Py_AddToGlobalDict(PyObject *, PyObject *, PyObject *); +extern PyObject * _Py_PopFromGlobalDict(PyObject *, PyObject *); + static inline void _PyThreadState_UpdateTracingState(PyThreadState *tstate) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index bdecac944dfd3a..b408ca0fff866f 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -58,6 +58,9 @@ extern PyTypeObject _PyExc_MemoryError; .types = { \ .next_version_tag = 1, \ }, \ + .cached_objects = { \ + .main_tstate = _PyThreadState_INIT, \ + }, \ .static_objects = { \ .singletons = { \ .small_ints = _Py_small_ints_INIT, \ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 19faceebf1d8ee..ed4feb603d6f38 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -34,6 +34,7 @@ struct _Py_unicode_runtime_ids { struct _Py_unicode_runtime_state { struct _Py_unicode_runtime_ids ids; + /* The interned dict is at _PyRuntime.cached_objects.interned_strings. */ }; /* fs_codec.encoding is initialized to NULL. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b9fb53147b9b51..de0780dc752719 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1534,7 +1534,8 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). */ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(interned, unicode) != 0) { + if (_Py_PopFromGlobalDict(interned, unicode) == NULL + && PyErr_Occurred()) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } @@ -14609,16 +14610,11 @@ PyUnicode_InternInPlace(PyObject **p) } PyObject *interned = get_interned_dict(); - assert(interned != NULL); - - PyObject *t = PyDict_SetDefault(interned, s, s); - if (t == NULL) { - PyErr_Clear(); - return; - } - + PyObject *t = _Py_AddToGlobalDict(interned, s, s); if (t != s) { - Py_SETREF(*p, Py_NewRef(t)); + if (t != NULL) { + Py_SETREF(*p, Py_NewRef(t)); + } return; } diff --git a/Python/import.c b/Python/import.c index c68bd1f7db420d..3c7736066f5382 100644 --- a/Python/import.c +++ b/Python/import.c @@ -54,7 +54,7 @@ static struct _inittab *inittab_copy = NULL; #define INITTAB _PyRuntime.imports.inittab #define LAST_MODULE_INDEX _PyRuntime.imports.last_module_index -#define EXTENSIONS _PyRuntime.imports.extensions +#define EXTENSIONS _Py_CACHED_OBJECT(extensions) #define PKGCONTEXT (_PyRuntime.imports.pkgcontext) @@ -889,7 +889,7 @@ _extensions_cache_get(PyObject *filename, PyObject *name) if (key == NULL) { return NULL; } - PyModuleDef *def = (PyModuleDef *)PyDict_GetItemWithError(extensions, key); + PyModuleDef *def = (PyModuleDef *)_Py_GetFromGlobalDict(extensions, key); Py_DECREF(key); return def; } @@ -909,9 +909,15 @@ _extensions_cache_set(PyObject *filename, PyObject *name, PyModuleDef *def) if (key == NULL) { return -1; } - int res = PyDict_SetItem(extensions, key, (PyObject *)def); + PyObject *existing = _Py_AddToGlobalDict(extensions, key, (PyObject *)def); Py_DECREF(key); - if (res < 0) { + if (existing == NULL) { + return -1; + } + if (existing != (PyObject *)def) { + assert(!PyErr_Occurred()); + PyErr_Format(PyExc_ImportError, + "could not add moduledef for %s to the cache", name); return -1; } return 0; @@ -928,14 +934,11 @@ _extensions_cache_delete(PyObject *filename, PyObject *name) if (key == NULL) { return -1; } - if (PyDict_DelItem(extensions, key) < 0) { - if (!PyErr_ExceptionMatches(PyExc_KeyError)) { - Py_DECREF(key); - return -1; - } - PyErr_Clear(); - } + PyObject *value = _Py_PopFromGlobalDict(extensions, key); Py_DECREF(key); + if (value == NULL && PyErr_Occurred()) { + return -1; + } return 0; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index d0e85519d23464..1658b6c1ebe8ac 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -635,6 +635,8 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + _PyThreadState_InitDetached(&runtime->cached_objects.main_tstate, interp); + *tstate_p = tstate; return _PyStatus_OK(); } @@ -1928,6 +1930,8 @@ Py_FinalizeEx(void) // XXX Do this sooner during finalization. // XXX Ensure finalizer errors are handled properly. + _PyThreadState_ClearDetached(&runtime->cached_objects.main_tstate); + finalize_interp_clear(tstate); finalize_interp_delete(tstate->interp); diff --git a/Python/pystate.c b/Python/pystate.c index b17efdbefd124c..6933d09393af65 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -565,6 +565,210 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) #endif +//--------------- +// global objects +//--------------- + +/* The global objects thread state is meant to be used in a very limited + way and should not be used to actually run any Python code. */ + +static PyThreadState * +bind_global_objects_state(_PyRuntimeState *runtime) +{ + PyThreadState *main_tstate = &runtime->cached_objects.main_tstate; + + bind_tstate(main_tstate); + /* Unlike _PyThreadState_Bind(), we do not modify gilstate TSS. */ + + return main_tstate; +} + +static void +unbind_global_objects_state(_PyRuntimeState *runtime) +{ + PyThreadState *main_tstate = &runtime->cached_objects.main_tstate; + assert(tstate_is_alive(main_tstate)); + assert(!main_tstate->_status.active); + assert(gilstate_tss_get(runtime) != main_tstate); + + unbind_tstate(main_tstate); + + /* This thread state may be bound/unbound repeatedly, + so we must erase evidence that it was ever bound (or unbound). */ + main_tstate->_status.bound = 0; + main_tstate->_status.unbound = 0; + + /* We must fully unlink the thread state from any OS thread, + to allow it to be bound more than once. */ + main_tstate->thread_id = 0; +#ifdef PY_HAVE_THREAD_NATIVE_ID + main_tstate->native_thread_id = 0; +#endif +} + +static inline void +acquire_global_objects_lock(_PyRuntimeState *runtime) +{ + /* For now we can rely on the GIL, so we don't actually + acquire a global lock here. */ + assert(current_fast_get(runtime) != NULL); +} + +static inline void +release_global_objects_lock(_PyRuntimeState *runtime) +{ + /* For now we can rely on the GIL, so we don't actually + release a global lock here. */ + assert(current_fast_get(runtime) != NULL); +} + +PyObject * +_Py_GetFromGlobalDict(PyObject *dict, PyObject *key) +{ + assert(dict != NULL); + assert(PyDict_CheckExact(dict)); + + _PyRuntimeState *runtime = &_PyRuntime; + + /* Due to interpreter isolation we must hold a global lock, + starting at this point and ending before we return. + Note that the operations in this function are very fucused + and we should not expect any reentrancy. */ + acquire_global_objects_lock(runtime); + /* We don't worry about the global objects state + since there are no memory operations. */ + PyObject *value = PyDict_GetItemWithError(dict, key); + release_global_objects_lock(runtime); + return value; +} + +PyObject * +_Py_AddToGlobalDict(PyObject *dict, PyObject *key, PyObject *value) +{ + assert(dict != NULL); + assert(PyDict_CheckExact(dict)); + + /* All global objects are stored in _PyRuntime + and owned by the main interpreter. */ + _PyRuntimeState *runtime = &_PyRuntime; + PyThreadState *curts = current_fast_get(runtime); + PyInterpreterState *interp = curts->interp; + assert(interp != NULL); // The GIL must be held. + + /* Due to interpreter isolation we must hold a global lock, + starting at this point and ending before we return. + Note that the operations in this function are very fucused + and we should not expect any reentrancy. */ + acquire_global_objects_lock(runtime); + + /* We only swap interpreters if necessary. */ + PyObject *actual = PyDict_GetItemWithError(dict, key); + if (actual == NULL && !PyErr_Occurred()) { + /* Swap to the main interpreter, if necessary. */ + PyThreadState *oldts = NULL; + if (!_Py_IsMainInterpreter(interp)) { + PyThreadState *main_tstate = bind_global_objects_state(runtime); + + oldts = _PyThreadState_Swap(runtime, main_tstate); + assert(oldts != NULL); + assert(!_Py_IsMainInterpreter(oldts->interp)); + + /* The limitations of the global objects thread state apply + from this point to the point we swap back to oldts. */ + } + + /* This might trigger a resize, which is why we must "acquire" + the global object state. Also note that PyDict_SetItem() + must be compatible with our reentrancy and global objects state + constraints. */ + if (PyDict_SetItem(dict, key, value) < 0) { + /* Raising an exception from one interpreter in another + is problematic, so we clear it and let the caller deal + with the returned NULL. */ + assert(PyErr_ExceptionMatches(PyExc_MemoryError)); + PyErr_Clear(); + } + else { + actual = value; + } + + /* Swap back, it it wasn't in the main interpreter already. */ + if (oldts != NULL) { + // The returned tstate should be _PyRuntime.cached_objects.main_tstate. + _PyThreadState_Swap(runtime, oldts); + + unbind_global_objects_state(runtime); + } + } + + release_global_objects_lock(runtime); + + // XXX Immortalize the key and value. + + return actual; +} + +PyObject * +_Py_PopFromGlobalDict(PyObject *dict, PyObject *key) +{ + assert(dict != NULL); + assert(PyDict_CheckExact(dict)); + + /* All global objects are stored in _PyRuntime + and owned by the main interpreter. */ + _PyRuntimeState *runtime = &_PyRuntime; + PyThreadState *curts = current_fast_get(runtime); + PyInterpreterState *interp = curts->interp; + assert(interp != NULL); // The GIL must be held. + + /* Due to interpreter isolation we must hold a global lock, + starting at this point and ending before we return. + Note that the operations in this function are very fucused + and we should not expect any reentrancy. */ + acquire_global_objects_lock(runtime); + + /* We only swap interpreters if necessary. */ + PyObject *value = PyDict_GetItemWithError(dict, key); + if (value != NULL) { + /* Swap to the main interpreter, if necessary. */ + PyThreadState *oldts = NULL; + if (!_Py_IsMainInterpreter(interp)) { + PyThreadState *main_tstate = bind_global_objects_state(runtime); + + oldts = _PyThreadState_Swap(runtime, main_tstate); + assert(oldts != NULL); + assert(!_Py_IsMainInterpreter(oldts->interp)); + + /* The limitations of the global objects thread state apply + from this point to the point we swap back to oldts. */ + } + + /* This might trigger a resize, which is why we must "acquire" + the global object state. Also note that PyDict_DelItem() + must be compatible with our reentrancy and global objects state + constraints. */ + int res = PyDict_DelItem(dict, key); + assert(res == 0); + if (res < 0) { + /* This really shouldn't happen. */ + PyErr_Clear(); + } + + /* Swap back, it it wasn't in the main interpreter already. */ + if (oldts != NULL) { + // The returned tstate should be _PyRuntime.cached_objects.main_tstate. + _PyThreadState_Swap(runtime, oldts); + + unbind_global_objects_state(runtime); + } + } + + release_global_objects_lock(runtime); + + return value; +} + + /*************************************/ /* the per-interpreter runtime state */ /*************************************/ @@ -1217,8 +1421,7 @@ free_threadstate(PyThreadState *tstate) static void init_threadstate(PyThreadState *tstate, - PyInterpreterState *interp, uint64_t id, - PyThreadState *next) + PyInterpreterState *interp, uint64_t id) { if (tstate->_status.initialized) { Py_FatalError("thread state already initialized"); @@ -1227,18 +1430,13 @@ init_threadstate(PyThreadState *tstate, assert(interp != NULL); tstate->interp = interp; + // next/prev are set in add_threadstate(). + assert(tstate->next == NULL); + assert(tstate->prev == NULL); + assert(id > 0); tstate->id = id; - assert(interp->threads.head == tstate); - assert((next != NULL && id != 1) || (next == NULL && id == 1)); - if (next != NULL) { - assert(next->prev == NULL || next->prev == tstate); - next->prev = tstate; - } - tstate->next = next; - assert(tstate->prev == NULL); - // thread_id and native_thread_id are set in bind_tstate(). tstate->py_recursion_limit = interp->ceval.recursion_limit, @@ -1259,6 +1457,22 @@ init_threadstate(PyThreadState *tstate, tstate->_status.initialized = 1; } +static void +add_threadstate(PyInterpreterState *interp, PyThreadState *tstate, + PyThreadState *next) +{ + assert(interp->threads.head != tstate); + assert((next != NULL && tstate->id != 1) || + (next == NULL && tstate->id == 1)); + if (next != NULL) { + assert(next->prev == NULL || next->prev == tstate); + next->prev = tstate; + } + tstate->next = next; + assert(tstate->prev == NULL); + interp->threads.head = tstate; +} + static PyThreadState * new_threadstate(PyInterpreterState *interp) { @@ -1298,9 +1512,9 @@ new_threadstate(PyInterpreterState *interp) &initial._main_interpreter._initial_thread, sizeof(*tstate)); } - interp->threads.head = tstate; - init_threadstate(tstate, interp, id, old_head); + init_threadstate(tstate, interp, id); + add_threadstate(interp, tstate, old_head); HEAD_UNLOCK(runtime); if (!used_newtstate) { @@ -1347,6 +1561,33 @@ _PyThreadState_Init(PyThreadState *tstate) Py_FatalError("_PyThreadState_Init() is for internal use only"); } +void +_PyThreadState_InitDetached(PyThreadState *tstate, PyInterpreterState *interp) +{ + _PyRuntimeState *runtime = interp->runtime; + + HEAD_LOCK(runtime); + interp->threads.next_unique_id += 1; + uint64_t id = interp->threads.next_unique_id; + HEAD_UNLOCK(runtime); + + init_threadstate(tstate, interp, id); + // We do not call add_threadstate(). +} + + +static void +clear_datastack(PyThreadState *tstate) +{ + _PyStackChunk *chunk = tstate->datastack_chunk; + tstate->datastack_chunk = NULL; + while (chunk != NULL) { + _PyStackChunk *prev = chunk->previous; + _PyObject_VirtualFree(chunk, chunk->size); + chunk = prev; + } +} + void PyThreadState_Clear(PyThreadState *tstate) { @@ -1421,7 +1662,6 @@ PyThreadState_Clear(PyThreadState *tstate) // XXX Do it as early in the function as possible. } - /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void tstate_delete_common(PyThreadState *tstate) @@ -1454,17 +1694,25 @@ tstate_delete_common(PyThreadState *tstate) unbind_tstate(tstate); // XXX Move to PyThreadState_Clear()? - _PyStackChunk *chunk = tstate->datastack_chunk; - tstate->datastack_chunk = NULL; - while (chunk != NULL) { - _PyStackChunk *prev = chunk->previous; - _PyObject_VirtualFree(chunk, chunk->size); - chunk = prev; - } + clear_datastack(tstate); tstate->_status.finalized = 1; } +void +_PyThreadState_ClearDetached(PyThreadState *tstate) +{ + assert(!tstate->_status.bound); + assert(!tstate->_status.bound_gilstate); + assert(tstate->datastack_chunk == NULL); + assert(tstate->thread_id == 0); + assert(tstate->native_thread_id == 0); + assert(tstate->next == NULL); + assert(tstate->prev == NULL); + + PyThreadState_Clear(tstate); + clear_datastack(tstate); +} static void zapthreads(PyInterpreterState *interp)
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: