diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 6cae3bca6be45a..2122c9bf598716 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -45,6 +45,8 @@ extern "C" { _PyRuntime.global_objects.NAME #define _Py_SINGLETON(NAME) \ _Py_GLOBAL_OBJECT(singletons.NAME) +#define _Py_CACHED_OBJECT(NAME) \ + _Py_GLOBAL_OBJECT(cached.NAME) struct _Py_global_objects { struct { @@ -54,7 +56,29 @@ struct _Py_global_objects { * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). */ PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + + /* Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() */ + struct _Py_unicode_ids { + PyThread_type_lock lock; + // next_index value must be preserved when Py_Initialize()/Py_Finalize() + // is called multiple times: see _PyUnicode_FromId() implementation. + Py_ssize_t next_index; + + Py_ssize_t size; + PyObject **array; + } unicode_ids; } singletons; + struct { + /* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) + */ + PyObject *unicode_interned; + } cached; }; #define _Py_global_objects_INIT { \ diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 725c859ea7853d..d3b2b59b85e3b8 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -116,8 +116,6 @@ typedef struct pyruntimestate { void *open_code_userdata; _Py_AuditHookEntry *audit_hook_head; - struct _Py_unicode_runtime_ids unicode_ids; - struct _Py_global_objects global_objects; // If anything gets added after global_objects then // _PyRuntimeState_reset() needs to get updated to clear it. diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c50c42011a9349..7551fe5d468b40 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -19,13 +19,6 @@ extern void _PyUnicode_Fini(PyInterpreterState *); /* other API */ -struct _Py_unicode_runtime_ids { - PyThread_type_lock lock; - // next_index value must be preserved when Py_Initialize()/Py_Finalize() - // is called multiple times: see _PyUnicode_FromId() implementation. - Py_ssize_t next_index; -}; - /* fs_codec.encoding is initialized to NULL. Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */ struct _Py_unicode_fs_codec { @@ -35,11 +28,6 @@ struct _Py_unicode_fs_codec { _Py_error_handler error_handler; }; -struct _Py_unicode_ids { - Py_ssize_t size; - PyObject **array; -}; - struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. PyObject *empty_string; @@ -47,19 +35,6 @@ struct _Py_unicode_state { shared as well. */ PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; - - /* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) - */ - PyObject *interned; - - // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() - struct _Py_unicode_ids ids; }; extern void _PyUnicode_ClearInterned(PyInterpreterState *); diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst new file mode 100644 index 00000000000000..702ca75982699c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst @@ -0,0 +1,3 @@ +Move the interned strings and Py_IDENTIFIER strings back to the +process-global runtime state instead of the per-interpreter state (at least +for now). diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 14449bce70839f..12219eff0a6d86 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -233,6 +233,9 @@ static int unicode_is_singleton(PyObject *unicode); #endif +#define IDENTIFIERS _Py_SINGLETON(unicode_ids) +#define INTERNED _Py_CACHED_OBJECT(unicode_interned) + static struct _Py_unicode_state* get_unicode_state(void) { @@ -1950,7 +1953,6 @@ unicode_dealloc(PyObject *unicode) case SSTATE_INTERNED_MORTAL: { - struct _Py_unicode_state *state = get_unicode_state(); /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1958,7 +1960,7 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). */ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(state->interned, unicode) != 0) { + if (PyDict_DelItem(INTERNED, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } @@ -2331,30 +2333,25 @@ PyUnicode_FromString(const char *u) PyObject * _PyUnicode_FromId(_Py_Identifier *id) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _Py_unicode_ids *ids = &interp->unicode.ids; - Py_ssize_t index = _Py_atomic_size_get(&id->index); if (index < 0) { - struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; - - PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + PyThread_acquire_lock(IDENTIFIERS.lock, WAIT_LOCK); // Check again to detect concurrent access. Another thread can have // initialized the index while this thread waited for the lock. index = _Py_atomic_size_get(&id->index); if (index < 0) { - assert(rt_ids->next_index < PY_SSIZE_T_MAX); - index = rt_ids->next_index; - rt_ids->next_index++; + assert(IDENTIFIERS.next_index < PY_SSIZE_T_MAX); + index = IDENTIFIERS.next_index; + IDENTIFIERS.next_index++; _Py_atomic_size_set(&id->index, index); } - PyThread_release_lock(rt_ids->lock); + PyThread_release_lock(IDENTIFIERS.lock); } assert(index >= 0); PyObject *obj; - if (index < ids->size) { - obj = ids->array[index]; + if (index < IDENTIFIERS.size) { + obj = IDENTIFIERS.array[index]; if (obj) { // Return a borrowed reference return obj; @@ -2368,22 +2365,22 @@ _PyUnicode_FromId(_Py_Identifier *id) } PyUnicode_InternInPlace(&obj); - if (index >= ids->size) { + if (index >= IDENTIFIERS.size) { // Overallocate to reduce the number of realloc Py_ssize_t new_size = Py_MAX(index * 2, 16); - Py_ssize_t item_size = sizeof(ids->array[0]); - PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); + Py_ssize_t item_size = sizeof(IDENTIFIERS.array[0]); + PyObject **new_array = PyMem_Realloc(IDENTIFIERS.array, new_size * item_size); if (new_array == NULL) { PyErr_NoMemory(); return NULL; } - memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); - ids->array = new_array; - ids->size = new_size; + memset(&new_array[IDENTIFIERS.size], 0, (new_size - IDENTIFIERS.size) * item_size); + IDENTIFIERS.array = new_array; + IDENTIFIERS.size = new_size; } // The array stores a strong reference - ids->array[index] = obj; + IDENTIFIERS.array[index] = obj; // Return a borrowed reference return obj; @@ -2391,15 +2388,17 @@ _PyUnicode_FromId(_Py_Identifier *id) static void -unicode_clear_identifiers(struct _Py_unicode_state *state) +unicode_clear_identifiers(PyInterpreterState *interp) { - struct _Py_unicode_ids *ids = &state->ids; - for (Py_ssize_t i=0; i < ids->size; i++) { - Py_XDECREF(ids->array[i]); + if (!_Py_IsMainInterpreter(interp)) { + return; + } + for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) { + Py_XDECREF(IDENTIFIERS.array[i]); } - ids->size = 0; - PyMem_Free(ids->array); - ids->array = NULL; + IDENTIFIERS.size = 0; + PyMem_Free(IDENTIFIERS.array); + IDENTIFIERS.array = NULL; // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid // after Py_Finalize(). } @@ -15596,16 +15595,15 @@ PyUnicode_InternInPlace(PyObject **p) return; } - struct _Py_unicode_state *state = get_unicode_state(); - if (state->interned == NULL) { - state->interned = PyDict_New(); - if (state->interned == NULL) { + if (INTERNED == NULL) { + INTERNED = PyDict_New(); + if (INTERNED == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } } - PyObject *t = PyDict_SetDefault(state->interned, s, s); + PyObject *t = PyDict_SetDefault(INTERNED, s, s); if (t == NULL) { PyErr_Clear(); return; @@ -15658,11 +15656,13 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - struct _Py_unicode_state *state = &interp->unicode; - if (state->interned == NULL) { + if (!_Py_IsMainInterpreter(interp)) { return; } - assert(PyDict_CheckExact(state->interned)); + if (INTERNED == NULL) { + return; + } + assert(PyDict_CheckExact(INTERNED)); /* Interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the @@ -15670,13 +15670,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", - PyDict_GET_SIZE(state->interned)); + PyDict_GET_SIZE(INTERNED)); Py_ssize_t immortal_size = 0, mortal_size = 0; #endif Py_ssize_t pos = 0; PyObject *s, *ignored_value; - while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { + while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); switch (PyUnicode_CHECK_INTERNED(s)) { @@ -15707,8 +15707,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) mortal_size, immortal_size); #endif - PyDict_Clear(state->interned); - Py_CLEAR(state->interned); + PyDict_Clear(INTERNED); + Py_CLEAR(INTERNED); } @@ -16079,8 +16079,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) static inline int unicode_is_finalizing(void) { - struct _Py_unicode_state *state = get_unicode_state(); - return (state->interned == NULL); + return (INTERNED == NULL); } #endif @@ -16091,11 +16090,11 @@ _PyUnicode_Fini(PyInterpreterState *interp) struct _Py_unicode_state *state = &interp->unicode; // _PyUnicode_ClearInterned() must be called before - assert(state->interned == NULL); + assert(INTERNED == NULL || !_Py_IsMainInterpreter(interp)); _PyUnicode_FiniEncodings(&state->fs_codec); - unicode_clear_identifiers(state); + unicode_clear_identifiers(interp); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); diff --git a/Python/pystate.c b/Python/pystate.c index 463b248f22336e..709c621087a937 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -120,8 +120,9 @@ init_runtime(_PyRuntimeState *runtime, // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); - runtime->unicode_ids.next_index = unicode_next_index; - runtime->unicode_ids.lock = unicode_ids_mutex; + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + ids->next_index = unicode_next_index; + ids->lock = unicode_ids_mutex; runtime->_initialized = 1; } @@ -137,7 +138,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) _Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head; // bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize() // is called multiple times. - Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index; + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + Py_ssize_t unicode_next_index = ids->next_index; PyThread_type_lock lock1, lock2, lock3; if (alloc_for_runtime(&lock1, &lock2, &lock3) != 0) { @@ -164,7 +166,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) FREE_LOCK(runtime->interpreters.mutex); FREE_LOCK(runtime->xidregistry.mutex); - FREE_LOCK(runtime->unicode_ids.lock); + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + FREE_LOCK(ids->lock); #undef FREE_LOCK PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); @@ -186,7 +189,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex); int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex); - int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock); + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + int reinit_unicode_ids = _PyThread_at_fork_reinit(&ids->lock); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy