From e79afd9acda243ecbc6927a2d233cef8e9ffc164 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 15 Dec 2021 17:54:09 -0700 Subject: [PATCH 1/5] Move the unicode identifiers (_Py_Identifier) to _PyRuntimeState. --- Include/internal/pycore_global_objects.h | 11 ++++++ Include/internal/pycore_runtime.h | 2 - Include/internal/pycore_unicodeobject.h | 15 ------- Objects/unicodeobject.c | 50 +++++++++++------------- Python/pystate.c | 14 ++++--- 5 files changed, 43 insertions(+), 49 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 6cae3bca6be45a..0862238598b293 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -54,6 +54,17 @@ struct _Py_global_objects { * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). */ PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + + /* Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() */ + struct _Py_unicode_ids { + PyThread_type_lock lock; + // next_index value must be preserved when Py_Initialize()/Py_Finalize() + // is called multiple times: see _PyUnicode_FromId() implementation. + Py_ssize_t next_index; + + Py_ssize_t size; + PyObject **array; + } unicode_ids; } singletons; }; diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 725c859ea7853d..d3b2b59b85e3b8 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -116,8 +116,6 @@ typedef struct pyruntimestate { void *open_code_userdata; _Py_AuditHookEntry *audit_hook_head; - struct _Py_unicode_runtime_ids unicode_ids; - struct _Py_global_objects global_objects; // If anything gets added after global_objects then // _PyRuntimeState_reset() needs to get updated to clear it.
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index c50c42011a9349..1ad751d180a181 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -19,13 +19,6 @@ extern void _PyUnicode_Fini(PyInterpreterState *); /* other API */ -struct _Py_unicode_runtime_ids { - PyThread_type_lock lock; - // next_index value must be preserved when Py_Initialize()/Py_Finalize() - // is called multiple times: see _PyUnicode_FromId() implementation. - Py_ssize_t next_index; -}; - /* fs_codec.encoding is initialized to NULL. Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */ struct _Py_unicode_fs_codec { @@ -35,11 +28,6 @@ struct _Py_unicode_fs_codec { _Py_error_handler error_handler; }; -struct _Py_unicode_ids { - Py_ssize_t size; - PyObject **array; -}; - struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. PyObject *empty_string; @@ -57,9 +45,6 @@ struct _Py_unicode_state { count of a string is: s->ob_refcnt + (s->state ? 
2 : 0) */ PyObject *interned; - - // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() - struct _Py_unicode_ids ids; }; extern void _PyUnicode_ClearInterned(PyInterpreterState *); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 14449bce70839f..fada8c0955ac4a 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -233,6 +233,8 @@ static int unicode_is_singleton(PyObject *unicode); #endif +#define IDENTIFIERS _Py_SINGLETON(unicode_ids) + static struct _Py_unicode_state* get_unicode_state(void) { @@ -2331,30 +2333,25 @@ PyUnicode_FromString(const char *u) PyObject * _PyUnicode_FromId(_Py_Identifier *id) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _Py_unicode_ids *ids = &interp->unicode.ids; - Py_ssize_t index = _Py_atomic_size_get(&id->index); if (index < 0) { - struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; - - PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + PyThread_acquire_lock(IDENTIFIERS.lock, WAIT_LOCK); // Check again to detect concurrent access. Another thread can have // initialized the index while this thread waited for the lock. 
index = _Py_atomic_size_get(&id->index); if (index < 0) { - assert(rt_ids->next_index < PY_SSIZE_T_MAX); - index = rt_ids->next_index; - rt_ids->next_index++; + assert(IDENTIFIERS.next_index < PY_SSIZE_T_MAX); + index = IDENTIFIERS.next_index; + IDENTIFIERS.next_index++; _Py_atomic_size_set(&id->index, index); } - PyThread_release_lock(rt_ids->lock); + PyThread_release_lock(IDENTIFIERS.lock); } assert(index >= 0); PyObject *obj; - if (index < ids->size) { - obj = ids->array[index]; + if (index < IDENTIFIERS.size) { + obj = IDENTIFIERS.array[index]; if (obj) { // Return a borrowed reference return obj; @@ -2368,22 +2365,22 @@ _PyUnicode_FromId(_Py_Identifier *id) } PyUnicode_InternInPlace(&obj); - if (index >= ids->size) { + if (index >= IDENTIFIERS.size) { // Overallocate to reduce the number of realloc Py_ssize_t new_size = Py_MAX(index * 2, 16); - Py_ssize_t item_size = sizeof(ids->array[0]); - PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); + Py_ssize_t item_size = sizeof(IDENTIFIERS.array[0]); + PyObject **new_array = PyMem_Realloc(IDENTIFIERS.array, new_size * item_size); if (new_array == NULL) { PyErr_NoMemory(); return NULL; } - memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); - ids->array = new_array; - ids->size = new_size; + memset(&new_array[IDENTIFIERS.size], 0, (new_size - IDENTIFIERS.size) * item_size); + IDENTIFIERS.array = new_array; + IDENTIFIERS.size = new_size; } // The array stores a strong reference - ids->array[index] = obj; + IDENTIFIERS.array[index] = obj; // Return a borrowed reference return obj; @@ -2391,15 +2388,14 @@ _PyUnicode_FromId(_Py_Identifier *id) static void -unicode_clear_identifiers(struct _Py_unicode_state *state) +unicode_clear_identifiers(void) { - struct _Py_unicode_ids *ids = &state->ids; - for (Py_ssize_t i=0; i < ids->size; i++) { - Py_XDECREF(ids->array[i]); + for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) { + Py_XDECREF(IDENTIFIERS.array[i]); } - ids->size = 0; - 
PyMem_Free(ids->array); - ids->array = NULL; + IDENTIFIERS.size = 0; + PyMem_Free(IDENTIFIERS.array); + IDENTIFIERS.array = NULL; // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid // after Py_Finalize(). } @@ -16095,7 +16091,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_FiniEncodings(&state->fs_codec); - unicode_clear_identifiers(state); + unicode_clear_identifiers(); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); diff --git a/Python/pystate.c b/Python/pystate.c index 463b248f22336e..709c621087a937 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -120,8 +120,9 @@ init_runtime(_PyRuntimeState *runtime, // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); - runtime->unicode_ids.next_index = unicode_next_index; - runtime->unicode_ids.lock = unicode_ids_mutex; + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + ids->next_index = unicode_next_index; + ids->lock = unicode_ids_mutex; runtime->_initialized = 1; } @@ -137,7 +138,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime) _Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head; // bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize() // is called multiple times. 
- Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index; + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + Py_ssize_t unicode_next_index = ids->next_index; PyThread_type_lock lock1, lock2, lock3; if (alloc_for_runtime(&lock1, &lock2, &lock3) != 0) { @@ -164,7 +166,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) FREE_LOCK(runtime->interpreters.mutex); FREE_LOCK(runtime->xidregistry.mutex); - FREE_LOCK(runtime->unicode_ids.lock); + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + FREE_LOCK(ids->lock); #undef FREE_LOCK PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); @@ -186,7 +189,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex); int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex); - int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock); + struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids; + int reinit_unicode_ids = _PyThread_at_fork_reinit(&ids->lock); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); From 7c6c441b294b312d38df68de330c14bd17b406e3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 15 Dec 2021 18:02:45 -0700 Subject: [PATCH 2/5] Move the interned strings to _PyRuntimeState. 
--- Include/internal/pycore_global_objects.h | 13 ++++++++++ Include/internal/pycore_unicodeobject.h | 10 -------- Objects/unicodeobject.c | 31 +++++++++++------------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 0862238598b293..2122c9bf598716 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -45,6 +45,8 @@ extern "C" { _PyRuntime.global_objects.NAME #define _Py_SINGLETON(NAME) \ _Py_GLOBAL_OBJECT(singletons.NAME) +#define _Py_CACHED_OBJECT(NAME) \ + _Py_GLOBAL_OBJECT(cached.NAME) struct _Py_global_objects { struct { @@ -66,6 +68,17 @@ struct _Py_global_objects { PyObject **array; } unicode_ids; } singletons; + struct { + /* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) + */ + PyObject *unicode_interned; + } cached; }; #define _Py_global_objects_INIT { \ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h index 1ad751d180a181..7551fe5d468b40 100644 --- a/Include/internal/pycore_unicodeobject.h +++ b/Include/internal/pycore_unicodeobject.h @@ -35,16 +35,6 @@ struct _Py_unicode_state { shared as well. */ PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; - - /* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. 
- - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) - */ - PyObject *interned; }; extern void _PyUnicode_ClearInterned(PyInterpreterState *); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index fada8c0955ac4a..8ddfa9e502b576 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -234,6 +234,7 @@ static int unicode_is_singleton(PyObject *unicode); #define IDENTIFIERS _Py_SINGLETON(unicode_ids) +#define INTERNED _Py_CACHED_OBJECT(unicode_interned) static struct _Py_unicode_state* get_unicode_state(void) @@ -1952,7 +1953,6 @@ unicode_dealloc(PyObject *unicode) case SSTATE_INTERNED_MORTAL: { - struct _Py_unicode_state *state = get_unicode_state(); /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1960,7 +1960,7 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). 
*/ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(state->interned, unicode) != 0) { + if (PyDict_DelItem(INTERNED, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } @@ -15592,16 +15592,15 @@ PyUnicode_InternInPlace(PyObject **p) return; } - struct _Py_unicode_state *state = get_unicode_state(); - if (state->interned == NULL) { - state->interned = PyDict_New(); - if (state->interned == NULL) { + if (INTERNED == NULL) { + INTERNED = PyDict_New(); + if (INTERNED == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } } - PyObject *t = PyDict_SetDefault(state->interned, s, s); + PyObject *t = PyDict_SetDefault(INTERNED, s, s); if (t == NULL) { PyErr_Clear(); return; @@ -15654,11 +15653,10 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - struct _Py_unicode_state *state = &interp->unicode; - if (state->interned == NULL) { + if (INTERNED == NULL) { return; } - assert(PyDict_CheckExact(state->interned)); + assert(PyDict_CheckExact(INTERNED)); /* Interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the @@ -15666,13 +15664,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", - PyDict_GET_SIZE(state->interned)); + PyDict_GET_SIZE(INTERNED)); Py_ssize_t immortal_size = 0, mortal_size = 0; #endif Py_ssize_t pos = 0; PyObject *s, *ignored_value; - while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { + while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); switch (PyUnicode_CHECK_INTERNED(s)) { @@ -15703,8 +15701,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) mortal_size, immortal_size); #endif - PyDict_Clear(state->interned); - Py_CLEAR(state->interned); + PyDict_Clear(INTERNED); + Py_CLEAR(INTERNED); } @@ -16075,8 +16073,7 
@@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) static inline int unicode_is_finalizing(void) { - struct _Py_unicode_state *state = get_unicode_state(); - return (state->interned == NULL); + return (INTERNED == NULL); } #endif @@ -16087,7 +16084,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) struct _Py_unicode_state *state = &interp->unicode; // _PyUnicode_ClearInterned() must be called before - assert(state->interned == NULL); + assert(INTERNED == NULL); _PyUnicode_FiniEncodings(&state->fs_codec); From 58d9c0beef998ce8f320ed33700e8a54222e9f44 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Dec 2021 17:22:23 -0700 Subject: [PATCH 3/5] Only clean up if it's the main interpreter. --- Objects/unicodeobject.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8ddfa9e502b576..c2045e37515542 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2388,8 +2388,11 @@ _PyUnicode_FromId(_Py_Identifier *id) static void -unicode_clear_identifiers(void) +unicode_clear_identifiers(PyInterpreterState *interp) { + if (!_Py_IsMainInterpreter(interp)) { + return; + } for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) { Py_XDECREF(IDENTIFIERS.array[i]); } @@ -15653,6 +15656,9 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { + if (!_Py_IsMainInterpreter(interp)) { + return; + } if (INTERNED == NULL) { return; } @@ -16088,7 +16094,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) _PyUnicode_FiniEncodings(&state->fs_codec); - unicode_clear_identifiers(); + unicode_clear_identifiers(interp); for (Py_ssize_t i = 0; i < 256; i++) { Py_CLEAR(state->latin1[i]); From 01f19dc3582f4cffb382a4c495ca34412796176b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Dec 2021 17:26:26 -0700 Subject: [PATCH 4/5] Add a NEWS entry.
--- .../Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst new file mode 100644 index 00000000000000..702ca75982699c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-12-16-17-26-17.bpo-46006.vAP3Et.rst @@ -0,0 +1,3 @@ +Move the interned strings and Py_IDENTIFIER strings back to the +process-global runtime state instead of the per-interpreter state (at least +for now). From b60bd334c908cdedd11270775f24ae904d9decde Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 17 Dec 2021 09:51:34 -0700 Subject: [PATCH 5/5] Only expect NULL if the main interpreter. --- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c2045e37515542..12219eff0a6d86 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16090,7 +16090,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) struct _Py_unicode_state *state = &interp->unicode; // _PyUnicode_ClearInterned() must be called before - assert(INTERNED == NULL); + assert(INTERNED == NULL || !_Py_IsMainInterpreter(interp)); _PyUnicode_FiniEncodings(&state->fs_codec); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy