diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 19c066b0ab78c7..86889f857689c4 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -35,12 +35,13 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. */ typedef struct _Py_Identifier { - struct _Py_Identifier *next; const char* string; - PyObject *object; + // Index in PyInterpreterState.unicode.ids.array. It is process-wide + // unique and must be initialized to -1. + Py_ssize_t index; } _Py_Identifier; -#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL } +#define _Py_static_string_init(value) { .string = value, .index = -1 } #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 9ec5358b5e36e5..8c61802545253d 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -64,6 +64,11 @@ struct _Py_bytes_state { PyBytesObject *characters[256]; }; +struct _Py_unicode_ids { + Py_ssize_t size; + PyObject **array; +}; + struct _Py_unicode_state { // The empty Unicode object is a singleton to improve performance. PyObject *empty_string; @@ -71,6 +76,8 @@ struct _Py_unicode_state { shared as well. */ PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; + // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() + struct _Py_unicode_ids ids; }; struct _Py_float_state { diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 3a01d64e63d811..8c54abbb0694a3 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -49,6 +49,11 @@ typedef struct _Py_AuditHookEntry { void *userData; } _Py_AuditHookEntry; +struct _Py_unicode_runtime_ids { + PyThread_type_lock lock; + Py_ssize_t next_index; +}; + /* Full Python runtime state */ typedef struct pyruntimestate { @@ -106,6 +111,8 @@ typedef struct pyruntimestate { void *open_code_userdata; _Py_AuditHookEntry *audit_hook_head; + struct _Py_unicode_runtime_ids unicode_ids; + // XXX Consolidate globals found via the check-c-globals script. } _PyRuntimeState; diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst b/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst new file mode 100644 index 00000000000000..c29b5e1757f0c6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-05-13-18-50-27.bpo-39465.j7nl6A.rst @@ -0,0 +1,3 @@ +Make :c:func:`_PyUnicode_FromId` function compatible with subinterpreters. +Each interpreter now has an array of identifier objects (interned strings +decoded from UTF-8). Patch by Victor Stinner. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 409355534a2ce1..3238d1e692a517 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() +#include "pycore_atomic_funcs.h" // _Py_atomic_size_get() #include "pycore_bytes_methods.h" // _Py_bytes_lower() #include "pycore_format.h" // F_LJUST #include "pycore_initconfig.h" // _PyStatus_OK() @@ -302,9 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, _Py_error_handler error_handler, const char *errors, Py_ssize_t *consumed); -/* List of static strings. */ -static _Py_Identifier *static_strings = NULL; - /* Fast detection of the most frequent whitespace characters */ const unsigned char _Py_ascii_whitespace[] = { 0, 0, 0, 0, 0, 0, 0, 0, @@ -2312,42 +2310,85 @@ PyUnicode_FromString(const char *u) return PyUnicode_DecodeUTF8Stateful(u, (Py_ssize_t)size, NULL, NULL); } + PyObject * _PyUnicode_FromId(_Py_Identifier *id) { - if (id->object) { - return id->object; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_unicode_ids *ids = &interp->unicode.ids; + + int index = _Py_atomic_size_get(&id->index); + if (index < 0) { + struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids; + + PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK); + // Check again to detect concurrent access. Another thread can have + // initialized the index while this thread waited for the lock. + index = _Py_atomic_size_get(&id->index); + if (index < 0) { + assert(rt_ids->next_index < PY_SSIZE_T_MAX); + index = rt_ids->next_index; + rt_ids->next_index++; + _Py_atomic_size_set(&id->index, index); + } + PyThread_release_lock(rt_ids->lock); } + assert(index >= 0); PyObject *obj; - obj = PyUnicode_DecodeUTF8Stateful(id->string, - strlen(id->string), + if (index < ids->size) { + obj = ids->array[index]; + if (obj) { + // Return a borrowed reference + return obj; + } + } + + obj = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), NULL, NULL); if (!obj) { return NULL; } PyUnicode_InternInPlace(&obj); - assert(!id->next); - id->object = obj; - id->next = static_strings; - static_strings = id; - return id->object; + if (index >= ids->size) { + // Overallocate to reduce the number of realloc + Py_ssize_t new_size = Py_MAX(index * 2, 16); + Py_ssize_t item_size = sizeof(ids->array[0]); + PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size); + if (new_array == NULL) { + PyErr_NoMemory(); + return NULL; + } + memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size); + ids->array = new_array; + ids->size = new_size; + } + + // The array stores a strong reference + ids->array[index] = obj; + + // Return a borrowed reference + return obj; } + static void -unicode_clear_static_strings(void) +unicode_clear_identifiers(PyThreadState *tstate) { - _Py_Identifier *tmp, *s = static_strings; - while (s) { - Py_CLEAR(s->object); - tmp = s->next; - s->next = NULL; - s = tmp; + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct _Py_unicode_ids *ids = &interp->unicode.ids; + for (Py_ssize_t i=0; i < ids->size; i++) { + Py_XDECREF(ids->array[i]); } - static_strings = NULL; + ids->size = 0; + PyMem_Free(ids->array); + ids->array = NULL; + // Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid + // after Py_Finalize(). } + /* Internal function, doesn't check maximum character */ PyObject* @@ -16238,9 +16279,7 @@ _PyUnicode_Fini(PyThreadState *tstate) Py_CLEAR(state->latin1[i]); } - if (_Py_IsMainInterpreter(tstate)) { - unicode_clear_static_strings(); - } + unicode_clear_identifiers(tstate); _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec); } diff --git a/Python/pystate.c b/Python/pystate.c index ead25b08d7a83a..231144b082861f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -73,18 +73,24 @@ _PyRuntimeState_Init_impl(_PyRuntimeState *runtime) runtime->interpreters.mutex = PyThread_allocate_lock(); if (runtime->interpreters.mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for interpreter"); + return _PyStatus_NO_MEMORY(); } runtime->interpreters.next_id = -1; runtime->xidregistry.mutex = PyThread_allocate_lock(); if (runtime->xidregistry.mutex == NULL) { - return _PyStatus_ERR("Can't initialize threads for cross-interpreter data registry"); + return _PyStatus_NO_MEMORY(); } // Set it to the ID of the main thread of the main interpreter. runtime->main_thread = PyThread_get_thread_ident(); + runtime->unicode_ids.lock = PyThread_allocate_lock(); + if (runtime->unicode_ids.lock == NULL) { + return _PyStatus_NO_MEMORY(); + } + runtime->unicode_ids.next_index = 0; + return _PyStatus_OK(); } @@ -108,17 +114,17 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime) /* Force the allocator used by _PyRuntimeState_Init(). */ PyMemAllocatorEx old_alloc; _PyMem_SetDefaultAllocator(PYMEM_DOMAIN_RAW, &old_alloc); - - if (runtime->interpreters.mutex != NULL) { - PyThread_free_lock(runtime->interpreters.mutex); - runtime->interpreters.mutex = NULL; +#define FREE_LOCK(LOCK) \ + if (LOCK != NULL) { \ + PyThread_free_lock(LOCK); \ + LOCK = NULL; \ } - if (runtime->xidregistry.mutex != NULL) { - PyThread_free_lock(runtime->xidregistry.mutex); - runtime->xidregistry.mutex = NULL; - } + FREE_LOCK(runtime->interpreters.mutex); + FREE_LOCK(runtime->xidregistry.mutex); + FREE_LOCK(runtime->unicode_ids.lock); +#undef FREE_LOCK PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); } @@ -139,12 +145,14 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex); int reinit_main_id = _PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex); int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex); + int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock); PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc); if (reinit_interp < 0 || reinit_main_id < 0 - || reinit_xidregistry < 0) + || reinit_xidregistry < 0 + || reinit_unicode_ids < 0) { return _PyStatus_ERR("Failed to reinitialize runtime locks");
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: