From 2b0acbbd90b3ea1d80d8ec3d3e71f4ad3a111039 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 May 2020 01:52:54 +0200 Subject: [PATCH] bpo-39465: _PyUnicode_FromId() now uses a hash table Rewrote _Py_Identifier structure and _PyUnicode_FromId() function to store Python objects in a hash table rather than a singly linked list. Add _PyUnicode_PreInit() to create the hash table: it must be called before the first PyType_Ready() call. --- Include/cpython/object.h | 4 +- Include/internal/pycore_pylifecycle.h | 1 + Objects/unicodeobject.c | 67 +++++++++++++++++++-------- Python/pylifecycle.c | 5 ++ 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/Include/cpython/object.h b/Include/cpython/object.h index 8bf05a32711835..1e294dc898290d 100644 --- a/Include/cpython/object.h +++ b/Include/cpython/object.h @@ -43,12 +43,10 @@ PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. */ typedef struct _Py_Identifier { - struct _Py_Identifier *next; const char* string; - PyObject *object; } _Py_Identifier; -#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL } +#define _Py_static_string_init(value) { .string = value} #define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) #define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 77ea3f27454da0..db96ff6e3515b2 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -31,6 +31,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc); /* Various one-time initializers */ +extern PyStatus _PyUnicode_PreInit(PyThreadState *tstate); extern PyStatus _PyUnicode_Init(void); extern int _PyStructSequence_Init(void); extern int _PyLong_Init(PyThreadState *tstate); diff --git a/Objects/unicodeobject.c 
b/Objects/unicodeobject.c index 826298c23a924c..e2c8d0f9c40c34 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -43,6 +43,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_bytes_methods.h" #include "pycore_fileutils.h" +#include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_object.h" @@ -286,7 +287,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size, Py_ssize_t *consumed); /* List of static strings. */ -static _Py_Identifier *static_strings = NULL; +static _Py_hashtable_t *static_strings = NULL; /* bpo-40521: Latin1 singletons are shared by all interpreters. */ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS @@ -2275,31 +2276,43 @@ PyUnicode_FromString(const char *u) PyObject * _PyUnicode_FromId(_Py_Identifier *id) { - if (!id->object) { - id->object = PyUnicode_DecodeUTF8Stateful(id->string, - strlen(id->string), - NULL, NULL); - if (!id->object) - return NULL; - PyUnicode_InternInPlace(&id->object); - assert(!id->next); - id->next = static_strings; - static_strings = id; + PyObject *object = _Py_hashtable_get(static_strings, id); + if (object) { + // Return a borrowed reference + return object; + } + + object = PyUnicode_DecodeUTF8Stateful(id->string, strlen(id->string), + NULL, NULL); + if (object == NULL) { + return NULL; } - return id->object; + PyUnicode_InternInPlace(&object); + + // Store a strong reference + if (_Py_hashtable_set(static_strings, id, object) < 0) { + PyErr_NoMemory(); + return NULL; + } + + // Return a borrowed reference + return object; +} + +static void +static_strings_decref(void *data) +{ + PyObject *object = (PyObject *)data; + Py_DECREF(object); } void _PyUnicode_ClearStaticStrings() { - _Py_Identifier *tmp, *s = static_strings; - while (s) { - Py_CLEAR(s->object); - tmp = s->next; - s->next = NULL; - s = tmp; + if 
(static_strings) { + _Py_hashtable_destroy(static_strings); + static_strings = NULL; } - static_strings = NULL; } /* Internal function, doesn't check maximum character */ @@ -15509,6 +15522,22 @@ PyTypeObject PyUnicode_Type = { /* Initialize the Unicode implementation */ +PyStatus +_PyUnicode_PreInit(PyThreadState *tstate) +{ + if (_Py_IsMainInterpreter(tstate)) { + static_strings = _Py_hashtable_new_full(_Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, static_strings_decref, + NULL); + if (static_strings == NULL) { + return _PyStatus_NO_MEMORY(); + } + } + return _PyStatus_OK(); +} + + PyStatus _PyUnicode_Init(void) { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index da66a82ada70a8..b4cec67c987b20 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -579,6 +579,11 @@ pycore_init_types(PyThreadState *tstate) return status; } + status = _PyUnicode_PreInit(tstate); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + if (is_main_interp) { status = _PyTypes_Init(); if (_PyStatus_EXCEPTION(status)) { pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy