Skip to content

bpo-46006: Move the interned strings and identifiers to _PyRuntimeState. #30131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Move the unicode identifiers (_Py_Identifier) to _PyRuntimeState.
  • Loading branch information
ericsnowcurrently committed Dec 16, 2021
commit e79afd9acda243ecbc6927a2d233cef8e9ffc164
11 changes: 11 additions & 0 deletions Include/internal/pycore_global_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ struct _Py_global_objects {
* -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
*/
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];

/* Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() */
struct _Py_unicode_ids {
PyThread_type_lock lock;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this lock unnecessary? The GIL is held whenever an identifier is used, isn't it?

// next_index value must be preserved when Py_Initialize()/Py_Finalize()
// is called multiple times: see _PyUnicode_FromId() implementation.
Py_ssize_t next_index;

Py_ssize_t size;
PyObject **array;
} unicode_ids;
} singletons;
};

Expand Down
2 changes: 0 additions & 2 deletions Include/internal/pycore_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,6 @@ typedef struct pyruntimestate {
void *open_code_userdata;
_Py_AuditHookEntry *audit_hook_head;

struct _Py_unicode_runtime_ids unicode_ids;

struct _Py_global_objects global_objects;
// If anything gets added after global_objects then
// _PyRuntimeState_reset() needs to get updated to clear it.
Expand Down
15 changes: 0 additions & 15 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,6 @@ extern void _PyUnicode_Fini(PyInterpreterState *);

/* other API */

struct _Py_unicode_runtime_ids {
PyThread_type_lock lock;
// next_index value must be preserved when Py_Initialize()/Py_Finalize()
// is called multiple times: see _PyUnicode_FromId() implementation.
Py_ssize_t next_index;
};

/* fs_codec.encoding is initialized to NULL.
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct _Py_unicode_fs_codec {
Expand All @@ -35,11 +28,6 @@ struct _Py_unicode_fs_codec {
_Py_error_handler error_handler;
};

struct _Py_unicode_ids {
Py_ssize_t size;
PyObject **array;
};

struct _Py_unicode_state {
// The empty Unicode object is a singleton to improve performance.
PyObject *empty_string;
Expand All @@ -57,9 +45,6 @@ struct _Py_unicode_state {
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
PyObject *interned;

// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
struct _Py_unicode_ids ids;
};

extern void _PyUnicode_ClearInterned(PyInterpreterState *);
Expand Down
50 changes: 23 additions & 27 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,8 @@ static int unicode_is_singleton(PyObject *unicode);
#endif


#define IDENTIFIERS _Py_SINGLETON(unicode_ids)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you drop the IDENTIFIERS and INTERNED macros? They impair readability.
You can leave _Py_SINGLETON as it conveys some meaning.


static struct _Py_unicode_state*
get_unicode_state(void)
{
Expand Down Expand Up @@ -2331,30 +2333,25 @@ PyUnicode_FromString(const char *u)
PyObject *
_PyUnicode_FromId(_Py_Identifier *id)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
struct _Py_unicode_ids *ids = &interp->unicode.ids;

Py_ssize_t index = _Py_atomic_size_get(&id->index);
if (index < 0) {
struct _Py_unicode_runtime_ids *rt_ids = &interp->runtime->unicode_ids;

PyThread_acquire_lock(rt_ids->lock, WAIT_LOCK);
PyThread_acquire_lock(IDENTIFIERS.lock, WAIT_LOCK);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Drop this, and assert that the GIL is held?

// Check again to detect concurrent access. Another thread can have
// initialized the index while this thread waited for the lock.
index = _Py_atomic_size_get(&id->index);
if (index < 0) {
assert(rt_ids->next_index < PY_SSIZE_T_MAX);
index = rt_ids->next_index;
rt_ids->next_index++;
assert(IDENTIFIERS.next_index < PY_SSIZE_T_MAX);
index = IDENTIFIERS.next_index;
IDENTIFIERS.next_index++;
_Py_atomic_size_set(&id->index, index);
}
PyThread_release_lock(rt_ids->lock);
PyThread_release_lock(IDENTIFIERS.lock);
}
assert(index >= 0);

PyObject *obj;
if (index < ids->size) {
obj = ids->array[index];
if (index < IDENTIFIERS.size) {
obj = IDENTIFIERS.array[index];
if (obj) {
// Return a borrowed reference
return obj;
Expand All @@ -2368,38 +2365,37 @@ _PyUnicode_FromId(_Py_Identifier *id)
}
PyUnicode_InternInPlace(&obj);

if (index >= ids->size) {
if (index >= IDENTIFIERS.size) {
// Overallocate to reduce the number of realloc
Py_ssize_t new_size = Py_MAX(index * 2, 16);
Py_ssize_t item_size = sizeof(ids->array[0]);
PyObject **new_array = PyMem_Realloc(ids->array, new_size * item_size);
Py_ssize_t item_size = sizeof(IDENTIFIERS.array[0]);
PyObject **new_array = PyMem_Realloc(IDENTIFIERS.array, new_size * item_size);
if (new_array == NULL) {
PyErr_NoMemory();
return NULL;
}
memset(&new_array[ids->size], 0, (new_size - ids->size) * item_size);
ids->array = new_array;
ids->size = new_size;
memset(&new_array[IDENTIFIERS.size], 0, (new_size - IDENTIFIERS.size) * item_size);
IDENTIFIERS.array = new_array;
IDENTIFIERS.size = new_size;
}

// The array stores a strong reference
ids->array[index] = obj;
IDENTIFIERS.array[index] = obj;

// Return a borrowed reference
return obj;
}


static void
unicode_clear_identifiers(struct _Py_unicode_state *state)
unicode_clear_identifiers(void)
{
struct _Py_unicode_ids *ids = &state->ids;
for (Py_ssize_t i=0; i < ids->size; i++) {
Py_XDECREF(ids->array[i]);
for (Py_ssize_t i=0; i < IDENTIFIERS.size; i++) {
Py_XDECREF(IDENTIFIERS.array[i]);
}
ids->size = 0;
PyMem_Free(ids->array);
ids->array = NULL;
IDENTIFIERS.size = 0;
PyMem_Free(IDENTIFIERS.array);
IDENTIFIERS.array = NULL;
// Don't reset _PyRuntime next_index: _Py_Identifier.id remains valid
// after Py_Finalize().
}
Expand Down Expand Up @@ -16095,7 +16091,7 @@ _PyUnicode_Fini(PyInterpreterState *interp)

_PyUnicode_FiniEncodings(&state->fs_codec);

unicode_clear_identifiers(state);
unicode_clear_identifiers();

for (Py_ssize_t i = 0; i < 256; i++) {
Py_CLEAR(state->latin1[i]);
Expand Down
14 changes: 9 additions & 5 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ init_runtime(_PyRuntimeState *runtime,
// Set it to the ID of the main thread of the main interpreter.
runtime->main_thread = PyThread_get_thread_ident();

runtime->unicode_ids.next_index = unicode_next_index;
runtime->unicode_ids.lock = unicode_ids_mutex;
struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
ids->next_index = unicode_next_index;
ids->lock = unicode_ids_mutex;

runtime->_initialized = 1;
}
Expand All @@ -137,7 +138,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
_Py_AuditHookEntry *audit_hook_head = runtime->audit_hook_head;
// bpo-42882: Preserve next_index value if Py_Initialize()/Py_Finalize()
// is called multiple times.
Py_ssize_t unicode_next_index = runtime->unicode_ids.next_index;
struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
Py_ssize_t unicode_next_index = ids->next_index;

PyThread_type_lock lock1, lock2, lock3;
if (alloc_for_runtime(&lock1, &lock2, &lock3) != 0) {
Expand All @@ -164,7 +166,8 @@ _PyRuntimeState_Fini(_PyRuntimeState *runtime)

FREE_LOCK(runtime->interpreters.mutex);
FREE_LOCK(runtime->xidregistry.mutex);
FREE_LOCK(runtime->unicode_ids.lock);
struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
FREE_LOCK(ids->lock);

#undef FREE_LOCK
PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
Expand All @@ -186,7 +189,8 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)

int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_ids.lock);
struct _Py_unicode_ids *ids = &runtime->global_objects.singletons.unicode_ids;
int reinit_unicode_ids = _PyThread_at_fork_reinit(&ids->lock);

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy