Skip to content

bpo-46006: Move the interned strings and identifiers to _PyRuntimeState. #30131

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Move the interned strings to _PyRuntimeState.
  • Loading branch information
ericsnowcurrently committed Dec 16, 2021
commit 7c6c441b294b312d38df68de330c14bd17b406e3
13 changes: 13 additions & 0 deletions Include/internal/pycore_global_objects.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ extern "C" {
_PyRuntime.global_objects.NAME
#define _Py_SINGLETON(NAME) \
_Py_GLOBAL_OBJECT(singletons.NAME)
#define _Py_CACHED_OBJECT(NAME) \
_Py_GLOBAL_OBJECT(cached.NAME)

struct _Py_global_objects {
struct {
Expand All @@ -66,6 +68,17 @@ struct _Py_global_objects {
PyObject **array;
} unicode_ids;
} singletons;
struct {
/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.

Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
PyObject *unicode_interned;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use "string" or "str" rather than "unicode"? Python 2 is history 🙂

} cached;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please drop this struct.

};

#define _Py_global_objects_INIT { \
Expand Down
10 changes: 0 additions & 10 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,6 @@ struct _Py_unicode_state {
shared as well. */
PyObject *latin1[256];
struct _Py_unicode_fs_codec fs_codec;

/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
function will delete the reference from this dictionary.

Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
PyObject *interned;
};

extern void _PyUnicode_ClearInterned(PyInterpreterState *);
Expand Down
31 changes: 14 additions & 17 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ static int unicode_is_singleton(PyObject *unicode);


#define IDENTIFIERS _Py_SINGLETON(unicode_ids)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you drop the IDENTIFIERS and INTERNED macros? They impair readability.
You can leave _Py_SINGLETON as it conveys some meaning.

#define INTERNED _Py_CACHED_OBJECT(unicode_interned)

static struct _Py_unicode_state*
get_unicode_state(void)
Expand Down Expand Up @@ -1952,15 +1953,14 @@ unicode_dealloc(PyObject *unicode)

case SSTATE_INTERNED_MORTAL:
{
struct _Py_unicode_state *state = get_unicode_state();
/* Revive the dead object temporarily. PyDict_DelItem() removes two
references (key and value) which were ignored by
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
to prevent calling unicode_dealloc() again. Adjust refcnt after
PyDict_DelItem(). */
assert(Py_REFCNT(unicode) == 0);
Py_SET_REFCNT(unicode, 3);
if (PyDict_DelItem(state->interned, unicode) != 0) {
if (PyDict_DelItem(INTERNED, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
Expand Down Expand Up @@ -15592,16 +15592,15 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}

struct _Py_unicode_state *state = get_unicode_state();
if (state->interned == NULL) {
state->interned = PyDict_New();
if (state->interned == NULL) {
if (INTERNED == NULL) {
INTERNED = PyDict_New();
if (INTERNED == NULL) {
PyErr_Clear(); /* Don't leave an exception */
return;
}
}

PyObject *t = PyDict_SetDefault(state->interned, s, s);
PyObject *t = PyDict_SetDefault(INTERNED, s, s);
if (t == NULL) {
PyErr_Clear();
return;
Expand Down Expand Up @@ -15654,25 +15653,24 @@ PyUnicode_InternFromString(const char *cp)
void
_PyUnicode_ClearInterned(PyInterpreterState *interp)
{
struct _Py_unicode_state *state = &interp->unicode;
if (state->interned == NULL) {
if (INTERNED == NULL) {
return;
}
assert(PyDict_CheckExact(state->interned));
assert(PyDict_CheckExact(INTERNED));

/* Interned unicode strings are not forcibly deallocated; rather, we give
them their stolen references back, and then clear and DECREF the
interned dict. */

#ifdef INTERNED_STATS
fprintf(stderr, "releasing %zd interned strings\n",
PyDict_GET_SIZE(state->interned));
PyDict_GET_SIZE(INTERNED));

Py_ssize_t immortal_size = 0, mortal_size = 0;
#endif
Py_ssize_t pos = 0;
PyObject *s, *ignored_value;
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) {
assert(PyUnicode_IS_READY(s));

switch (PyUnicode_CHECK_INTERNED(s)) {
Expand Down Expand Up @@ -15703,8 +15701,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
mortal_size, immortal_size);
#endif

PyDict_Clear(state->interned);
Py_CLEAR(state->interned);
PyDict_Clear(INTERNED);
Py_CLEAR(INTERNED);
}


Expand Down Expand Up @@ -16075,8 +16073,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
static inline int
unicode_is_finalizing(void)
{
struct _Py_unicode_state *state = get_unicode_state();
return (state->interned == NULL);
return (INTERNED == NULL);
}
#endif

Expand All @@ -16087,7 +16084,7 @@ _PyUnicode_Fini(PyInterpreterState *interp)
struct _Py_unicode_state *state = &interp->unicode;

// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
assert(state->interned == NULL);
assert(INTERNED == NULL);

_PyUnicode_FiniEncodings(&state->fs_codec);

Expand Down
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy