-
-
Notifications
You must be signed in to change notification settings - Fork 32.5k
bpo-46006: Move the interned strings and identifiers to _PyRuntimeState. #30131
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
e79afd9
7c6c441
58d9c0b
01f19dc
b60bd33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -45,6 +45,8 @@ extern "C" { | |
_PyRuntime.global_objects.NAME | ||
#define _Py_SINGLETON(NAME) \ | ||
_Py_GLOBAL_OBJECT(singletons.NAME) | ||
#define _Py_CACHED_OBJECT(NAME) \ | ||
_Py_GLOBAL_OBJECT(cached.NAME) | ||
|
||
struct _Py_global_objects { | ||
struct { | ||
|
@@ -66,6 +68,17 @@ struct _Py_global_objects { | |
PyObject **array; | ||
} unicode_ids; | ||
} singletons; | ||
struct { | ||
/* This dictionary holds all interned unicode strings. Note that references | ||
to strings in this dictionary are *not* counted in the string's ob_refcnt. | ||
When the interned string reaches a refcnt of 0 the string deallocation | ||
function will delete the reference from this dictionary. | ||
|
||
Another way to look at this is to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0) | ||
*/ | ||
PyObject *unicode_interned; | ||
} cached; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please drop this struct. |
||
}; | ||
|
||
#define _Py_global_objects_INIT { \ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -234,6 +234,7 @@ static int unicode_is_singleton(PyObject *unicode); | |
|
||
|
||
#define IDENTIFIERS _Py_SINGLETON(unicode_ids) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you drop the |
||
#define INTERNED _Py_CACHED_OBJECT(unicode_interned) | ||
|
||
static struct _Py_unicode_state* | ||
get_unicode_state(void) | ||
|
@@ -1952,15 +1953,14 @@ unicode_dealloc(PyObject *unicode) | |
|
||
case SSTATE_INTERNED_MORTAL: | ||
{ | ||
struct _Py_unicode_state *state = get_unicode_state(); | ||
/* Revive the dead object temporarily. PyDict_DelItem() removes two | ||
references (key and value) which were ignored by | ||
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 | ||
to prevent calling unicode_dealloc() again. Adjust refcnt after | ||
PyDict_DelItem(). */ | ||
assert(Py_REFCNT(unicode) == 0); | ||
Py_SET_REFCNT(unicode, 3); | ||
if (PyDict_DelItem(state->interned, unicode) != 0) { | ||
if (PyDict_DelItem(INTERNED, unicode) != 0) { | ||
_PyErr_WriteUnraisableMsg("deletion of interned string failed", | ||
NULL); | ||
} | ||
|
@@ -15592,16 +15592,15 @@ PyUnicode_InternInPlace(PyObject **p) | |
return; | ||
} | ||
|
||
struct _Py_unicode_state *state = get_unicode_state(); | ||
if (state->interned == NULL) { | ||
state->interned = PyDict_New(); | ||
if (state->interned == NULL) { | ||
if (INTERNED == NULL) { | ||
INTERNED = PyDict_New(); | ||
if (INTERNED == NULL) { | ||
PyErr_Clear(); /* Don't leave an exception */ | ||
return; | ||
} | ||
} | ||
|
||
PyObject *t = PyDict_SetDefault(state->interned, s, s); | ||
PyObject *t = PyDict_SetDefault(INTERNED, s, s); | ||
if (t == NULL) { | ||
PyErr_Clear(); | ||
return; | ||
|
@@ -15654,25 +15653,24 @@ PyUnicode_InternFromString(const char *cp) | |
void | ||
_PyUnicode_ClearInterned(PyInterpreterState *interp) | ||
{ | ||
ericsnowcurrently marked this conversation as resolved.
Show resolved
Hide resolved
|
||
struct _Py_unicode_state *state = &interp->unicode; | ||
if (state->interned == NULL) { | ||
if (INTERNED == NULL) { | ||
return; | ||
} | ||
assert(PyDict_CheckExact(state->interned)); | ||
assert(PyDict_CheckExact(INTERNED)); | ||
|
||
/* Interned unicode strings are not forcibly deallocated; rather, we give | ||
them their stolen references back, and then clear and DECREF the | ||
interned dict. */ | ||
|
||
#ifdef INTERNED_STATS | ||
fprintf(stderr, "releasing %zd interned strings\n", | ||
PyDict_GET_SIZE(state->interned)); | ||
PyDict_GET_SIZE(INTERNED)); | ||
|
||
Py_ssize_t immortal_size = 0, mortal_size = 0; | ||
#endif | ||
Py_ssize_t pos = 0; | ||
PyObject *s, *ignored_value; | ||
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { | ||
while (PyDict_Next(INTERNED, &pos, &s, &ignored_value)) { | ||
assert(PyUnicode_IS_READY(s)); | ||
|
||
switch (PyUnicode_CHECK_INTERNED(s)) { | ||
|
@@ -15703,8 +15701,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) | |
mortal_size, immortal_size); | ||
#endif | ||
|
||
PyDict_Clear(state->interned); | ||
Py_CLEAR(state->interned); | ||
PyDict_Clear(INTERNED); | ||
Py_CLEAR(INTERNED); | ||
} | ||
|
||
|
||
|
@@ -16075,8 +16073,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) | |
static inline int | ||
unicode_is_finalizing(void) | ||
{ | ||
struct _Py_unicode_state *state = get_unicode_state(); | ||
return (state->interned == NULL); | ||
return (INTERNED == NULL); | ||
} | ||
#endif | ||
|
||
|
@@ -16087,7 +16084,7 @@ _PyUnicode_Fini(PyInterpreterState *interp) | |
struct _Py_unicode_state *state = &interp->unicode; | ||
|
||
// _PyUnicode_ClearInterned() must be called before | ||
assert(state->interned == NULL); | ||
assert(INTERNED == NULL); | ||
|
||
_PyUnicode_FiniEncodings(&state->fs_codec); | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you use "string" or "str" rather than "unicode"? Python 2 is history 🙂