Skip to content

Commit 35d6540

Browse files
authored
bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422)
This reverts commit ea25180. Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for the main interpreter. Keep _PyUnicode_ClearInterned() changes avoiding the creation of a temporary Python list object.
1 parent e5894ca commit 35d6540

File tree

4 files changed

+75
-30
lines changed

4 files changed

+75
-30
lines changed

Include/internal/pycore_unicodeobject.h

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -48,21 +48,11 @@ struct _Py_unicode_state {
4848
PyObject *latin1[256];
4949
struct _Py_unicode_fs_codec fs_codec;
5050

51-
/* This dictionary holds all interned unicode strings. Note that references
52-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
53-
When the interned string reaches a refcnt of 0 the string deallocation
54-
function will delete the reference from this dictionary.
55-
56-
Another way to look at this is to say that the actual reference
57-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
58-
*/
59-
PyObject *interned;
60-
6151
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
6252
struct _Py_unicode_ids ids;
6353
};
6454

65-
extern void _PyUnicode_ClearInterned(PyInterpreterState *);
55+
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
6656

6757

6858
#ifdef __cplusplus
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a regression when a type method like ``__init__()`` is modified in a
2+
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
3+
``update_slot()``. Revert the change which made the Unicode dictionary of
4+
interned strings compatible with subinterpreters: the internal interned
5+
dictionary is shared again by all interpreters. Patch by Victor Stinner.

Objects/typeobject.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ typedef struct PySlot_Offset {
5454
} PySlot_Offset;
5555

5656

57+
/* bpo-40521: Interned strings are shared by all subinterpreters */
58+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
59+
# define INTERN_NAME_STRINGS
60+
#endif
61+
5762
/* alphabetical order */
5863
_Py_IDENTIFIER(__abstractmethods__);
5964
_Py_IDENTIFIER(__annotations__);
@@ -4028,6 +4033,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40284033
if (name == NULL)
40294034
return -1;
40304035
}
4036+
#ifdef INTERN_NAME_STRINGS
40314037
if (!PyUnicode_CHECK_INTERNED(name)) {
40324038
PyUnicode_InternInPlace(&name);
40334039
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -4037,6 +4043,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
40374043
return -1;
40384044
}
40394045
}
4046+
#endif
40404047
}
40414048
else {
40424049
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8424,10 +8431,17 @@ _PyTypes_InitSlotDefs(void)
84248431
for (slotdef *p = slotdefs; p->name; p++) {
84258432
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
84268433
assert(!p[1].name || p->offset <= p[1].offset);
8434+
#ifdef INTERN_NAME_STRINGS
84278435
p->name_strobj = PyUnicode_InternFromString(p->name);
84288436
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
84298437
return _PyStatus_NO_MEMORY();
84308438
}
8439+
#else
8440+
p->name_strobj = PyUnicode_FromString(p->name);
8441+
if (!p->name_strobj) {
8442+
return _PyStatus_NO_MEMORY();
8443+
}
8444+
#endif
84318445
}
84328446
slotdefs_initialized = 1;
84338447
return _PyStatus_OK();
@@ -8452,16 +8466,24 @@ update_slot(PyTypeObject *type, PyObject *name)
84528466
int offset;
84538467

84548468
assert(PyUnicode_CheckExact(name));
8469+
#ifdef INTERN_NAME_STRINGS
84558470
assert(PyUnicode_CHECK_INTERNED(name));
8471+
#endif
84568472

84578473
assert(slotdefs_initialized);
84588474
pp = ptrs;
84598475
for (p = slotdefs; p->name; p++) {
84608476
assert(PyUnicode_CheckExact(p->name_strobj));
84618477
assert(PyUnicode_CheckExact(name));
8478+
#ifdef INTERN_NAME_STRINGS
84628479
if (p->name_strobj == name) {
84638480
*pp++ = p;
84648481
}
8482+
#else
8483+
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
8484+
*pp++ = p;
8485+
}
8486+
#endif
84658487
}
84668488
*pp = NULL;
84678489
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,22 @@ extern "C" {
214214
# define OVERALLOCATE_FACTOR 4
215215
#endif
216216

217+
/* bpo-40521: Interned strings are shared by all interpreters. */
218+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
219+
# define INTERNED_STRINGS
220+
#endif
221+
222+
/* This dictionary holds all interned unicode strings. Note that references
223+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
224+
When the interned string reaches a refcnt of 0 the string deallocation
225+
function will delete the reference from this dictionary.
226+
227+
Another way to look at this is to say that the actual reference
228+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
229+
*/
230+
#ifdef INTERNED_STRINGS
231+
static PyObject *interned = NULL;
232+
#endif
217233

218234
/* Forward declaration */
219235
static inline int
@@ -1950,20 +1966,21 @@ unicode_dealloc(PyObject *unicode)
19501966

19511967
case SSTATE_INTERNED_MORTAL:
19521968
{
1953-
struct _Py_unicode_state *state = get_unicode_state();
1969+
#ifdef INTERNED_STRINGS
19541970
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19551971
references (key and value) which were ignored by
19561972
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19571973
to prevent calling unicode_dealloc() again. Adjust refcnt after
19581974
PyDict_DelItem(). */
19591975
assert(Py_REFCNT(unicode) == 0);
19601976
Py_SET_REFCNT(unicode, 3);
1961-
if (PyDict_DelItem(state->interned, unicode) != 0) {
1977+
if (PyDict_DelItem(interned, unicode) != 0) {
19621978
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19631979
NULL);
19641980
}
19651981
assert(Py_REFCNT(unicode) == 1);
19661982
Py_SET_REFCNT(unicode, 0);
1983+
#endif
19671984
break;
19681985
}
19691986

@@ -11342,11 +11359,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1134211359
if (PyUnicode_CHECK_INTERNED(left))
1134311360
return 0;
1134411361

11362+
#ifdef INTERNED_STRINGS
1134511363
assert(_PyUnicode_HASH(right_uni) != -1);
1134611364
Py_hash_t hash = _PyUnicode_HASH(left);
1134711365
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1134811366
return 0;
1134911367
}
11368+
#endif
1135011369

1135111370
return unicode_compare_eq(left, right_uni);
1135211371
}
@@ -15591,21 +15610,21 @@ PyUnicode_InternInPlace(PyObject **p)
1559115610
return;
1559215611
}
1559315612

15613+
#ifdef INTERNED_STRINGS
1559415614
if (PyUnicode_READY(s) == -1) {
1559515615
PyErr_Clear();
1559615616
return;
1559715617
}
1559815618

15599-
struct _Py_unicode_state *state = get_unicode_state();
15600-
if (state->interned == NULL) {
15601-
state->interned = PyDict_New();
15602-
if (state->interned == NULL) {
15619+
if (interned == NULL) {
15620+
interned = PyDict_New();
15621+
if (interned == NULL) {
1560315622
PyErr_Clear(); /* Don't leave an exception */
1560415623
return;
1560515624
}
1560615625
}
1560715626

15608-
PyObject *t = PyDict_SetDefault(state->interned, s, s);
15627+
PyObject *t = PyDict_SetDefault(interned, s, s);
1560915628
if (t == NULL) {
1561015629
PyErr_Clear();
1561115630
return;
@@ -15622,9 +15641,13 @@ PyUnicode_InternInPlace(PyObject **p)
1562215641
this. */
1562315642
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1562415643
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15644+
#else
15645+
// PyDict expects that interned strings have their hash
15646+
// (PyASCIIObject.hash) already computed.
15647+
(void)unicode_hash(s);
15648+
#endif
1562515649
}
1562615650

15627-
1562815651
void
1562915652
PyUnicode_InternImmortal(PyObject **p)
1563015653
{
@@ -15658,25 +15681,29 @@ PyUnicode_InternFromString(const char *cp)
1565815681
void
1565915682
_PyUnicode_ClearInterned(PyInterpreterState *interp)
1566015683
{
15661-
struct _Py_unicode_state *state = &interp->unicode;
15662-
if (state->interned == NULL) {
15684+
if (!_Py_IsMainInterpreter(interp)) {
15685+
// interned dict is shared by all interpreters
1566315686
return;
1566415687
}
15665-
assert(PyDict_CheckExact(state->interned));
15688+
15689+
if (interned == NULL) {
15690+
return;
15691+
}
15692+
assert(PyDict_CheckExact(interned));
1566615693

1566715694
/* Interned unicode strings are not forcibly deallocated; rather, we give
1566815695
them their stolen references back, and then clear and DECREF the
1566915696
interned dict. */
1567015697

1567115698
#ifdef INTERNED_STATS
1567215699
fprintf(stderr, "releasing %zd interned strings\n",
15673-
PyDict_GET_SIZE(state->interned));
15700+
PyDict_GET_SIZE(interned));
1567415701

1567515702
Py_ssize_t immortal_size = 0, mortal_size = 0;
1567615703
#endif
1567715704
Py_ssize_t pos = 0;
1567815705
PyObject *s, *ignored_value;
15679-
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15706+
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1568015707
assert(PyUnicode_IS_READY(s));
1568115708

1568215709
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15707,8 +15734,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1570715734
mortal_size, immortal_size);
1570815735
#endif
1570915736

15710-
PyDict_Clear(state->interned);
15711-
Py_CLEAR(state->interned);
15737+
PyDict_Clear(interned);
15738+
Py_CLEAR(interned);
1571215739
}
1571315740

1571415741

@@ -16079,8 +16106,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
1607916106
static inline int
1608016107
unicode_is_finalizing(void)
1608116108
{
16082-
struct _Py_unicode_state *state = get_unicode_state();
16083-
return (state->interned == NULL);
16109+
return (interned == NULL);
1608416110
}
1608516111
#endif
1608616112

@@ -16090,8 +16116,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1609016116
{
1609116117
struct _Py_unicode_state *state = &interp->unicode;
1609216118

16093-
// _PyUnicode_ClearInterned() must be called before
16094-
assert(state->interned == NULL);
16119+
if (_Py_IsMainInterpreter(interp)) {
16120+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16121+
assert(interned == NULL);
16122+
}
1609516123

1609616124
_PyUnicode_FiniEncodings(&state->fs_codec);
1609716125

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy