Skip to content

Commit 72c260c

Browse files
authored
[3.10] bpo-46006: Revert "bpo-40521: Per-interpreter interned strings (GH-20085)" (GH-30422) (GH-30425)
This reverts commit ea25180. Keep "assert(interned == NULL);" in _PyUnicode_Fini(), but only for the main interpreter. Keep _PyUnicode_ClearInterned() changes avoiding the creation of a temporary Python list object. Leave the PyInterpreterState structure unchanged to keep the ABI backward compatibility with Python 3.10.0: rename the "interned" member to "unused_interned". (cherry picked from commit 35d6540)
1 parent 861a9aa commit 72c260c

File tree

4 files changed

+76
-26
lines changed

4 files changed

+76
-26
lines changed

Include/internal/pycore_interp.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,15 +71,9 @@ struct _Py_unicode_state {
7171
PyObject *latin1[256];
7272
struct _Py_unicode_fs_codec fs_codec;
7373

74-
/* This dictionary holds all interned unicode strings. Note that references
75-
to strings in this dictionary are *not* counted in the string's ob_refcnt.
76-
When the interned string reaches a refcnt of 0 the string deallocation
77-
function will delete the reference from this dictionary.
78-
79-
Another way to look at this is that to say that the actual reference
80-
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
81-
*/
82-
PyObject *interned;
74+
// Unused member kept for ABI backward compatibility with Python 3.10.0:
75+
// see bpo-46006.
76+
PyObject *unused_interned;
8377

8478
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
8579
struct _Py_unicode_ids ids;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fix a regression when a type method like ``__init__()`` is modified in a
2+
subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type
3+
``update_slot()``. Revert the change which made the Unicode dictionary of
4+
interned strings compatible with subinterpreters: the internal interned
5+
dictionary is shared again by all interpreters. Patch by Victor Stinner.

Objects/typeobject.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,11 @@ typedef struct PySlot_Offset {
5050
} PySlot_Offset;
5151

5252

53+
/* bpo-40521: Interned strings are shared by all interpreters. */
54+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
55+
# define INTERN_NAME_STRINGS
56+
#endif
57+
5358
/* alphabetical order */
5459
_Py_IDENTIFIER(__abstractmethods__);
5560
_Py_IDENTIFIER(__annotations__);
@@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
39883993
if (name == NULL)
39893994
return -1;
39903995
}
3996+
#ifdef INTERN_NAME_STRINGS
39913997
if (!PyUnicode_CHECK_INTERNED(name)) {
39923998
PyUnicode_InternInPlace(&name);
39933999
if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
39974003
return -1;
39984004
}
39994005
}
4006+
#endif
40004007
}
40014008
else {
40024009
/* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void)
83448351
for (slotdef *p = slotdefs; p->name; p++) {
83458352
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
83468353
assert(!p[1].name || p->offset <= p[1].offset);
8354+
#ifdef INTERN_NAME_STRINGS
83478355
p->name_strobj = PyUnicode_InternFromString(p->name);
83488356
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
83498357
return _PyStatus_NO_MEMORY();
83508358
}
8359+
#else
8360+
p->name_strobj = PyUnicode_FromString(p->name);
8361+
if (!p->name_strobj) {
8362+
return _PyStatus_NO_MEMORY();
8363+
}
8364+
#endif
83518365
}
83528366
slotdefs_initialized = 1;
83538367
return _PyStatus_OK();
@@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name)
83728386
int offset;
83738387

83748388
assert(PyUnicode_CheckExact(name));
8389+
#ifdef INTERN_NAME_STRINGS
83758390
assert(PyUnicode_CHECK_INTERNED(name));
8391+
#endif
83768392

83778393
assert(slotdefs_initialized);
83788394
pp = ptrs;
83798395
for (p = slotdefs; p->name; p++) {
83808396
assert(PyUnicode_CheckExact(p->name_strobj));
83818397
assert(PyUnicode_CheckExact(name));
8398+
#ifdef INTERN_NAME_STRINGS
83828399
if (p->name_strobj == name) {
83838400
*pp++ = p;
83848401
}
8402+
#else
8403+
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
8404+
*pp++ = p;
8405+
}
8406+
#endif
83858407
}
83868408
*pp = NULL;
83878409
for (pp = ptrs; *pp; pp++) {

Objects/unicodeobject.c

Lines changed: 46 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,22 @@ extern "C" {
211211
# define OVERALLOCATE_FACTOR 4
212212
#endif
213213

214+
/* bpo-40521: Interned strings are shared by all interpreters. */
215+
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
216+
# define INTERNED_STRINGS
217+
#endif
218+
219+
/* This dictionary holds all interned unicode strings. Note that references
220+
to strings in this dictionary are *not* counted in the string's ob_refcnt.
221+
When the interned string reaches a refcnt of 0 the string deallocation
222+
function will delete the reference from this dictionary.
223+
224+
Another way to look at this is to say that the actual reference
225+
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
226+
*/
227+
#ifdef INTERNED_STRINGS
228+
static PyObject *interned = NULL;
229+
#endif
214230

215231
static struct _Py_unicode_state*
216232
get_unicode_state(void)
@@ -1936,20 +1952,21 @@ unicode_dealloc(PyObject *unicode)
19361952

19371953
case SSTATE_INTERNED_MORTAL:
19381954
{
1939-
struct _Py_unicode_state *state = get_unicode_state();
1955+
#ifdef INTERNED_STRINGS
19401956
/* Revive the dead object temporarily. PyDict_DelItem() removes two
19411957
references (key and value) which were ignored by
19421958
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
19431959
to prevent calling unicode_dealloc() again. Adjust refcnt after
19441960
PyDict_DelItem(). */
19451961
assert(Py_REFCNT(unicode) == 0);
19461962
Py_SET_REFCNT(unicode, 3);
1947-
if (PyDict_DelItem(state->interned, unicode) != 0) {
1963+
if (PyDict_DelItem(interned, unicode) != 0) {
19481964
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
19491965
NULL);
19501966
}
19511967
assert(Py_REFCNT(unicode) == 1);
19521968
Py_SET_REFCNT(unicode, 0);
1969+
#endif
19531970
break;
19541971
}
19551972

@@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1160011617
if (PyUnicode_CHECK_INTERNED(left))
1160111618
return 0;
1160211619

11620+
#ifdef INTERNED_STRINGS
1160311621
assert(_PyUnicode_HASH(right_uni) != -1);
1160411622
Py_hash_t hash = _PyUnicode_HASH(left);
1160511623
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
1160611624
return 0;
1160711625
}
11626+
#endif
1160811627

1160911628
return unicode_compare_eq(left, right_uni);
1161011629
}
@@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p)
1583315852
return;
1583415853
}
1583515854

15855+
#ifdef INTERNED_STRINGS
1583615856
if (PyUnicode_READY(s) == -1) {
1583715857
PyErr_Clear();
1583815858
return;
1583915859
}
1584015860

15841-
struct _Py_unicode_state *state = get_unicode_state();
15842-
if (state->interned == NULL) {
15843-
state->interned = PyDict_New();
15844-
if (state->interned == NULL) {
15861+
if (interned == NULL) {
15862+
interned = PyDict_New();
15863+
if (interned == NULL) {
1584515864
PyErr_Clear(); /* Don't leave an exception */
1584615865
return;
1584715866
}
1584815867
}
1584915868

15850-
PyObject *t = PyDict_SetDefault(state->interned, s, s);
15869+
PyObject *t = PyDict_SetDefault(interned, s, s);
1585115870
if (t == NULL) {
1585215871
PyErr_Clear();
1585315872
return;
@@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p)
1586415883
this. */
1586515884
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
1586615885
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
15886+
#else
15887+
// PyDict expects that interned strings have their hash
15888+
// (PyASCIIObject.hash) already computed.
15889+
(void)unicode_hash(s);
15890+
#endif
1586715891
}
1586815892

15869-
1587015893
void
1587115894
PyUnicode_InternImmortal(PyObject **p)
1587215895
{
@@ -15900,25 +15923,29 @@ PyUnicode_InternFromString(const char *cp)
1590015923
void
1590115924
_PyUnicode_ClearInterned(PyInterpreterState *interp)
1590215925
{
15903-
struct _Py_unicode_state *state = &interp->unicode;
15904-
if (state->interned == NULL) {
15926+
if (!_Py_IsMainInterpreter(interp)) {
15927+
// interned dict is shared by all interpreters
15928+
return;
15929+
}
15930+
15931+
if (interned == NULL) {
1590515932
return;
1590615933
}
15907-
assert(PyDict_CheckExact(state->interned));
15934+
assert(PyDict_CheckExact(interned));
1590815935

1590915936
/* Interned unicode strings are not forcibly deallocated; rather, we give
1591015937
them their stolen references back, and then clear and DECREF the
1591115938
interned dict. */
1591215939

1591315940
#ifdef INTERNED_STATS
1591415941
fprintf(stderr, "releasing %zd interned strings\n",
15915-
PyDict_GET_SIZE(state->interned));
15942+
PyDict_GET_SIZE(interned));
1591615943

1591715944
Py_ssize_t immortal_size = 0, mortal_size = 0;
1591815945
#endif
1591915946
Py_ssize_t pos = 0;
1592015947
PyObject *s, *ignored_value;
15921-
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
15948+
while (PyDict_Next(interned, &pos, &s, &ignored_value)) {
1592215949
assert(PyUnicode_IS_READY(s));
1592315950

1592415951
switch (PyUnicode_CHECK_INTERNED(s)) {
@@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1594915976
mortal_size, immortal_size);
1595015977
#endif
1595115978

15952-
PyDict_Clear(state->interned);
15953-
Py_CLEAR(state->interned);
15979+
PyDict_Clear(interned);
15980+
Py_CLEAR(interned);
1595415981
}
1595515982

1595615983

@@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp)
1632216349
{
1632316350
struct _Py_unicode_state *state = &interp->unicode;
1632416351

16325-
// _PyUnicode_ClearInterned() must be called before
16326-
assert(state->interned == NULL);
16352+
if (_Py_IsMainInterpreter(interp)) {
16353+
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
16354+
assert(interned == NULL);
16355+
}
1632716356

1632816357
_PyUnicode_FiniEncodings(&state->fs_codec);
1632916358

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy