diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index bfd082b588256b..4307b61ca36aaf 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -71,15 +71,9 @@ struct _Py_unicode_state { PyObject *latin1[256]; struct _Py_unicode_fs_codec fs_codec; - /* This dictionary holds all interned unicode strings. Note that references - to strings in this dictionary are *not* counted in the string's ob_refcnt. - When the interned string reaches a refcnt of 0 the string deallocation - function will delete the reference from this dictionary. - - Another way to look at this is that to say that the actual reference - count of a string is: s->ob_refcnt + (s->state ? 2 : 0) - */ - PyObject *interned; + // Unused member kept for ABI backward compatibility with Python 3.10.0: + // see bpo-46006. + PyObject *unused_interned; // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() struct _Py_unicode_ids ids; diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst new file mode 100644 index 00000000000000..3acd2b09390a8d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-05-17-13-47.bpo-46006.hdH5Vn.rst @@ -0,0 +1,5 @@ +Fix a regression when a type method like ``__init__()`` is modified in a +subinterpreter. Fix a regression in ``_PyUnicode_EqualToASCIIId()`` and type +``update_slot()``. Revert the change which made the Unicode dictionary of +interned strings compatible with subinterpreters: the internal interned +dictionary is shared again by all interpreters. Patch by Victor Stinner. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 02046e5f2ebefe..b23e36a420fa0c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -50,6 +50,11 @@ typedef struct PySlot_Offset { } PySlot_Offset; +/* bpo-40521: Interned strings are shared by all subinterpreters */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define INTERN_NAME_STRINGS +#endif + /* alphabetical order */ _Py_IDENTIFIER(__abstractmethods__); _Py_IDENTIFIER(__annotations__); @@ -3988,6 +3993,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) if (name == NULL) return -1; } +#ifdef INTERN_NAME_STRINGS if (!PyUnicode_CHECK_INTERNED(name)) { PyUnicode_InternInPlace(&name); if (!PyUnicode_CHECK_INTERNED(name)) { @@ -3997,6 +4003,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value) return -1; } } +#endif } else { /* Will fail in _PyObject_GenericSetAttrWithDict. */ @@ -8344,10 +8351,17 @@ _PyTypes_InitSlotDefs(void) for (slotdef *p = slotdefs; p->name; p++) { /* Slots must be ordered by their offset in the PyHeapTypeObject. */ assert(!p[1].name || p->offset <= p[1].offset); +#ifdef INTERN_NAME_STRINGS p->name_strobj = PyUnicode_InternFromString(p->name); if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) { return _PyStatus_NO_MEMORY(); } +#else + p->name_strobj = PyUnicode_FromString(p->name); + if (!p->name_strobj) { + return _PyStatus_NO_MEMORY(); + } +#endif } slotdefs_initialized = 1; return _PyStatus_OK(); @@ -8372,16 +8386,24 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; assert(PyUnicode_CheckExact(name)); +#ifdef INTERN_NAME_STRINGS assert(PyUnicode_CHECK_INTERNED(name)); +#endif assert(slotdefs_initialized); pp = ptrs; for (p = slotdefs; p->name; p++) { assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CheckExact(name)); +#ifdef INTERN_NAME_STRINGS if (p->name_strobj == name) { *pp++ = p; } +#else + if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) { + *pp++ = p; + } +#endif } *pp = NULL; for (pp = ptrs; *pp; pp++) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c72871074b3ebd..077cf8d7f45605 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -211,6 +211,22 @@ extern "C" { # define OVERALLOCATE_FACTOR 4 #endif +/* bpo-40521: Interned strings are shared by all interpreters. */ +#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS +# define INTERNED_STRINGS +#endif + +/* This dictionary holds all interned unicode strings. Note that references + to strings in this dictionary are *not* counted in the string's ob_refcnt. + When the interned string reaches a refcnt of 0 the string deallocation + function will delete the reference from this dictionary. + + Another way to look at this is that to say that the actual reference + count of a string is: s->ob_refcnt + (s->state ? 2 : 0) +*/ +#ifdef INTERNED_STRINGS +static PyObject *interned = NULL; +#endif static struct _Py_unicode_state* get_unicode_state(void) @@ -1936,7 +1952,7 @@ unicode_dealloc(PyObject *unicode) case SSTATE_INTERNED_MORTAL: { - struct _Py_unicode_state *state = get_unicode_state(); +#ifdef INTERNED_STRINGS /* Revive the dead object temporarily. PyDict_DelItem() removes two references (key and value) which were ignored by PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2 @@ -1944,12 +1960,13 @@ unicode_dealloc(PyObject *unicode) PyDict_DelItem(). */ assert(Py_REFCNT(unicode) == 0); Py_SET_REFCNT(unicode, 3); - if (PyDict_DelItem(state->interned, unicode) != 0) { + if (PyDict_DelItem(interned, unicode) != 0) { _PyErr_WriteUnraisableMsg("deletion of interned string failed", NULL); } assert(Py_REFCNT(unicode) == 1); Py_SET_REFCNT(unicode, 0); +#endif break; } @@ -11600,11 +11617,13 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; +#ifdef INTERNED_STRINGS assert(_PyUnicode_HASH(right_uni) != -1); Py_hash_t hash = _PyUnicode_HASH(left); if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) { return 0; } +#endif return unicode_compare_eq(left, right_uni); } @@ -15833,21 +15852,21 @@ PyUnicode_InternInPlace(PyObject **p) return; } +#ifdef INTERNED_STRINGS if (PyUnicode_READY(s) == -1) { PyErr_Clear(); return; } - struct _Py_unicode_state *state = get_unicode_state(); - if (state->interned == NULL) { - state->interned = PyDict_New(); - if (state->interned == NULL) { + if (interned == NULL) { + interned = PyDict_New(); + if (interned == NULL) { PyErr_Clear(); /* Don't leave an exception */ return; } } - PyObject *t = PyDict_SetDefault(state->interned, s, s); + PyObject *t = PyDict_SetDefault(interned, s, s); if (t == NULL) { PyErr_Clear(); return; @@ -15864,9 +15883,13 @@ PyUnicode_InternInPlace(PyObject **p) this. */ Py_SET_REFCNT(s, Py_REFCNT(s) - 2); _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL; +#else + // PyDict expects that interned strings have their hash + // (PyASCIIObject.hash) already computed. + (void)unicode_hash(s); +#endif } - void PyUnicode_InternImmortal(PyObject **p) { @@ -15900,11 +15923,15 @@ PyUnicode_InternFromString(const char *cp) void _PyUnicode_ClearInterned(PyInterpreterState *interp) { - struct _Py_unicode_state *state = &interp->unicode; - if (state->interned == NULL) { + if (!_Py_IsMainInterpreter(interp)) { + // interned dict is shared by all interpreters + return; + } + + if (interned == NULL) { return; } - assert(PyDict_CheckExact(state->interned)); + assert(PyDict_CheckExact(interned)); /* Interned unicode strings are not forcibly deallocated; rather, we give them their stolen references back, and then clear and DECREF the @@ -15912,13 +15939,13 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) #ifdef INTERNED_STATS fprintf(stderr, "releasing %zd interned strings\n", - PyDict_GET_SIZE(state->interned)); + PyDict_GET_SIZE(interned)); Py_ssize_t immortal_size = 0, mortal_size = 0; #endif Py_ssize_t pos = 0; PyObject *s, *ignored_value; - while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) { + while (PyDict_Next(interned, &pos, &s, &ignored_value)) { assert(PyUnicode_IS_READY(s)); switch (PyUnicode_CHECK_INTERNED(s)) { @@ -15949,8 +15976,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) mortal_size, immortal_size); #endif - PyDict_Clear(state->interned); - Py_CLEAR(state->interned); + PyDict_Clear(interned); + Py_CLEAR(interned); } @@ -16322,8 +16349,10 @@ _PyUnicode_Fini(PyInterpreterState *interp) { struct _Py_unicode_state *state = &interp->unicode; - // _PyUnicode_ClearInterned() must be called before - assert(state->interned == NULL); + if (_Py_IsMainInterpreter(interp)) { + // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini() + assert(interned == NULL); + } _PyUnicode_FiniEncodings(&state->fs_codec); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy