From 0e995ad361ec91755581bb06693fba9d0bf6c49e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 12 Mar 2025 15:26:52 -0700 Subject: [PATCH 01/45] wip: update type slots, stop-the-world --- Include/internal/pycore_interp.h | 1 + Objects/typeobject.c | 53 ++++++++++++++++++++++ Python/clinic/sysmodule.c.h | 20 +++++++- Python/pystate.c | 2 + Python/sysmodule.c | 22 +++++++++ Tools/tsan/suppressions_free_threading.txt | 6 +-- 6 files changed, 100 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index c247c7270d1caa..0b801df1126b61 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -57,6 +57,7 @@ struct _stoptheworld_state { bool requested; // Set when a pause is requested. bool world_stopped; // Set when the world is stopped. bool is_global; // Set when contained in PyRuntime struct. + Py_ssize_t world_stops; // Number of times the world was stopped. PyEvent stop_event; // Set when thread_countdown reaches zero. Py_ssize_t thread_countdown; // Number of threads that must pause. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bc840ed51ffe4c..61f87cd8c9b7b8 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3490,6 +3490,9 @@ static int update_slot(PyTypeObject *, PyObject *); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); +#ifdef Py_GIL_DISABLED +static bool has_slotdef(PyObject *); +#endif /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -5943,6 +5946,23 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } +#ifdef Py_GIL_DISABLED +static int +update_slot_world_stopped(PyTypeObject *type, PyObject *name) +{ + // Modification of type slots is protected by the global type + // lock. However, type slots are read non-atomically without holding the + // type lock. So, we need to stop-the-world while modifying slots, in + // order to avoid data races. This is unfortunately quite expensive. + int ret; + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + ret = update_slot(type, name); + _PyEval_StartTheWorld(interp); + return ret; +} +#endif + static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -5972,9 +5992,15 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } +#if Py_GIL_DISABLED + if (is_dunder_name(name) && has_slotdef(name)) { + return update_slot_world_stopped(type, name); + } +#else if (is_dunder_name(name)) { return update_slot(type, name); } +#endif return 0; } @@ -11002,6 +11028,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } +#ifdef Py_GIL_DISABLED +// Return true if "name" corresponds to at least one slot definition. This is +// used to avoid calling update_slot() if is_dunder_name() is true but it's +// not actually a slot. +static bool +has_slotdef(PyObject *name) +{ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) { + return true; + } + } + return false; +} +#endif /* Common code for update_slots_callback() and fixup_slot_dispatchers(). * @@ -11241,6 +11282,7 @@ fixup_slot_dispatchers(PyTypeObject *type) END_TYPE_LOCK(); } +// Called when __bases__ is re-assigned. 
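+//
+// [Editor's sketch, not part of the original patch: the race that the
+// stop-the-world calls above prevent. Slot readers use plain, unlocked
+// loads, for example:
+//
+//     binaryfunc f = Py_TYPE(x)->tp_as_number->nb_add;  // no lock held
+//
+// so a writer may only store into a slot while every other thread is
+// parked at a safe point, i.e. between _PyEval_StopTheWorld() and
+// _PyEval_StartTheWorld().]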
static void update_all_slots(PyTypeObject* type) { @@ -11248,6 +11290,13 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); + // Similar to update_slot_world_stopped(), this is required to + // avoid races. We do it once here rather than once per-slot. +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); +#endif + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ type_modified_unlocked(type); @@ -11255,6 +11304,10 @@ update_all_slots(PyTypeObject* type) /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } + +#ifdef Py_GIL_DISABLED + _PyEval_StartTheWorld(interp); +#endif } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 1e53624d4d45d7..9a1444f636faa4 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1711,6 +1711,24 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) return return_value; } +PyDoc_STRVAR(sys__get_world_stops__doc__, +"_get_world_stops($module, /)\n" +"--\n" +"\n" +"Return the number of times the \"stop-the-world\" lock was used."); + +#define SYS__GET_WORLD_STOPS_METHODDEF \ + {"_get_world_stops", (PyCFunction)sys__get_world_stops, METH_NOARGS, sys__get_world_stops__doc__}, + +static PyObject * +sys__get_world_stops_impl(PyObject *module); + +static PyObject * +sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys__get_world_stops_impl(module); +} + #ifndef SYS_GETWINDOWSVERSION_METHODDEF #define SYS_GETWINDOWSVERSION_METHODDEF #endif /* !defined(SYS_GETWINDOWSVERSION_METHODDEF) */ @@ -1754,4 +1772,4 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=1e5f608092c12636 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=35e1d29a46e54085 input=a9049054013a1b77]*/ diff --git a/Python/pystate.c b/Python/pystate.c index fcd12d1b933360..68cf495d15313d 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2355,6 +2355,7 @@ start_the_world(struct _stoptheworld_state *stw) assert(PyMutex_IsLocked(&stw->mutex)); HEAD_LOCK(runtime); + stw->world_stops++; stw->requested = 0; stw->world_stopped = 0; // Switch threads back to the detached state. @@ -2730,6 +2731,7 @@ _PyGILState_Fini(PyInterpreterState *interp) return; } interp->runtime->gilstate.autoInterpreterState = NULL; + //fprintf(stderr, "world stops %zd\n", interp->stoptheworld.world_stops); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index adaa5ee1c74aa5..ee4ee2765c1bfe 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2527,6 +2527,27 @@ sys__is_gil_enabled_impl(PyObject *module) #endif } +/*[clinic input] +sys._get_world_stops + +Return the number of times the "stop-the-world" condition was true. 
+[clinic start generated code]*/ + +static PyObject * +sys__get_world_stops_impl(PyObject *module) +/*[clinic end generated code: output=7886d32b71a94e72 input=44a9bde7e07b30e3]*/ +{ + Py_ssize_t stops; +#ifdef Py_GIL_DISABLED + PyInterpreterState *interp = _PyInterpreterState_GET(); + stops = interp->stoptheworld.world_stops; +#else + stops = 0; +#endif + return PyLong_FromLong(stops); +} + + static PerfMapState perf_map_state; @@ -2703,6 +2724,7 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF + SYS__GET_WORLD_STOPS_METHODDEF SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index fd47c85af1adb1..d2763714ecf1b7 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -14,7 +14,7 @@ race_top:assign_version_tag race_top:_multiprocessing_SemLock_acquire_impl -race_top:_Py_slot_tp_getattr_hook +#race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_multiprocessing_SemLock_release_impl @@ -22,7 +22,7 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -race_top:type_modified_unlocked +#race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -32,7 +32,7 @@ race_top:rangeiter_next race_top:mi_block_set_nextx # gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -race_top:update_one_slot +#race_top:update_one_slot # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create From b173f1769c638f21c100e55aefc5e0e60cc84609 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 13 Mar 2025 15:03:04 -0700 Subject: [PATCH 02/45] Remove unneeded atomics for tp_flags. --- Include/internal/pycore_object.h | 2 +- Include/object.h | 6 +----- Objects/typeobject.c | 6 ++---- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0b686b416ec193..7a0793f7b44474 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -311,7 +311,7 @@ extern int _PyDict_CheckConsistency(PyObject *mp, int check_content); // Fast inlined version of PyType_HasFeature() static inline int _PyType_HasFeature(PyTypeObject *type, unsigned long feature) { - return ((FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags) & feature) != 0); + return ((type->tp_flags) & feature) != 0; } extern void _PyType_InitCache(PyInterpreterState *interp); diff --git a/Include/object.h b/Include/object.h index 8cc83abb8574e3..d23918c2da67f9 100644 --- a/Include/object.h +++ b/Include/object.h @@ -776,11 +776,7 @@ PyType_HasFeature(PyTypeObject *type, unsigned long feature) // PyTypeObject is opaque in the limited C API flags = PyType_GetFlags(type); #else -# ifdef Py_GIL_DISABLED - flags = _Py_atomic_load_ulong_relaxed(&type->tp_flags); -# else - flags = type->tp_flags; -# endif + flags = type->tp_flags; #endif return ((flags & feature) != 0); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 61f87cd8c9b7b8..ad8431908f1922 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -353,9 +353,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // held when flags are modified. 
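     // [Editor's note, not part of the patch: dropping the relaxed atomics
     // is only sound because this series arranges for every post-creation
     // write to tp_flags to happen while all other threads are stopped; a
     // plain load racing with a plain store would otherwise be undefined
     // behavior under the C11 memory model.]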
ASSERT_TYPE_LOCK_HELD(); } - // Since PyType_HasFeature() reads the flags without holding the type - // lock, we need an atomic store here. - FT_ATOMIC_STORE_ULONG_RELAXED(tp->tp_flags, flags); + tp->tp_flags = flags; } static void @@ -3690,7 +3688,7 @@ type_init(PyObject *cls, PyObject *args, PyObject *kwds) unsigned long PyType_GetFlags(PyTypeObject *type) { - return FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags); + return type->tp_flags; } From d4ce112310c8bfe2683a2ccedc4323ec591c771f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 13 Mar 2025 16:25:24 -0700 Subject: [PATCH 03/45] Use stop-the-world for tp_flag changes too. --- Objects/typeobject.c | 45 ++++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ad8431908f1922..96802e588a6e7d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -84,6 +84,17 @@ class object "PyObject *" "&PyBaseObject_Type" #endif +// Modification of type slots and type flags is protected by the global type +// lock. However, the slots and flags are read non-atomically without holding +// the type lock. So, we need to stop-the-world while modifying these, in +// order to avoid data races. This is unfortunately a bit expensive. +#ifdef Py_GIL_DISABLED +// If defined, type slot updates (after initial creation) will stop-the-world. +#define TYPE_SLOT_UPDATE_NEEDS_STOP +// If defined, type flag updates (after initial creation) will stop-the-world. +#define TYPE_FLAGS_UPDATE_NEEDS_STOP +#endif + #define PyTypeObject_CAST(op) ((PyTypeObject *)(op)) typedef struct PySlot_Offset { @@ -1582,9 +1593,10 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) BEGIN_TYPE_LOCK(); type_modified_unlocked(type); if (abstract) - type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT); + _PyType_SetFlags(type, 0, Py_TPFLAGS_IS_ABSTRACT); else - type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT); + _PyType_SetFlags(type, Py_TPFLAGS_IS_ABSTRACT, 0); + END_TYPE_LOCK(); return 0; @@ -5780,7 +5792,17 @@ void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { BEGIN_TYPE_LOCK(); - type_set_flags_with_mask(self, mask, flags); + unsigned long new_flags = (self->tp_flags & ~mask) | flags; + if (new_flags != self->tp_flags) { +#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); +#endif + self->tp_flags = new_flags; +#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP + _PyEval_StartTheWorld(interp); +#endif + } END_TYPE_LOCK(); } @@ -5944,14 +5966,10 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP static int update_slot_world_stopped(PyTypeObject *type, PyObject *name) { - // Modification of type slots is protected by the global type - // lock. However, type slots are read non-atomically without holding the - // type lock. So, we need to stop-the-world while modifying slots, in - // order to avoid data races. This is unfortunately quite expensive. 
int ret; PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); @@ -5990,7 +6008,7 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } -#if Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP if (is_dunder_name(name) && has_slotdef(name)) { return update_slot_world_stopped(type, name); } @@ -11026,10 +11044,9 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP // Return true if "name" corresponds to at least one slot definition. This is -// used to avoid calling update_slot() if is_dunder_name() is true but it's -// not actually a slot. +// a more accurate but more expensive test compared to is_dunder_name(). static bool has_slotdef(PyObject *name) { @@ -11288,9 +11305,9 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP // Similar to update_slot_world_stopped(), this is required to // avoid races. We do it once here rather than once per-slot. -#ifdef Py_GIL_DISABLED PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); #endif @@ -11303,7 +11320,7 @@ update_all_slots(PyTypeObject* type) update_slot(type, p->name_strobj); } -#ifdef Py_GIL_DISABLED +#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP _PyEval_StartTheWorld(interp); #endif } From 44eb332fd5754f00cdd7c30de890738e1167580e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 11:54:59 -0700 Subject: [PATCH 04/45] Remove 'world_stops' and 'sys._get_world_stops'. This would be better to be part of the pystats info. --- Include/internal/pycore_interp.h | 1 - Python/clinic/sysmodule.c.h | 20 +------------------- Python/pystate.c | 2 -- Python/sysmodule.c | 22 ---------------------- 4 files changed, 1 insertion(+), 44 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 0b801df1126b61..c247c7270d1caa 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -57,7 +57,6 @@ struct _stoptheworld_state { bool requested; // Set when a pause is requested. bool world_stopped; // Set when the world is stopped. bool is_global; // Set when contained in PyRuntime struct. - Py_ssize_t world_stops; // Number of times the world was stopped. PyEvent stop_event; // Set when thread_countdown reaches zero. Py_ssize_t thread_countdown; // Number of threads that must pause. 
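[Editor's aside, not part of the patch series: the commit message suggests
moving this counter into pystats. A minimal sketch of that idea in C,
assuming a hypothetical `world_stops` field were added to the existing
ObjectStats struct; the field and its placement inside start_the_world()
are invented for illustration and are not existing CPython API:

    #ifdef Py_STATS
    if (_Py_stats != NULL) {
        // hypothetical counter; no `world_stops` field exists today
        _Py_stats->object_stats.world_stops++;
    }
    #endif
]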
diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 9a1444f636faa4..1e53624d4d45d7 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1711,24 +1711,6 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) return return_value; } -PyDoc_STRVAR(sys__get_world_stops__doc__, -"_get_world_stops($module, /)\n" -"--\n" -"\n" -"Return the number of times the \"stop-the-world\" lock was used."); - -#define SYS__GET_WORLD_STOPS_METHODDEF \ - {"_get_world_stops", (PyCFunction)sys__get_world_stops, METH_NOARGS, sys__get_world_stops__doc__}, - -static PyObject * -sys__get_world_stops_impl(PyObject *module); - -static PyObject * -sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) -{ - return sys__get_world_stops_impl(module); -} - #ifndef SYS_GETWINDOWSVERSION_METHODDEF #define SYS_GETWINDOWSVERSION_METHODDEF #endif /* !defined(SYS_GETWINDOWSVERSION_METHODDEF) */ @@ -1772,4 +1754,4 @@ sys__get_world_stops(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=35e1d29a46e54085 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1e5f608092c12636 input=a9049054013a1b77]*/ diff --git a/Python/pystate.c b/Python/pystate.c index 68cf495d15313d..fcd12d1b933360 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2355,7 +2355,6 @@ start_the_world(struct _stoptheworld_state *stw) assert(PyMutex_IsLocked(&stw->mutex)); HEAD_LOCK(runtime); - stw->world_stops++; stw->requested = 0; stw->world_stopped = 0; // Switch threads back to the detached state. @@ -2731,7 +2730,6 @@ _PyGILState_Fini(PyInterpreterState *interp) return; } interp->runtime->gilstate.autoInterpreterState = NULL; - //fprintf(stderr, "world stops %zd\n", interp->stoptheworld.world_stops); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index ee4ee2765c1bfe..adaa5ee1c74aa5 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2527,27 +2527,6 @@ sys__is_gil_enabled_impl(PyObject *module) #endif } -/*[clinic input] -sys._get_world_stops - -Return the number of times the "stop-the-world" condition was true. -[clinic start generated code]*/ - -static PyObject * -sys__get_world_stops_impl(PyObject *module) -/*[clinic end generated code: output=7886d32b71a94e72 input=44a9bde7e07b30e3]*/ -{ - Py_ssize_t stops; -#ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - stops = interp->stoptheworld.world_stops; -#else - stops = 0; -#endif - return PyLong_FromLong(stops); -} - - static PerfMapState perf_map_state; @@ -2724,7 +2703,6 @@ static PyMethodDef sys_methods[] = { #endif SYS__GET_CPU_COUNT_CONFIG_METHODDEF SYS__IS_GIL_ENABLED_METHODDEF - SYS__GET_WORLD_STOPS_METHODDEF SYS__DUMP_TRACELETS_METHODDEF {NULL, NULL} // sentinel }; From d132fab96784d297d30250d2c92c431651058ef2 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 12:06:43 -0700 Subject: [PATCH 05/45] Improve code comments. --- Objects/typeobject.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 96802e588a6e7d..65f6442edd2350 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5979,6 +5979,8 @@ update_slot_world_stopped(PyTypeObject *type, PyObject *name) } #endif +// Called by type_setattro(). Updates both the type dict and +// any type slots that correspond to the modified entry. 
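+//
+// [Editor's sketch, not in the patch: a Python-level assignment such as
+// `C.__len__ = f` reaches this function via
+//     type_setattro() -> type_update_dict() -> update_slot_world_stopped()
+// and pays the stop-the-world cost, while a non-dunder assignment like
+// `C.helper = f` only updates the dict, since the is_dunder_name() and
+// has_slotdef() checks rule out a slot change.]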
static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) @@ -11306,8 +11308,9 @@ update_all_slots(PyTypeObject* type) ASSERT_TYPE_LOCK_HELD(); #ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - // Similar to update_slot_world_stopped(), this is required to - // avoid races. We do it once here rather than once per-slot. + // Similar to update_slot_world_stopped(), this is required to avoid + // races. We don't use update_slot_world_stopped() here because we want + // to stop once rather than once per slot. PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); #endif From 658bcd52ab4a85134bed94eb6f30ce94a03924f7 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 12:07:13 -0700 Subject: [PATCH 06/45] Remove TSAN suppressions that seem unneeded. --- Tools/tsan/suppressions_free_threading.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index d2763714ecf1b7..17a0d8dc6a0f01 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -14,7 +14,6 @@ race_top:assign_version_tag race_top:_multiprocessing_SemLock_acquire_impl -#race_top:_Py_slot_tp_getattr_hook race_top:dump_traceback race_top:fatal_error race_top:_multiprocessing_SemLock_release_impl @@ -22,7 +21,6 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:_PyObject_TryGetInstanceAttribute race_top:PyUnstable_InterpreterFrame_GetLine -#race_top:type_modified_unlocked race_top:write_thread_id # gh-129068: race on shared range iterators (test_free_threading.test_zip.ZipThreading.test_threading) @@ -31,9 +29,6 @@ race_top:rangeiter_next # gh-129748: test.test_free_threading.test_slots.TestSlots.test_object race_top:mi_block_set_nextx -# gh-127266: type slot updates are not thread-safe (test_opcache.test_load_attr_method_lazy_dict) -#race_top:update_one_slot - # https://gist.github.com/mpage/6962e8870606cfc960e159b407a0cb40 thread:pthread_create From ef2f07b47475faeb631f8ac89f8bc4eec865e6ed Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 14 Mar 2025 13:08:26 -0700 Subject: [PATCH 07/45] Add NEWS file. --- .../2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst new file mode 100644 index 00000000000000..b26977628de136 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-14-13-08-20.gh-issue-127266._tyfBp.rst @@ -0,0 +1,6 @@ +In the free-threaded build, avoid data races caused by updating type slots +or type flags after the type was initially created. For those (typically +rare) cases, use the stop-the-world mechanism. Remove the use of atomics +when reading or writing type flags. The use of atomics is not sufficient to +avoid races (since flags are sometimes read without a lock and without +atomics) and are no longer required. From ce8536db3f59037a44db845f148e9610dfd080ad Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 22 Mar 2025 15:34:45 -0700 Subject: [PATCH 08/45] Use mutex rather than critical sections. For TYPE_LOCK, replace critical sections with a simple mutex. 
A number of changes were needed to avoid deadlocking on reentrancy
(trying to re-acquire the mutex).

Split _PyType_LookupRefAndVersion() into a locked and unlocked version.

Remove atomic operations where they are not required.

Remove some cases where TYPE_LOCK was held when it was not required.
---
 Include/internal/pycore_typeobject.h |   2 +
 Include/object.h                     |   4 +-
 Objects/typeobject.c                 | 578 +++++++++++++++++----------
 3 files changed, 372 insertions(+), 212 deletions(-)

diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h
index 581153344a8e05..677212452eb9ee 100644
--- a/Include/internal/pycore_typeobject.h
+++ b/Include/internal/pycore_typeobject.h
@@ -144,6 +144,8 @@ struct types_state {
         managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES];
     } for_extensions;
     PyMutex mutex;
+    // used to check correct usage of the above mutex
+    unsigned long long mutex_tid;
 
     // Borrowed references to type objects whose
     // tp_version_tag % TYPE_VERSION_CACHE_SIZE
diff --git a/Include/object.h b/Include/object.h
index d23918c2da67f9..82b7ede695d819 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -576,8 +576,8 @@ given type object has a specified feature.
 /* Objects behave like an unbound method */
 #define Py_TPFLAGS_METHOD_DESCRIPTOR (1UL << 17)
 
-/* Unused. Legacy flag */
-#define Py_TPFLAGS_VALID_VERSION_TAG  (1UL << 19)
+/* Type structure is potentially exposed (revealed) to other threads */
+#define Py_TPFLAGS_EXPOSED  (1UL << 19)
 
 /* Type is abstract and cannot be instantiated */
 #define Py_TPFLAGS_IS_ABSTRACT (1UL << 20)
diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 65f6442edd2350..2408a1cf3c85de 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -62,39 +62,100 @@ class object "PyObject *" "&PyBaseObject_Type"
 // in odd behaviors w.r.t. running with the GIL as the outer type lock could
 // be released and reacquired during a subclass update if there's contention
 // on the subclass lock.
+//
+// Modification of type slots and type flags is protected by the
+// global types lock. However, the slots and flags are read non-atomically
+// without holding the type lock. So, we need to stop-the-world while
+// modifying these, in order to avoid data races.
 #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
-#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK)
-#define END_TYPE_LOCK() Py_END_CRITICAL_SECTION()
 
-#define BEGIN_TYPE_DICT_LOCK(d) \
-    Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex)
+#ifdef Py_DEBUG
+// Used to check for correct use of the TYPE_LOCK mutex. It is a simple
+// mutex and does not support re-entrancy. If we already hold the lock and
+// try to acquire it again with the same thread, it is a bug in the code.
+#define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid
+#endif
+
+static bool
+types_world_is_stopped(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    return interp->stoptheworld.world_stopped;
+}
+
+// Checks that either the type has not yet been exposed (revealed) to other
+// threads (it was newly created and the Py_TPFLAGS_EXPOSED flag is not set
+// yet) or the world is stopped and we can safely update it without races.
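+//
+// [Editor's sketch, not in the patch: the lifecycle this assert relies on
+// is
+//
+//     type = type_new_impl(...);             // type still thread-private
+//     ...plain writes to slots and flags...
+//     type->tp_flags |= Py_TPFLAGS_EXPOSED;  // publication point
+//     ...from here on, mutation requires types_stop_world()...
+//
+// so the assert holds on both sides of the publication point.]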
+#define ASSERT_NEW_OR_STOPPED(tp) \ + assert((((tp)->tp_flags & Py_TPFLAGS_EXPOSED) == 0) || types_world_is_stopped()) + +static void +types_stop_world(void) +{ + assert(!types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StopTheWorld(interp); + assert(types_world_is_stopped()); +} + +static void +types_start_world(void) +{ + assert(types_world_is_stopped()); + PyInterpreterState *interp = _PyInterpreterState_GET(); + _PyEval_StartTheWorld(interp); + assert(!types_world_is_stopped()); +} + +static bool +types_mutex_is_owned(void) +{ + PyThread_ident_t tid = PyThread_get_thread_ident_ex(); + return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; +} + +static void +types_mutex_set_owned(PyThread_ident_t tid) +{ + _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); +} + +static void +types_mutex_lock(void) +{ + if (types_world_is_stopped()) { + return; + } + assert(!types_mutex_is_owned()); + PyMutex_Lock(TYPE_LOCK); + types_mutex_set_owned(PyThread_get_thread_ident_ex()); +} -#define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +static void +types_mutex_unlock(void) +{ + if (types_world_is_stopped()) { + return; + } + types_mutex_set_owned(0); + PyMutex_Unlock(TYPE_LOCK); +} #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + {if (!types_world_is_stopped()) assert(types_mutex_is_owned());} #else -#define BEGIN_TYPE_LOCK() -#define END_TYPE_LOCK() -#define BEGIN_TYPE_DICT_LOCK(d) -#define END_TYPE_DICT_LOCK() +#define types_mutex_lock() +#define types_mutex_unlock() +#define types_world_is_stopped() 1 +#define types_stop_world() +#define types_start_world() #define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_NEW_OR_STOPPED(tp) #endif -// Modification of type slots and type flags is protected by the global type -// lock. However, the slots and flags are read non-atomically without holding -// the type lock. So, we need to stop-the-world while modifying these, in -// order to avoid data races. This is unfortunately a bit expensive. -#ifdef Py_GIL_DISABLED -// If defined, type slot updates (after initial creation) will stop-the-world. -#define TYPE_SLOT_UPDATE_NEEDS_STOP -// If defined, type flag updates (after initial creation) will stop-the-world. -#define TYPE_FLAGS_UPDATE_NEEDS_STOP -#endif - #define PyTypeObject_CAST(op) ((PyTypeObject *)(op)) typedef struct PySlot_Offset { @@ -111,6 +172,9 @@ releasebuffer_call_python(PyObject *self, Py_buffer *buffer); static PyObject * slot_tp_new(PyTypeObject *type, PyObject *args, PyObject *kwds); +static PyObject * +lookup_method_optional_lock(PyObject *self, PyObject *attr, int *unbound, bool lock_held); + static PyObject * lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound); @@ -364,6 +428,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // held when flags are modified. 
ASSERT_TYPE_LOCK_HELD(); } + ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -371,6 +436,7 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -496,10 +562,8 @@ _PyType_GetBases(PyTypeObject *self) { PyObject *res; - BEGIN_TYPE_LOCK(); res = lookup_tp_bases(self); Py_INCREF(res); - END_TYPE_LOCK(); return res; } @@ -508,6 +572,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); + ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -551,35 +616,21 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { - ASSERT_TYPE_LOCK_HELD(); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { -#ifdef Py_GIL_DISABLED - PyObject *mro = _Py_atomic_load_ptr_relaxed(&self->tp_mro); - if (mro == NULL) { - return NULL; - } - if (_Py_TryIncrefCompare(&self->tp_mro, mro)) { - return mro; - } - - BEGIN_TYPE_LOCK(); - mro = lookup_tp_mro(self); - Py_XINCREF(mro); - END_TYPE_LOCK(); - return mro; -#else + // This is safe in the free-threaded build because tp_mro cannot be + // assigned unless the type is new or if world is stopped. return Py_XNewRef(lookup_tp_mro(self)); -#endif } static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { + ASSERT_NEW_OR_STOPPED(self); assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_mro can probably be statically allocated for each @@ -1008,10 +1059,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); - END_TYPE_LOCK(); + types_mutex_unlock(); return 0; } @@ -1142,9 +1193,9 @@ PyType_Modified(PyTypeObject *type) return; } - BEGIN_TYPE_LOCK(); + types_mutex_lock(); type_modified_unlocked(type); - END_TYPE_LOCK(); + types_mutex_unlock(); } static int @@ -1169,15 +1220,16 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) { int unbound; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); if (custom) { PyObject *mro_meth, *type_mro_meth; - mro_meth = lookup_maybe_method( - (PyObject *)type, &_Py_ID(mro), &unbound); + mro_meth = lookup_method_optional_lock( + (PyObject *)type, &_Py_ID(mro), &unbound, true); if (mro_meth == NULL) { goto clear; } - type_mro_meth = lookup_maybe_method( - (PyObject *)&PyType_Type, &_Py_ID(mro), &unbound); + type_mro_meth = lookup_method_optional_lock( + (PyObject *)&PyType_Type, &_Py_ID(mro), &unbound, true); if (type_mro_meth == NULL) { Py_DECREF(mro_meth); goto clear; @@ -1229,9 +1281,9 @@ void _PyType_SetVersion(PyTypeObject *tp, unsigned int version) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); set_version_unlocked(tp, version); - END_TYPE_LOCK(); + types_mutex_unlock(); } PyTypeObject * @@ -1254,7 +1306,11 @@ _PyType_LookupByVersion(unsigned int version) unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp) { +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_uint32_acquire(&tp->tp_version_tag); +#else return tp->tp_version_tag; +#endif } @@ -1317,9 +1373,9 @@ int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { 
PyInterpreterState *interp = _PyInterpreterState_GET(); int assigned; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assigned = assign_version_tag(interp, type); - END_TYPE_LOCK(); + types_mutex_unlock(); return assigned; } @@ -1590,14 +1646,13 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) return -1; } - BEGIN_TYPE_LOCK(); + types_stop_world(); type_modified_unlocked(type); if (abstract) - _PyType_SetFlags(type, 0, Py_TPFLAGS_IS_ABSTRACT); + type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else - _PyType_SetFlags(type, Py_TPFLAGS_IS_ABSTRACT, 0); - - END_TYPE_LOCK(); + type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0); + types_start_world(); return 0; } @@ -1619,7 +1674,6 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); PyObject *mro; - BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(type); if (mro == NULL) { mro = Py_None; @@ -1627,7 +1681,6 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) Py_INCREF(mro); } - END_TYPE_LOCK(); return mro; } @@ -1651,6 +1704,7 @@ static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; int res = mro_internal(type, &old_mro); @@ -1722,6 +1776,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) return -1; } assert(new_bases != NULL); + assert(types_world_is_stopped()); if (!PyTuple_Check(new_bases)) { PyErr_Format(PyExc_TypeError, @@ -1856,9 +1911,18 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); int res; - BEGIN_TYPE_LOCK(); + bool need_stop = !types_world_is_stopped(); + if (need_stop) { + // This function can be re-entered. We want to avoid trying to stop + // the world a second time if that happens. Example call chain: + // mro_invoke() -> call_unbound_noarg() -> type_setattro() -> + // type_set_bases() + types_stop_world(); + } res = type_set_bases_unlocked(type, new_bases); - END_TYPE_LOCK(); + if (need_stop) { + types_start_world(); + } return res; } @@ -2810,10 +2874,26 @@ _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or return res; } +#ifdef Py_GIL_DISABLED +static PyObject *type_lookup_lock_held(PyTypeObject *type, PyObject *name, unsigned int *version); +#endif + static PyObject * -lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) +lookup_method_optional_lock(PyObject *self, PyObject *attr, int *unbound, bool lock_held) { - PyObject *res = _PyType_LookupRef(Py_TYPE(self), attr); + PyObject *res; +#ifdef Py_GIL_DISABLED + if (lock_held) { + // The TYPE_LOCK mutex is already held by the caller. + res = type_lookup_lock_held(Py_TYPE(self), attr, NULL); + } + else { + // If the MCACHE lookup fails, this will acquire the TYPE_LOCK mutex. 
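+        // [Editor's note, not in the patch: the lock_held flag exists
+        // because PyMutex is not re-entrant. A caller such as
+        // type_mro_modified(), which runs with TYPE_LOCK already held,
+        // must pass lock_held=true so the lookup goes through
+        // type_lookup_lock_held(); going through _PyType_LookupRef()
+        // could try to take TYPE_LOCK again on a cache miss and
+        // deadlock.]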
+ res = _PyType_LookupRef(Py_TYPE(self), attr); + } +#else + res = _PyType_LookupRef(Py_TYPE(self), attr); +#endif if (res == NULL) { return NULL; } @@ -2832,6 +2912,12 @@ lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) return res; } +static PyObject * +lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) +{ + return lookup_method_optional_lock(self, attr, unbound, false); +} + static PyObject * lookup_method(PyObject *self, PyObject *attr, int *unbound) { @@ -2956,11 +3042,16 @@ tail_contains(PyObject *tuple, Py_ssize_t whence, PyObject *o) static PyObject * class_name(PyObject *cls) { +#ifdef Py_GIL_DISABLED + // Avoid re-entrancy and use tp_name + return type_name(cls, NULL); +#else PyObject *name; if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) { name = PyObject_Repr(cls); } return name; +#endif } static int @@ -3125,12 +3216,11 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } static PyObject * -mro_implementation_unlocked(PyTypeObject *type) +mro_implementation(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); - if (!_PyType_IsReady(type)) { - if (PyType_Ready(type) < 0) + int res = PyType_Ready(type); + if (res < 0) return NULL; } @@ -3208,16 +3298,6 @@ mro_implementation_unlocked(PyTypeObject *type) return result; } -static PyObject * -mro_implementation(PyTypeObject *type) -{ - PyObject *mro; - BEGIN_TYPE_LOCK(); - mro = mro_implementation_unlocked(type); - END_TYPE_LOCK(); - return mro; -} - /*[clinic input] type.mro @@ -3289,20 +3369,29 @@ mro_invoke(PyTypeObject *type) PyObject *new_mro; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { int unbound; - PyObject *mro_meth = lookup_method( - (PyObject *)type, &_Py_ID(mro), &unbound); - if (mro_meth == NULL) + PyObject *mro_meth = lookup_method_optional_lock( + (PyObject *)type, &_Py_ID(mro), &unbound, true); + if (mro_meth == NULL) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_AttributeError, "mro"); + } return NULL; + } + // FIXME: review unlock + types_mutex_unlock(); + // Possible re-entrancy, we don't know what this custom mro() method might do mro_result = call_unbound_noarg(unbound, mro_meth, (PyObject *)type); + types_mutex_lock(); Py_DECREF(mro_meth); } else { - mro_result = mro_implementation_unlocked(type); + mro_result = mro_implementation(type); } if (mro_result == NULL) return NULL; @@ -3352,6 +3441,7 @@ static int mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; int reent; @@ -3401,9 +3491,8 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; - BEGIN_TYPE_LOCK(); + ASSERT_NEW_OR_STOPPED(type); res = mro_internal_unlocked(type, 0, p_old_mro); - END_TYPE_LOCK(); return res; } @@ -4482,6 +4571,7 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); + type->tp_flags |= Py_TPFLAGS_EXPOSED; return (PyObject *)type; @@ -5195,6 +5285,7 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); + type->tp_flags |= Py_TPFLAGS_EXPOSED; finally: if (PyErr_Occurred()) { @@ -5329,7 +5420,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) } PyObject *res = NULL; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); PyObject *mro = lookup_tp_mro(type); // The type must be ready @@ -5356,7 +5447,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) break; } } - END_TYPE_LOCK(); + 
types_mutex_unlock(); if (res == NULL) { PyErr_Format( @@ -5488,8 +5579,6 @@ PyObject_GetItemData(PyObject *obj) static PyObject * find_name_in_mro(PyTypeObject *type, PyObject *name, int *error) { - ASSERT_TYPE_LOCK_HELD(); - Py_hash_t hash = _PyObject_HashFast(name); if (hash == -1) { *error = -1; @@ -5567,12 +5656,15 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio return old_name; } -#if Py_GIL_DISABLED - static void -update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, - unsigned int version_tag, PyObject *value) +maybe_update_cache(struct type_cache_entry *entry, PyObject *name, + unsigned int version_tag, PyObject *value) { + if (version_tag == 0) { + return; // 0 is not a valid version + } + PyObject *old_value; +#if Py_GIL_DISABLED _PySeqLock_LockWrite(&entry->sequence); // update the entry @@ -5584,16 +5676,16 @@ update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, return; } - PyObject *old_value = update_cache(entry, name, version_tag, value); + old_value = update_cache(entry, name, version_tag, value); // Then update sequence to the next valid value _PySeqLock_UnlockWrite(&entry->sequence); - +#else + old_value = update_cache(entry, name, version_tag, value); +#endif Py_DECREF(old_value); } -#endif - void _PyTypes_AfterFork(void) { @@ -5611,22 +5703,93 @@ _PyTypes_AfterFork(void) #endif } -/* Internal API to look for a name through the MRO. - This returns a strong reference, and doesn't set an exception! - If nonzero, version is set to the value of type->tp_version at the time of - the lookup. -*/ -PyObject * -_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) +static struct type_cache_entry * +type_get_cache_entry(PyTypeObject *type, PyObject *name) { + unsigned int h = MCACHE_HASH_METHOD(type, name); + struct type_cache *cache = get_type_cache(); + return &cache->hashtable[h]; +} + +// Implementation of _PyType_LookupRefAndVersion() for non-freethreaded build +// or for when the TYPE_LOCK mutex is already held. +static PyObject * +type_lookup_lock_held(PyTypeObject *type, PyObject *name, unsigned int *version) +{ + ASSERT_TYPE_LOCK_HELD(); PyObject *res; int error; PyInterpreterState *interp = _PyInterpreterState_GET(); - unsigned int h = MCACHE_HASH_METHOD(type, name); - struct type_cache *cache = get_type_cache(); - struct type_cache_entry *entry = &cache->hashtable[h]; + struct type_cache_entry *entry = type_get_cache_entry(type, name); + if (entry->version == type->tp_version_tag && + entry->name == name) { + assert(type->tp_version_tag); + OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); + Py_XINCREF(entry->value); + if (version != NULL) { + *version = entry->version; + } + return entry->value; + } + OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); + OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); + + /* We may end up clearing live exceptions below, so make sure it's ours. */ + assert(!PyErr_Occurred()); + + unsigned int assigned_version = 0; // 0 is not a valid version + if (MCACHE_CACHEABLE_NAME(name)) { + if (assign_version_tag(interp, type)) { + assigned_version = type->tp_version_tag; + } + } + // Calling find_name_in_mro() might cause the type version to change. For + // example, if a __hash__ or __eq__ method mutates the types. 
Since this + // case is expected to be rare, it seems better to not explicitly check + // for it (by checking if the version changed) and to do the useless cache + // update anyhow. + res = find_name_in_mro(type, name, &error); + + if (!error) { + /* Only put NULL results into cache if there was no error. */ + maybe_update_cache(entry, name, assigned_version, res); + if (version != NULL) { + *version = assigned_version; + } + return res; + } + else { + // It's not ideal to clear the error condition, but this function is + // documented as not setting an exception, and I don't want to change + // that. E.g., when PyType_Ready() can't proceed, it won't set the + // "ready" flag, so future attempts to ready the same type will call + // it again -- hopefully in a context that propagates the exception + // out. + if (error == -1) { + PyErr_Clear(); + } + if (version != NULL) { + // 0 is not a valid version + *version = 0; + } + return NULL; + } +} + #ifdef Py_GIL_DISABLED +// Implementation of _PyType_LookupRefAndVersion() for freethreaded build for +// when the TYPE_LOCK mutex is not already held. Avoids acquiring the lock if +// the lookup can be completed using the cache. +PyObject * +type_lookup_gil_disabled(PyTypeObject *type, PyObject *name, unsigned int *version) +{ + PyObject *res; + int error; + PyInterpreterState *interp = _PyInterpreterState_GET(); + + struct type_cache_entry *entry = type_get_cache_entry(type, name); // synchronize-with other writing threads by doing an acquire load on the sequence while (1) { uint32_t sequence = _PySeqLock_BeginRead(&entry->sequence); @@ -5657,48 +5820,42 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve break; } } -#else - if (entry->version == type->tp_version_tag && - entry->name == name) { - assert(type->tp_version_tag); - OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); - OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); - Py_XINCREF(entry->value); - if (version != NULL) { - *version = entry->version; - } - return entry->value; - } -#endif OBJECT_STAT_INC_COND(type_cache_misses, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_misses, is_dunder_name(name)); /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); - // We need to atomically do the lookup and capture the version before - // anyone else can modify our mro or mutate the type. - - int has_version = 0; - unsigned int assigned_version = 0; - BEGIN_TYPE_LOCK(); - res = find_name_in_mro(type, name, &error); + unsigned int assigned_version = 0; // 0 is not a valid version if (MCACHE_CACHEABLE_NAME(name)) { - has_version = assign_version_tag(interp, type); - assigned_version = type->tp_version_tag; - } - END_TYPE_LOCK(); - - /* Only put NULL results into cache if there was no error. */ - if (error) { - /* It's not ideal to clear the error condition, - but this function is documented as not setting - an exception, and I don't want to change that. - E.g., when PyType_Ready() can't proceed, it won't - set the "ready" flag, so future attempts to ready - the same type will call it again -- hopefully - in a context that propagates the exception out. - */ + types_mutex_lock(); + if (assign_version_tag(interp, type)) { + assigned_version = type->tp_version_tag; + } + types_mutex_unlock(); + } + // Calling find_name_in_mro() might cause the type version to change. For + // example, if a __hash__ or __eq__ method mutates the types. 
Since this + // case is expected to be rare, it seems better to not explicitly check + // for it (by checking if the version changed) and to do the useless cache + // update anyhow. + res = find_name_in_mro(type, name, &error); + + if (!error) { + /* Only put NULL results into cache if there was no error. */ + maybe_update_cache(entry, name, assigned_version, res); + if (version != NULL) { + *version = assigned_version; + } + return res; + } + else { + // It's not ideal to clear the error condition, but this function is + // documented as not setting an exception, and I don't want to change + // that. E.g., when PyType_Ready() can't proceed, it won't set the + // "ready" flag, so future attempts to ready the same type will call + // it again -- hopefully in a context that propagates the exception + // out. if (error == -1) { PyErr_Clear(); } @@ -5708,20 +5865,22 @@ _PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *ve } return NULL; } +} +#endif - if (has_version) { -#if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, assigned_version, res); +/* Internal API to look for a name through the MRO. + This returns a strong reference, and doesn't set an exception! + If nonzero, version is set to the value of type->tp_version at the time of + the lookup. +*/ +PyObject * +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) +{ +#ifdef Py_GIL_DISABLED + return type_lookup_gil_disabled(type, name, version); #else - PyObject *old_value = update_cache(entry, name, assigned_version, res); - Py_DECREF(old_value); + return type_lookup_lock_held(type, name, version); #endif - } - if (version != NULL) { - // 0 is not a valid version - *version = has_version ? assigned_version : 0; - } - return res; } /* Internal API to look for a name through the MRO. 
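/* [Editor's sketch, not part of the patch: the seqlock retry protocol used
 * by type_lookup_gil_disabled() above, assuming the _PySeqLock API from
 * pycore_lock.h:
 *
 *     uint32_t seq = _PySeqLock_BeginRead(&entry->sequence);
 *     ...speculative, unlocked reads of entry->version/name/value...
 *     if (_PySeqLock_EndRead(&entry->sequence, seq)) {
 *         // no writer interleaved; the reads are a consistent snapshot
 *     }
 *     else {
 *         // a writer raced us; discard the reads and retry
 *     }
 * ] */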
@@ -5751,7 +5910,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return 0; } int can_cache; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(init); @@ -5759,7 +5918,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, if (can_cache) { FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); } - END_TYPE_LOCK(); + types_mutex_unlock(); return can_cache; } @@ -5770,7 +5929,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor return 0; } int can_cache; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): @@ -5784,33 +5943,29 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); } - END_TYPE_LOCK(); + types_mutex_unlock(); return can_cache; } void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); unsigned long new_flags = (self->tp_flags & ~mask) | flags; if (new_flags != self->tp_flags) { -#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif - self->tp_flags = new_flags; -#ifdef TYPE_FLAGS_UPDATE_NEEDS_STOP - _PyEval_StartTheWorld(interp); -#endif + types_stop_world(); + // can't use new_flags here since they could be out-of-date + self->tp_flags = (self->tp_flags & ~mask) | flags; + types_start_world(); } - END_TYPE_LOCK(); + types_mutex_unlock(); } int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version) { int err; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); err = validate(ty); if (!err) { if(assign_version_tag(_PyInterpreterState_GET(), ty)) { @@ -5820,7 +5975,7 @@ _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_ err = -1; } } - END_TYPE_LOCK(); + types_mutex_unlock(); return err; } @@ -5850,9 +6005,9 @@ set_flags_recursive(PyTypeObject *self, unsigned long mask, unsigned long flags) void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags) { - BEGIN_TYPE_LOCK(); + types_stop_world(); set_flags_recursive(self, mask, flags); - END_TYPE_LOCK(); + types_start_world(); } /* This is similar to PyObject_GenericGetAttr(), @@ -5966,15 +6121,14 @@ _Py_type_getattro(PyObject *tp, PyObject *name) return _Py_type_getattro_impl(type, name, NULL); } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED static int update_slot_world_stopped(PyTypeObject *type, PyObject *name) { int ret; - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); + types_stop_world(); ret = update_slot(type, name); - _PyEval_StartTheWorld(interp); + types_start_world(); return ret; } #endif @@ -5985,6 +6139,8 @@ static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) { + ASSERT_TYPE_LOCK_HELD(); + // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor // of the current value as it can observe the cache in an inconsistent @@ -6006,12 +6162,16 @@ 
type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", ((PyTypeObject*)type)->tp_name, name); + // FIXME: reentrancy possible + types_mutex_unlock(); _PyObject_SetAttributeErrorContext((PyObject *)type, name); + types_mutex_lock(); return -1; } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED if (is_dunder_name(name) && has_slotdef(name)) { + // update is potentially changing one or more slots return update_slot_world_stopped(type, name); } #else @@ -6078,22 +6238,25 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { // We don't just do PyType_Ready because we could already be readying - BEGIN_TYPE_LOCK(); + types_mutex_lock(); dict = type->tp_dict; if (dict == NULL) { dict = type->tp_dict = PyDict_New(); } - END_TYPE_LOCK(); + types_mutex_unlock(); if (dict == NULL) { res = -1; goto done; } } - BEGIN_TYPE_DICT_LOCK(dict); + // FIXME: can this deadlock? if so, how to avoid + types_mutex_lock(); + Py_BEGIN_CRITICAL_SECTION(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); assert(_PyType_CheckConsistency(type)); - END_TYPE_DICT_LOCK(); + Py_END_CRITICAL_SECTION(); + types_mutex_unlock(); done: Py_DECREF(name); @@ -6189,10 +6352,10 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { - BEGIN_TYPE_LOCK(); + types_mutex_lock(); type_clear_flags(type, Py_TPFLAGS_READY); set_version_unlocked(type, 0); - END_TYPE_LOCK(); + types_mutex_unlock(); } _PyStaticType_ClearWeakRefs(interp, type); @@ -7023,13 +7186,12 @@ object_set_class(PyObject *self, PyObject *value, void *closure) } #ifdef Py_GIL_DISABLED - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); + types_stop_world(); #endif PyTypeObject *oldto = Py_TYPE(self); int res = object_set_class_world_stopped(self, newto); #ifdef Py_GIL_DISABLED - _PyEval_StartTheWorld(interp); + types_start_world(); #endif if (res == 0) { if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) { @@ -8428,6 +8590,7 @@ static int type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8503,6 +8666,7 @@ static int type_ready_inherit(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8700,6 +8864,7 @@ static int type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -8791,14 +8956,14 @@ PyType_Ready(PyTypeObject *type) } int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } - END_TYPE_LOCK(); + types_mutex_unlock(); return res; } @@ -8832,9 +8997,9 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, managed_static_type_state_init(interp, self, isbuiltin, initial); int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); res = type_ready(self, initial); - END_TYPE_LOCK(); + types_mutex_unlock(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -9340,9 +9505,9 @@ hackcheck(PyObject 
*self, setattrofunc func, const char *what) } int res; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); res = hackcheck_unlocked(self, func, what); - END_TYPE_LOCK(); + types_mutex_unlock(); return res; } @@ -10563,9 +10728,9 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { releasebufferproc base_releasebuffer; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); - END_TYPE_LOCK(); + types_mutex_unlock(); if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); @@ -11046,7 +11211,7 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) #undef ptrs } -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP +#ifdef Py_GIL_DISABLED // Return true if "name" corresponds to at least one slot definition. This is // a more accurate but more expensive test compared to is_dunder_name(). static bool @@ -11119,6 +11284,7 @@ static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11235,6 +11401,7 @@ static int update_slots_callback(PyTypeObject *type, void *data) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { @@ -11253,6 +11420,8 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); + assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11289,14 +11458,14 @@ fixup_slot_dispatchers(PyTypeObject *type) // This lock isn't strictly necessary because the type has not been // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro // where we'd like to assert that the type is locked. - BEGIN_TYPE_LOCK(); + types_mutex_lock(); assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } - END_TYPE_LOCK(); + types_mutex_unlock(); } // Called when __bases__ is re-assigned. @@ -11306,14 +11475,7 @@ update_all_slots(PyTypeObject* type) pytype_slotdef *p; ASSERT_TYPE_LOCK_HELD(); - -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - // Similar to update_slot_world_stopped(), this is required to avoid - // races. We don't use update_slot_world_stopped() here because we want - // to stop once rather than once per slot. - PyInterpreterState *interp = _PyInterpreterState_GET(); - _PyEval_StopTheWorld(interp); -#endif + ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. 
*/ type_modified_unlocked(type); @@ -11322,10 +11484,6 @@ update_all_slots(PyTypeObject* type) /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } - -#ifdef TYPE_SLOT_UPDATE_NEEDS_STOP - _PyEval_StartTheWorld(interp); -#endif } @@ -11596,10 +11754,10 @@ PyType_Freeze(PyTypeObject *type) return -1; } - BEGIN_TYPE_LOCK(); + types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); type_modified_unlocked(type); - END_TYPE_LOCK(); + types_start_world(); return 0; } @@ -11664,14 +11822,14 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * PyObject *mro, *res; Py_ssize_t i, n; - BEGIN_TYPE_LOCK(); + types_mutex_lock(); mro = lookup_tp_mro(su_obj_type); /* keep a strong reference to mro because su_obj_type->tp_mro can be replaced during PyDict_GetItemRef(dict, name, &res) and because another thread can modify it after we end the critical section below */ Py_XINCREF(mro); - END_TYPE_LOCK(); + types_mutex_unlock(); if (mro == NULL) return NULL; From ca00e74171ecbfce4b3dbf1b1b6b26f3db1ef776 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 26 Mar 2025 03:10:26 -0700 Subject: [PATCH 09/45] Fix non-debug build. --- Include/internal/pycore_typeobject.h | 1 + Objects/typeobject.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index c6b040ea949311..1a4f89fd2449a0 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -32,6 +32,7 @@ extern "C" { #define _Py_TYPE_BASE_VERSION_TAG (2<<16) #define _Py_MAX_GLOBAL_TYPE_VERSION_TAG (_Py_TYPE_BASE_VERSION_TAG - 1) + /* runtime lifecycle */ extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e009394de2bcee..a0c27ac8a184f1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -72,12 +72,10 @@ class object "PyObject *" "&PyBaseObject_Type" // modifying these, in order to avoid data races. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex -#ifdef Py_DEBUG // Used to check for correct use of the TYPE_LOCK mutex. It is a simple // mutex and does not support re-entrancy. If we already hold the lock and // try to acquire it again with the same thread, it is a bug on the code. #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid -#endif static bool types_world_is_stopped(void) @@ -110,12 +108,14 @@ types_start_world(void) assert(!types_world_is_stopped()); } +#ifdef Py_DEBUG static bool types_mutex_is_owned(void) { PyThread_ident_t tid = PyThread_get_thread_ident_ex(); return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; } +#endif static void types_mutex_set_owned(PyThread_ident_t tid) From 6db454246303204720c25fe5b5784793d184e36d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 26 Mar 2025 12:31:47 -0700 Subject: [PATCH 10/45] Improve comments. --- Objects/typeobject.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a0c27ac8a184f1..2dcc68a75b08e4 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -60,23 +60,32 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED -// There's a global lock for mutation of types. This avoids having to take +// There's a global lock for types that ensures that the tp_version_tag is +// correctly updated if the type is modified. 
This avoids having to take // additional locks while doing various subclass processing which may result // in odd behaviors w.r.t. running with the GIL as the outer type lock could // be released and reacquired during a subclass update if there's contention // on the subclass lock. // -// While modification of type slots and type flags is protected by the -// global types lock. However, the slots and flags are read non-atomically -// without holding the type lock. So, we need to stop-the-world while -// modifying these, in order to avoid data races. +// Note that this lock does not protect when updating type slots or the +// tp_flags member. Instead, we either ensure those updates are done before +// the type has been revealed to other threads or we only do those updates +// while the stop-the-world mechanism is active. The slots and flags are read +// in many places without holding a lock and without atomics. +// +// Since TYPE_LOCK is used as regular mutex, we must take special care about +// potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to +// ensure that we are not trying to re-acquire the mutex when it is already +// held by the current thread. There are a couple cases when we release the +// mutex, when we call functions that might re-enter. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex -// Used to check for correct use of the TYPE_LOCK mutex. It is a simple +// Used to check for correct use of the TYPE_LOCK mutex. It is a regular // mutex and does not support re-entrancy. If we already hold the lock and // try to acquire it again with the same thread, it is a bug on the code. #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid +// Return true if the world is currently stopped. static bool types_world_is_stopped(void) { @@ -109,6 +118,7 @@ types_start_world(void) } #ifdef Py_DEBUG +// Return true if the TYPE_LOCK mutex is held by the current thread. static bool types_mutex_is_owned(void) { @@ -117,6 +127,7 @@ types_mutex_is_owned(void) } #endif +// Set the TID of the thread currently holding TYPE_LOCK. static void types_mutex_set_owned(PyThread_ident_t tid) { @@ -3401,6 +3412,7 @@ mro_invoke(PyTypeObject *type) if (mro_result == NULL) return NULL; + // FIXME: possible re-entrancy, drop lock? new_mro = PySequence_Tuple(mro_result); Py_DECREF(mro_result); if (new_mro == NULL) { @@ -4576,6 +4588,7 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); + // After this point, other threads can potentally use this type. type->tp_flags |= Py_TPFLAGS_EXPOSED; return (PyObject *)type; @@ -5290,6 +5303,7 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); + // After this point, other threads can potentally use this type. type->tp_flags |= Py_TPFLAGS_EXPOSED; finally: From 75d6b71a0169539c1a798c425c053127b35b046c Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 26 Mar 2025 23:50:39 -0700 Subject: [PATCH 11/45] Avoid unused function warning. 
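The TYPE_LOCK_TID ownership check documented in the comments above can be
reduced to a small standalone sketch. This is an approximation only:
pthreads and C11 atomics stand in for the real PyMutex,
PyThread_get_thread_ident_ex() and _Py_atomic_* primitives, and casting
pthread_self() to an integer is not strictly portable C.

    #include <assert.h>
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>

    static pthread_mutex_t type_lock = PTHREAD_MUTEX_INITIALIZER;
    static _Atomic uintptr_t type_lock_tid;  /* 0 means "not held" */

    static int
    type_lock_is_owned(void)
    {
        /* A relaxed load suffices: each thread only compares the stored
           owner id against its own id. */
        return atomic_load_explicit(&type_lock_tid, memory_order_relaxed)
               == (uintptr_t)pthread_self();
    }

    static void
    type_lock_acquire(void)
    {
        /* Re-acquiring a non-recursive mutex would deadlock; in a debug
           build we fail loudly with an assert instead. */
        assert(!type_lock_is_owned());
        pthread_mutex_lock(&type_lock);
        atomic_store_explicit(&type_lock_tid, (uintptr_t)pthread_self(),
                              memory_order_relaxed);
    }

    static void
    type_lock_release(void)
    {
        /* Clear the owner id before the mutex actually becomes free. */
        atomic_store_explicit(&type_lock_tid, 0, memory_order_relaxed);
        pthread_mutex_unlock(&type_lock);
    }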
---
 Objects/typeobject.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index b7311e7f224dc0..7c9b7712e174bd 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5821,6 +5821,7 @@ type_assign_version_lock_held(PyTypeObject *type)
     }
 }
 
+#ifdef Py_GIL_DISABLED
 static unsigned int
 type_assign_version(PyTypeObject *type)
 {
@@ -5830,6 +5831,7 @@ type_assign_version(PyTypeObject *type)
     types_mutex_unlock();
     return version;
 }
+#endif
 
 static unsigned int
 type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out,

From 895a86a6a85a30892500d42f6d84a29a94afd363 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Mon, 31 Mar 2025 14:05:11 -0700
Subject: [PATCH 12/45] Remove unwanted suppression (bad merge).

---
 Tools/tsan/suppressions_free_threading.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt
index 40b4bc933bc058..771a7997143a0f 100644
--- a/Tools/tsan/suppressions_free_threading.txt
+++ b/Tools/tsan/suppressions_free_threading.txt
@@ -13,7 +13,6 @@
 # These warnings trigger directly in a CPython function.
 
 race_top:assign_version_tag
-race_top:_multiprocessing_SemLock_acquire_impl
 race_top:dump_traceback
 race_top:fatal_error
 race_top:_PyFrame_GetCode

From 65e40f4856b460e2212ac4f7b6978371e5222f22 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Mon, 31 Mar 2025 14:37:14 -0700
Subject: [PATCH 13/45] Fixes based on review feedback.

* In type_set_bases(), always use stop-the-world.

* Fixes for mro_invoke(). We need to do different things depending on
  whether there is a custom mro() method and where we are called from.
  If called from type_ready(), we need the TYPE_LOCK mutex. If called
  from assignment to __bases__, we need stop-the-world.

* Remove some places where TYPE_LOCK is acquired when it no longer
  needs to be.

* Add comments to some _lock_held() functions to note that they might
  be called with the world stopped, rather than with the lock held.

* Remove update_slot_world_stopped() by inlining it.
---
 Objects/typeobject.c | 106 ++++++++++++++++----------------
 1 file changed, 54 insertions(+), 52 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 7c9b7712e174bd..9f3c104240bb70 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -156,7 +156,7 @@ types_mutex_unlock(void)
 }
 
 #define ASSERT_TYPE_LOCK_HELD() \
-    {if (!types_world_is_stopped()) assert(types_mutex_is_owned());}
+    assert(types_world_is_stopped() || types_mutex_is_owned())
 
 #else
 
@@ -432,7 +432,8 @@ type_set_flags(PyTypeObject *tp, unsigned long flags)
     if (tp->tp_flags & Py_TPFLAGS_READY) {
         // It's possible the type object has been exposed to other threads
         // if it's been marked ready. In that case, the type lock should be
-        // held when flags are modified.
+        // held when flags are modified (or the stop-the-world mechanism should
+        // be active).
         ASSERT_TYPE_LOCK_HELD();
     }
     ASSERT_NEW_OR_STOPPED(tp);
@@ -1923,18 +1924,9 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure))
 {
     PyTypeObject *type = PyTypeObject_CAST(tp);
     int res;
-    bool need_stop = !types_world_is_stopped();
-    if (need_stop) {
-        // This function can be re-entered. We want to avoid trying to stop
-        // the world a second time if that happens.
Example call chain: - // mro_invoke() -> call_unbound_noarg() -> type_setattro() -> - // type_set_bases() - types_stop_world(); - } + types_stop_world(); res = type_set_bases_unlocked(type, new_bases); - if (need_stop) { - types_start_world(); - } + types_start_world(); return res; } @@ -3487,16 +3479,48 @@ mro_invoke(PyTypeObject *type) const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { + // Custom mro() method on metaclass. This is potentially + // re-entrant. We are called either from type_ready(), in which case + // the TYPE_LOCK mutex is held, or from type_set_bases() (assignment to + // __bases__), in which case we need to stop-the-world in order to avoid + // data races. + bool stopped = types_world_is_stopped(); + if (stopped) { + // Called for type_set_bases(), re-assigment of __bases__ + types_start_world(); + } + else { + // Called from type_ready() + ASSERT_TYPE_LOCK_HELD(); + types_mutex_unlock(); + } mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); + if (mro_result != NULL) { + new_mro = PySequence_Tuple(mro_result); + Py_DECREF(mro_result); + } + else { + new_mro = NULL; + } + if (stopped) { + types_stop_world(); + } + else { + types_mutex_lock(); + } } else { + // In this case, the mro() method on the type object is being used and + // we know that these calls are not re-entrant. mro_result = mro_implementation(type); + if (mro_result != NULL) { + new_mro = PySequence_Tuple(mro_result); + Py_DECREF(mro_result); + } + else { + new_mro = NULL; + } } - if (mro_result == NULL) - return NULL; - - new_mro = PySequence_Tuple(mro_result); - Py_DECREF(mro_result); if (new_mro == NULL) { return NULL; } @@ -3549,9 +3573,7 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. */ old_mro = Py_XNewRef(lookup_tp_mro(type)); - types_mutex_unlock(); new_mro = mro_invoke(type); /* might cause reentrance */ - types_mutex_lock(); reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -5523,7 +5545,6 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) } PyObject *res = NULL; - types_mutex_lock(); PyObject *mro = lookup_tp_mro(type); // The type must be ready @@ -5550,7 +5571,6 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) break; } } - types_mutex_unlock(); if (res == NULL) { PyErr_Format( @@ -5806,12 +5826,14 @@ _PyTypes_AfterFork(void) #endif } -// Try to assign a new type version tag, return it if -// successful. Return 0 if no version was assigned. +// Try to assign a new type version tag, return it if successful. Return 0 +// if no version was assigned. Note that this function can be called while +// holding the TYPE_LOCK mutex or when the stop-the-world mechanism is active. static unsigned int type_assign_version_lock_held(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); + assert(!types_world_is_stopped()); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { return type->tp_version_tag; @@ -5916,6 +5938,8 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, return assigned_version; } +// Note that this function can be called while holding the TYPE_LOCK mutex or +// when the stop-the-world mechanism is active. 
static unsigned int
 type_lookup_lock_held(PyTypeObject *type, PyObject *name, _PyStackRef *out)
 {
@@ -6019,7 +6043,6 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor
 void
 _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
 {
-    types_mutex_lock();
     unsigned long new_flags = (self->tp_flags & ~mask) | flags;
     if (new_flags != self->tp_flags) {
         types_stop_world();
@@ -6027,7 +6050,6 @@ _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags)
         self->tp_flags = (self->tp_flags & ~mask) | flags;
         types_start_world();
     }
-    types_mutex_unlock();
 }
 
 int
@@ -6190,18 +6212,6 @@ _Py_type_getattro(PyObject *tp, PyObject *name)
     return _Py_type_getattro_impl(type, name, NULL);
 }
 
-#ifdef Py_GIL_DISABLED
-static int
-update_slot_world_stopped(PyTypeObject *type, PyObject *name)
-{
-    int ret;
-    types_stop_world();
-    ret = update_slot(type, name);
-    types_start_world();
-    return ret;
-}
-#endif
-
 // Called by type_setattro(). Updates both the type dict and
 // any type slots that correspond to the modified entry.
 static int
@@ -6241,7 +6251,11 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name,
 #ifdef Py_GIL_DISABLED
     if (is_dunder_name(name) && has_slotdef(name)) {
         // update is potentially changing one or more slots
-        return update_slot_world_stopped(type, name);
+        int ret;
+        types_stop_world();
+        ret = update_slot(type, name);
+        types_start_world();
+        return ret;
     }
 #else
     if (is_dunder_name(name)) {
@@ -6319,7 +6333,7 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
         }
     }
 
-    // FIXME: can this deadlock? if so, how to avoid
+    // FIXME: I'm not totally sure this is safe from a deadlock. If so, how to avoid?
     types_mutex_lock();
     Py_BEGIN_CRITICAL_SECTION(dict);
     res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value);
@@ -7254,14 +7268,10 @@ object_set_class(PyObject *self, PyObject *value, void *closure)
         return -1;
     }
 
-#ifdef Py_GIL_DISABLED
     types_stop_world();
-#endif
     PyTypeObject *oldto = Py_TYPE(self);
     int res = object_set_class_world_stopped(self, newto);
-#ifdef Py_GIL_DISABLED
     types_start_world();
-#endif
     if (res == 0) {
         if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) {
             Py_DECREF(oldto);
@@ -10736,9 +10746,7 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer)
 {
     releasebufferproc base_releasebuffer;
 
-    types_mutex_lock();
     base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer);
-    types_mutex_unlock();
 
     if (base_releasebuffer != NULL) {
         base_releasebuffer(self, buffer);
@@ -11806,14 +11814,8 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject *
     PyObject *mro, *res;
     Py_ssize_t i, n;
 
-    types_mutex_lock();
     mro = lookup_tp_mro(su_obj_type);
-    /* keep a strong reference to mro because su_obj_type->tp_mro can be
-       replaced during PyDict_GetItemRef(dict, name, &res) and because
-       another thread can modify it after we end the critical section
-       below */
     Py_XINCREF(mro);
-    types_mutex_unlock();
 
     if (mro == NULL)
         return NULL;

From b68f1a13e306791bf79891a414dfd2a0b3a7ac5c Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Mon, 31 Mar 2025 14:51:06 -0700
Subject: [PATCH 14/45] Remove spurious assert().
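The mro() invocation pattern settled on in PATCH 13 reduces to the sketch
below. The extern declarations are stand-ins for the static helpers in
typeobject.c, and call_custom_mro() is a hypothetical placeholder for the
call_method_noarg() call inside mro_invoke():

    /* Stand-ins for the CPython-internal helpers used by the series. */
    extern int   types_world_is_stopped(void);
    extern void  types_stop_world(void);
    extern void  types_start_world(void);
    extern void *call_custom_mro(void);  /* hypothetical: runs the Python-level mro() */

    static void *
    invoke_mro_reentrant_safe(void)
    {
        /* If we got here from a __bases__ assignment, the world is stopped.
           Running arbitrary Python code in that state could deadlock or
           re-enter the type machinery, so restart the world first... */
        int stopped = types_world_is_stopped();
        if (stopped) {
            types_start_world();
        }
        void *result = call_custom_mro();
        /* ...and stop it again before resuming the MRO and slot update. */
        if (stopped) {
            types_stop_world();
        }
        return result;
    }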
---
 Objects/typeobject.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 9f3c104240bb70..9905da23277923 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -5833,7 +5833,6 @@ static unsigned int
 type_assign_version_lock_held(PyTypeObject *type)
 {
     ASSERT_TYPE_LOCK_HELD();
-    assert(!types_world_is_stopped());
     PyInterpreterState *interp = _PyInterpreterState_GET();
     if (assign_version_tag(interp, type)) {
         return type->tp_version_tag;

From 1b844867741c2d831fa6fd5d90dcaada0070c8ff Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Tue, 1 Apr 2025 10:40:14 -0700
Subject: [PATCH 15/45] Omit mutex_tid member from default build.

---
 Include/internal/pycore_interp_structs.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h
index 96355d52fa92f1..bb2967c39b878f 100644
--- a/Include/internal/pycore_interp_structs.h
+++ b/Include/internal/pycore_interp_structs.h
@@ -566,8 +566,10 @@ struct types_state {
         managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES];
     } for_extensions;
     PyMutex mutex;
+#ifdef Py_GIL_DISABLED
     // used to check correct usage of the above mutex
     unsigned long long mutex_tid;
+#endif
 
     // Borrowed references to type objects whose
     // tp_version_tag % TYPE_VERSION_CACHE_SIZE

From 2af9e4954d8cf58481203e21ce4185d1293209f4 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Tue, 1 Apr 2025 10:40:35 -0700
Subject: [PATCH 16/45] Remove Py_TPFLAGS_EXPOSED flag and related logic.

This was useful for debugging but we don't want to use an additional
type flag.
---
 Include/object.h     |  4 ++--
 Objects/typeobject.c | 25 -------------------------
 2 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/Include/object.h b/Include/object.h
index 82b7ede695d819..d23918c2da67f9 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -576,8 +576,8 @@ given type object has a specified feature.
 /* Objects behave like an unbound method */
 #define Py_TPFLAGS_METHOD_DESCRIPTOR (1UL << 17)
 
-/* Type structure is potentially exposed (revealed) to other threads */
-#define Py_TPFLAGS_EXPOSED (1UL << 19)
+/* Unused. Legacy flag */
+#define Py_TPFLAGS_VALID_VERSION_TAG (1UL << 19)
 
 /* Type is abstract and cannot be instantiated */
 #define Py_TPFLAGS_IS_ABSTRACT (1UL << 20)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 9905da23277923..96d44484a31411 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -93,12 +93,6 @@ types_world_is_stopped(void)
     return interp->stoptheworld.world_stopped;
 }
 
-// Checks that either the type has not yet been exposed (revealed) to other
-// threads (it was newly created and the Py_TPFLAGS_EXPOSED flag is not set
-// yet) or the world is stopped and we can safely update it without races.
-#define ASSERT_NEW_OR_STOPPED(tp) \
-    assert((((tp)->tp_flags & Py_TPFLAGS_EXPOSED) == 0) || types_world_is_stopped())
-
 static void
 types_stop_world(void)
 {
@@ -436,7 +429,6 @@ type_set_flags(PyTypeObject *tp, unsigned long flags)
        // be active).
ASSERT_TYPE_LOCK_HELD(); } - ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -444,7 +436,6 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -580,7 +571,6 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); - ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -638,7 +628,6 @@ _PyType_GetMRO(PyTypeObject *self) static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { - ASSERT_NEW_OR_STOPPED(self); if (mro != NULL) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -1717,7 +1706,6 @@ static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; int res = mro_internal(type, &old_mro); @@ -3564,7 +3552,6 @@ static int mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; int reent; @@ -3614,7 +3601,6 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; - ASSERT_NEW_OR_STOPPED(type); res = mro_internal_unlocked(type, 0, p_old_mro); return res; } @@ -4694,8 +4680,6 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); - // After this point, other threads can potentally use this type. - type->tp_flags |= Py_TPFLAGS_EXPOSED; return (PyObject *)type; @@ -5409,8 +5393,6 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); - // After this point, other threads can potentally use this type. - type->tp_flags |= Py_TPFLAGS_EXPOSED; finally: if (PyErr_Occurred()) { @@ -8668,7 +8650,6 @@ static int type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8744,7 +8725,6 @@ static int type_ready_inherit(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8942,7 +8922,6 @@ static int type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -11275,7 +11254,6 @@ static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11392,7 +11370,6 @@ static int update_slots_callback(PyTypeObject *type, void *data) { ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { @@ -11411,7 +11388,6 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11466,7 +11442,6 @@ update_all_slots(PyTypeObject* type) pytype_slotdef *p; ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. 
*/
     type_modified_unlocked(type);

From f65e87c3a787c5d270499b3ec6d8e2b5b0d58a20 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Tue, 1 Apr 2025 10:47:07 -0700
Subject: [PATCH 17/45] Improve comment for TYPE_LOCK.

---
 Objects/typeobject.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 96d44484a31411..53afcd7e548488 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -60,29 +60,28 @@ class object "PyObject *" "&PyBaseObject_Type"
 
 #ifdef Py_GIL_DISABLED
 
-// There's a global lock for types that ensures that the tp_version_tag is
-// correctly updated if the type is modified. This avoids having to take
-// additional locks while doing various subclass processing which may result
-// in odd behaviors w.r.t. running with the GIL as the outer type lock could
-// be released and reacquired during a subclass update if there's contention
-// on the subclass lock.
+// There's a global lock for types that ensures that tp_version_tag and
+// _spec_cache are correctly updated if the type is modified. This avoids
+// having to take additional locks while doing various subclass processing
+// which may result in odd behaviors w.r.t. running with the GIL as the outer
+// type lock could be released and reacquired during a subclass update if
+// there's contention on the subclass lock.
 //
 // Note that this lock does not protect when updating type slots or the
 // tp_flags member. Instead, we either ensure those updates are done before
 // the type has been revealed to other threads or we only do those updates
 // while the stop-the-world mechanism is active. The slots and flags are read
 // in many places without holding a lock and without atomics.
-//
+#define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
+
 // Since TYPE_LOCK is used as regular mutex, we must take special care about
 // potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to
 // ensure that we are not trying to re-acquire the mutex when it is already
 // held by the current thread. There are a couple cases when we release the
-// mutex, when we call functions that might re-enter.
-#define TYPE_LOCK &PyInterpreterState_Get()->types.mutex
-
-// Used to check for correct use of the TYPE_LOCK mutex. It is a regular
-// mutex and does not support re-entrancy. If we already hold the lock and
-// try to acquire it again with the same thread, it is a bug on the code.
+// mutex, when we call functions that might re-enter. If we already hold the
+// lock and try to acquire it again with the same thread, it is a bug in the
+// code. We could just let it deadlock but having an assert failure gives
+// a more explicit error.
 #define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid
 
 // Return true if the world is currently stopped.

From 395a6d3de63dbd9e98d77f17acbb7b3169864d98 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Tue, 1 Apr 2025 12:35:51 -0700
Subject: [PATCH 18/45] Re-add the ASSERT_NEW_OR_STOPPED() asserts.

These seem like helpful asserts that could catch future bugs, e.g. if
the typeobject.c code is refactored or significantly changed. So, I
think it makes sense to allocate an ob_flags bit for this purpose. We
can remove it later if desired since it's non-public.

Move the ob_flags bit values into the object.h header file.
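In outline, the protocol these asserts enforce looks like the sketch
below. The flag and macro names match the diff that follows;
set_type_repr() is a hypothetical caller, shown only to illustrate the
rule, and the sketch assumes the CPython-internal declarations that
surround it in typeobject.c:

    /* A bit in ob_flags, set once a newly created type may have become
       reachable from other threads (64-bit builds only). */
    #define _Py_TYPE_REVEALED_FLAG (1 << 3)

    /* Mutating slots or tp_flags is safe only while the type is still
       private to the creating thread, or while every other thread is
       paused by the stop-the-world mechanism. */
    #define ASSERT_NEW_OR_STOPPED(tp) \
        assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 \
               || types_world_is_stopped())

    static void
    set_type_repr(PyTypeObject *tp, reprfunc f)  /* hypothetical caller */
    {
        types_stop_world();         /* pause every other thread */
        ASSERT_NEW_OR_STOPPED(tp);  /* now guaranteed to hold */
        tp->tp_repr = f;            /* plain store; no reader can race */
        types_start_world();
    }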
--- Include/object.h | 6 ++++++ Include/refcount.h | 3 --- Objects/typeobject.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/Include/object.h b/Include/object.h index d23918c2da67f9..994cac1ad17501 100644 --- a/Include/object.h +++ b/Include/object.h @@ -620,6 +620,12 @@ given type object has a specified feature. #define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0) #define Py_TPFLAGS_HAVE_VERSION_TAG (1UL << 18) +// Flag values for ob_flags (16 bits available, if SIZEOF_VOID_P > 4). +#define _Py_IMMORTAL_FLAGS (1 << 0) +#define _Py_STATICALLY_ALLOCATED_FLAG (1 << 2) +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) +#define _Py_TYPE_REVEALED_FLAG (1 << 3) +#endif #define Py_CONSTANT_NONE 0 #define Py_CONSTANT_FALSE 1 diff --git a/Include/refcount.h b/Include/refcount.h index 177bbdaf0c5977..ebd1dba6d15e1a 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -19,9 +19,6 @@ immortal. The latter should be the only instances that require cleanup during runtime finalization. */ -#define _Py_STATICALLY_ALLOCATED_FLAG 4 -#define _Py_IMMORTAL_FLAGS 1 - #if SIZEOF_VOID_P > 4 /* In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 53afcd7e548488..073e919c715d17 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -92,6 +92,19 @@ types_world_is_stopped(void) return interp->stoptheworld.world_stopped; } +// Checks that either the type has not yet been revealed (exposed) to +// other threads or the world is stopped and we can safely update it +// without races. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might +// be revealed. +#if SIZEOF_VOID_P > 4 +#define ASSERT_NEW_OR_STOPPED(tp) \ + assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 || \ + types_world_is_stopped()) +#else +#define ASSERT_NEW_OR_STOPPED(tp) +#endif + static void types_stop_world(void) { @@ -159,6 +172,7 @@ types_mutex_unlock(void) #define types_stop_world() #define types_start_world() #define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_NEW_OR_STOPPED(tp) #endif @@ -428,6 +442,7 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) // be active). ASSERT_TYPE_LOCK_HELD(); } + ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -435,6 +450,7 @@ static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); } @@ -570,6 +586,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); + ASSERT_NEW_OR_STOPPED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. 
@@ -627,6 +644,7 @@ _PyType_GetMRO(PyTypeObject *self) static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { + ASSERT_NEW_OR_STOPPED(self); if (mro != NULL) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -1705,6 +1723,7 @@ static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; int res = mro_internal(type, &old_mro); @@ -3551,6 +3570,7 @@ static int mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; int reent; @@ -3600,6 +3620,7 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; + ASSERT_NEW_OR_STOPPED(type); res = mro_internal_unlocked(type, 0, p_old_mro); return res; } @@ -4679,6 +4700,10 @@ type_new_impl(type_new_ctx *ctx) } assert(_PyType_CheckConsistency(type)); +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4 + // After this point, other threads can potentally use this type. + ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG; +#endif return (PyObject *)type; @@ -5392,6 +5417,10 @@ PyType_FromMetaclass( } assert(_PyType_CheckConsistency(type)); +#if defined(Py_GIL_DISABLED) && defined(Py_DEBUG) && SIZEOF_VOID_P > 4 + // After this point, other threads can potentally use this type. + ((PyObject*)type)->ob_flags |= _Py_TYPE_REVEALED_FLAG; +#endif finally: if (PyErr_Occurred()) { @@ -8649,6 +8678,7 @@ static int type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8724,6 +8754,7 @@ static int type_ready_inherit(PyTypeObject *type) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8921,6 +8952,7 @@ static int type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -11253,6 +11285,7 @@ static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); PyObject *descr; PyWrapperDescrObject *d; @@ -11369,6 +11402,7 @@ static int update_slots_callback(PyTypeObject *type, void *data) { ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; for (; *pp; pp++) { @@ -11387,6 +11421,7 @@ update_slot(PyTypeObject *type, PyObject *name) int offset; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11441,6 +11476,7 @@ update_all_slots(PyTypeObject* type) pytype_slotdef *p; ASSERT_TYPE_LOCK_HELD(); + ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ type_modified_unlocked(type); From e4f87e57289e5572c20a22285f8f128023667189 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Apr 2025 11:36:10 -0700 Subject: [PATCH 19/45] Further cleanups of the locking in typeobject. Reduce the number of places and the regions where TYPE_LOCK is being held. Most significantly, don't take the lock inside type_ready(). This removes the need for some _lock_held variants of some functions. We don't need type_lookup_lock_held() anymore, for example. 
Rename type_set_bases_unlocked() to type_set_bases_world_stopped(). --- Objects/typeobject.c | 206 +++++++++++++------------------------------ 1 file changed, 62 insertions(+), 144 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 073e919c715d17..61f97e99849d19 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -92,19 +92,20 @@ types_world_is_stopped(void) return interp->stoptheworld.world_stopped; } -// Checks that either the type has not yet been revealed (exposed) to -// other threads or the world is stopped and we can safely update it -// without races. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and -// PyType_FromMetaclass() to indicate that a newly initialized type might -// be revealed. +// Checks that the type has not yet been revealed (exposed) to other +// threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and +// PyType_FromMetaclass() to indicate that a newly initialized type might be +// revealed. We only have ob_flags on 64-bit platforms. #if SIZEOF_VOID_P > 4 -#define ASSERT_NEW_OR_STOPPED(tp) \ - assert((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) == 0 || \ - types_world_is_stopped()) +#define TYPE_IS_REVEALED(tp) ((((PyObject *)(tp))->ob_flags & _Py_TYPE_REVEALED_FLAG) != 0) #else -#define ASSERT_NEW_OR_STOPPED(tp) +#define TYPE_IS_REVEALED(tp) 0 #endif +// Checks if we can safely update type slots or tp_flags. +#define ASSERT_NEW_OR_STOPPED(tp) \ + assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) + static void types_stop_world(void) { @@ -435,13 +436,6 @@ _PyStaticType_GetBuiltins(void) static void type_set_flags(PyTypeObject *tp, unsigned long flags) { - if (tp->tp_flags & Py_TPFLAGS_READY) { - // It's possible the type object has been exposed to other threads - // if it's been marked ready. In that case, the type lock should be - // held when flags are modified (or the stop-the-world mechanism should - // be active). - ASSERT_TYPE_LOCK_HELD(); - } ASSERT_NEW_OR_STOPPED(tp); tp->tp_flags = flags; } @@ -449,7 +443,6 @@ type_set_flags(PyTypeObject *tp, unsigned long flags) static void type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(tp); unsigned long new_flags = (tp->tp_flags & ~mask) | flags; type_set_flags(tp, new_flags); @@ -1214,9 +1207,6 @@ PyType_Modified(PyTypeObject *type) types_mutex_unlock(); } -static unsigned int type_lookup_lock_held(PyTypeObject *type, PyObject *name, - _PyStackRef *out); - static int is_subtype_with_mro(PyObject *a_mro, PyTypeObject *a, PyTypeObject *b); @@ -1230,8 +1220,8 @@ has_custom_mro(PyTypeObject *tp) _PyThreadState_PushCStackRef(tstate, &c_ref1); _PyThreadState_PushCStackRef(tstate, &c_ref2); - type_lookup_lock_held(Py_TYPE(tp), &_Py_ID(mro), &c_ref1.ref); - type_lookup_lock_held(&PyType_Type, &_Py_ID(mro), &c_ref2.ref); + _PyType_LookupStackRefAndVersion(Py_TYPE(tp), &_Py_ID(mro), &c_ref1.ref); + _PyType_LookupStackRefAndVersion(&PyType_Type, &_Py_ID(mro), &c_ref2.ref); int custom = !PyStackRef_Is(c_ref1.ref, c_ref2.ref); @@ -1255,14 +1245,24 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. 
*/ - Py_ssize_t i, n; +#ifdef Py_GIL_DISABLED + unsigned int meta_version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(type)->tp_version_tag); +#endif + bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - ASSERT_TYPE_LOCK_HELD(); - if (!Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type)) { + types_mutex_lock(); + if (is_custom) { goto clear; } - n = PyTuple_GET_SIZE(bases); - for (i = 0; i < n; i++) { +#ifdef Py_GIL_DISABLED + if (meta_version != Py_TYPE(type)->tp_version_tag) { + // Raced with another thread mutating the metaclass, possible that mro() + // method was assigned. + goto clear; + } +#endif + Py_ssize_t n = PyTuple_GET_SIZE(bases); + for (Py_ssize_t i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); PyTypeObject *cls = _PyType_CAST(b); @@ -1274,7 +1274,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) goto clear; } } - return; + goto done; clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); @@ -1286,6 +1286,9 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) FT_ATOMIC_STORE_PTR_RELAXED( ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); } + +done: + types_mutex_unlock(); } /* @@ -1704,7 +1707,7 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) } static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, PyObject **); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1722,11 +1725,10 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, static int mro_hierarchy(PyTypeObject *type, PyObject *temp) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *old_mro; - int res = mro_internal(type, &old_mro); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1788,7 +1790,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases) +type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) { // Check arguments if (!check_set_special_type_attr(type, new_bases, "__bases__")) { @@ -1931,7 +1933,7 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); int res; types_stop_world(); - res = type_set_bases_unlocked(type, new_bases); + res = type_set_bases_world_stopped(type, new_bases); types_start_world(); return res; } @@ -2886,18 +2888,9 @@ _PyObject_LookupSpecialMethod(PyObject *self, PyObject *attr, PyObject **self_or static int lookup_method_ex(PyObject *self, PyObject *attr, _PyStackRef *out, - int raise_attribute_error, int lock_held) + int raise_attribute_error) { -#ifdef Py_GIL_DISABLED - if (lock_held) { - type_lookup_lock_held(Py_TYPE(self), attr, out); - } - else { - _PyType_LookupStackRefAndVersion(Py_TYPE(self), attr, out); - } -#else _PyType_LookupStackRefAndVersion(Py_TYPE(self), attr, out); -#endif if (PyStackRef_IsNull(*out)) { if (raise_attribute_error) { PyErr_SetObject(PyExc_AttributeError, attr); @@ -2931,13 +2924,13 @@ lookup_method_ex(PyObject *self, PyObject *attr, _PyStackRef *out, static int lookup_maybe_method(PyObject *self, PyObject *attr, _PyStackRef *out) { - return lookup_method_ex(self, attr, out, 0, 0); + return lookup_method_ex(self, attr, out, 0); } static int lookup_method(PyObject *self, PyObject *attr, 
_PyStackRef *out) { - return lookup_method_ex(self, attr, out, 1, 0); + return lookup_method_ex(self, attr, out, 1); } @@ -3495,11 +3488,6 @@ mro_invoke(PyTypeObject *type) // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } - else { - // Called from type_ready() - ASSERT_TYPE_LOCK_HELD(); - types_mutex_unlock(); - } mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); if (mro_result != NULL) { new_mro = PySequence_Tuple(mro_result); @@ -3511,9 +3499,6 @@ mro_invoke(PyTypeObject *type) if (stopped) { types_stop_world(); } - else { - types_mutex_lock(); - } } else { // In this case, the mro() method on the type object is being used and @@ -3567,9 +3552,8 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *new_mro, *old_mro; @@ -3600,7 +3584,9 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { + types_mutex_lock(); type_modified_unlocked(type); + types_mutex_unlock(); } else { /* For static builtin types, this is only called during init @@ -3616,15 +3602,6 @@ mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) return 1; } -static int -mro_internal(PyTypeObject *type, PyObject **p_old_mro) -{ - int res; - ASSERT_NEW_OR_STOPPED(type); - res = mro_internal_unlocked(type, 0, p_old_mro); - return res; -} - /* Calculate the best base amongst multiple base classes. This is the first one that's on the path to the "solid base". */ @@ -5837,36 +5814,25 @@ _PyTypes_AfterFork(void) } // Try to assign a new type version tag, return it if successful. Return 0 -// if no version was assigned. Note that this function can be called while -// holding the TYPE_LOCK mutex or when the stop-the-world mechanism is active. +// if no version was assigned. static unsigned int -type_assign_version_lock_held(PyTypeObject *type) +type_assign_version(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); + unsigned int version; + types_mutex_lock(); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { - return type->tp_version_tag; + version = type->tp_version_tag; } else { - return 0; + version = 0; } -} - -#ifdef Py_GIL_DISABLED -static unsigned int -type_assign_version(PyTypeObject *type) -{ - unsigned int version; - types_mutex_lock(); - version = type_assign_version_lock_held(type); types_mutex_unlock(); return version; } -#endif -static unsigned int -type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, - unsigned int (*assign_version)(PyTypeObject *)) +unsigned int +_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) { unsigned int h = MCACHE_HASH_METHOD(type, name); struct type_cache *cache = get_type_cache(); @@ -5916,7 +5882,7 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, PyObject *res; unsigned int assigned_version = 0; // 0 is not a valid version if (MCACHE_CACHEABLE_NAME(name)) { - assigned_version = assign_version(type); + assigned_version = type_assign_version(type); } // Calling find_name_in_mro() might cause the type version to change. For // example, if a __hash__ or __eq__ method mutates the types. 
Since this @@ -5947,25 +5913,6 @@ type_lookup_ex(PyTypeObject *type, PyObject *name, _PyStackRef *out, return assigned_version; } -// Note that this function can be called while holding the TYPE_LOCK mutex or -// when the stop-the-world mechanism is active. -static unsigned int -type_lookup_lock_held(PyTypeObject *type, PyObject *name, _PyStackRef *out) -{ - return type_lookup_ex(type, name, out, &type_assign_version_lock_held); -} - -unsigned int -_PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out) -{ -#ifdef Py_GIL_DISABLED - return type_lookup_ex(type, name, out, &type_assign_version); -#else - return type_lookup_ex(type, name, out, &type_assign_version_lock_held); -#endif - -} - /* Internal API to look for a name through the MRO. This returns a strong reference, and doesn't set an exception! If nonzero, version is set to the value of type->tp_version at the time of @@ -6330,12 +6277,15 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) PyObject *dict = type->tp_dict; if (dict == NULL) { // We don't just do PyType_Ready because we could already be readying - types_mutex_lock(); dict = type->tp_dict; if (dict == NULL) { + // FIXME: this can race with other threads. However, it is rare + // that PyType_Ready() is not called before the type is used from + // multiple threads. To do this safely, we have to ensure the + // type is not revealed to other threads or we stop-the-world before + // doing this assignment. dict = type->tp_dict = PyDict_New(); } - types_mutex_unlock(); if (dict == NULL) { res = -1; goto done; @@ -8677,7 +8627,6 @@ type_ready_preheader(PyTypeObject *type) static int type_ready_mro(PyTypeObject *type, int initial) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -8689,7 +8638,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -8753,7 +8702,6 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { static int type_ready_inherit(PyTypeObject *type) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); /* Inherit special flags from dominant base */ @@ -8951,7 +8899,6 @@ type_ready_post_checks(PyTypeObject *type) static int type_ready(PyTypeObject *type, int initial) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); @@ -9044,14 +8991,12 @@ PyType_Ready(PyTypeObject *type) } int res; - types_mutex_lock(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } - types_mutex_unlock(); return res; } @@ -9085,9 +9030,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, managed_static_type_state_init(interp, self, isbuiltin, initial); int res; - types_mutex_lock(); res = type_ready(self, initial); - types_mutex_unlock(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -9538,12 +9481,13 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped) https://mail.python.org/pipermail/python-dev/2003-April/034535.html */ static int -hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) +hackcheck(PyObject *self, setattrofunc func, const char *what) { + if (!PyType_Check(self)) { + return 1; + } PyTypeObject *type 
= Py_TYPE(self); - ASSERT_TYPE_LOCK_HELD(); - PyObject *mro = lookup_tp_mro(type); if (!mro) { /* Probably ok not to check the call in this case. */ @@ -9585,20 +9529,6 @@ hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) return 1; } -static int -hackcheck(PyObject *self, setattrofunc func, const char *what) -{ - if (!PyType_Check(self)) { - return 1; - } - - int res; - types_mutex_lock(); - res = hackcheck_unlocked(self, func, what); - types_mutex_unlock(); - return res; -} - static PyObject * wrap_setattr(PyObject *self, PyObject *args, void *wrapped) { @@ -10713,7 +10643,7 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags) } static releasebufferproc -releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer) +type_lookup_base_releasebuffer(PyObject *self, Py_buffer *buffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); @@ -10755,7 +10685,7 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { releasebufferproc base_releasebuffer; - base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); + base_releasebuffer = type_lookup_base_releasebuffer(self, buffer); if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); @@ -11284,7 +11214,6 @@ has_slotdef(PyObject *name) static pytype_slotdef * update_one_slot(PyTypeObject *type, pytype_slotdef *p) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); PyObject *descr; @@ -11401,7 +11330,6 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p) static int update_slots_callback(PyTypeObject *type, void *data) { - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); pytype_slotdef **pp = (pytype_slotdef **)data; @@ -11420,8 +11348,6 @@ update_slot(PyTypeObject *type, PyObject *name) pytype_slotdef **pp; int offset; - ASSERT_TYPE_LOCK_HELD(); - ASSERT_NEW_OR_STOPPED(type); assert(types_world_is_stopped()); assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); @@ -11456,17 +11382,10 @@ update_slot(PyTypeObject *type, PyObject *name) static void fixup_slot_dispatchers(PyTypeObject *type) { - // This lock isn't strictly necessary because the type has not been - // exposed to anyone else yet, but update_ont_slot calls find_name_in_mro - // where we'd like to assert that the type is locked. - types_mutex_lock(); - assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } - - types_mutex_unlock(); } // Called when __bases__ is re-assigned. @@ -11475,7 +11394,6 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; - ASSERT_TYPE_LOCK_HELD(); ASSERT_NEW_OR_STOPPED(type); /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ From 2a6655564533916aae0ec57e4564ee4b8b2c8820 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Apr 2025 11:41:46 -0700 Subject: [PATCH 20/45] Fix data race in resolve_slotdups(). The type_slots_pname and type_slots_ptrs are interpreter globals. Rather than trying to make access safe, just disable those caches in the free-threaded build. This cache is of dubious value anyhow. 
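Reduced to its essence, the cache is a classic unsynchronized
check-then-fill. A sketch, where fill_ptrs_for() is a hypothetical
stand-in for the collection loop in resolve_slotdups():

    /* Interpreter-global cache state (pre-patch). */
    static PyObject *cached_pname;             /* name the cache was filled for */
    static pytype_slotdef *cached_ptrs[MAX_EQUIV];

    static pytype_slotdef **
    lookup_cached(PyObject *name)
    {
        if (cached_pname != name) {  /* (1) threads A and B both see a miss */
            fill_ptrs_for(name);     /* (2) both refill concurrently...     */
            cached_pname = name;     /* (3) ...and race on these stores     */
        }
        return cached_ptrs;          /* (4) may describe the other thread's
                                        name by the time it is used */
    }

With the GIL, steps (1)-(4) are effectively atomic; in the free-threaded
build, two threads resolving different dunder names can interleave at any
point, so the patch simply rebuilds the small array on the stack each call.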
--- Include/internal/pycore_interp_structs.h | 3 +++ Objects/typeobject.c | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index bb2967c39b878f..246186f74bd853 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -670,8 +670,11 @@ struct _Py_interp_cached_objects { /* object.__reduce__ */ PyObject *objreduce; +#ifndef Py_GIL_DISABLED + /* resolve_slotdups() */ PyObject *type_slots_pname; pytype_slotdef *type_slots_ptrs[MAX_EQUIV]; +#endif /* TypeVar and related types */ PyTypeObject *generic_type; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 61f97e99849d19..b94a811262eb9c 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -11107,12 +11107,21 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) { /* XXX Maybe this could be optimized more -- but is it worth it? */ +#ifdef Py_GIL_DISABLED + pytype_slotdef *ptrs[MAX_EQUIV]; + pytype_slotdef **pp = ptrs; + /* Collect all slotdefs that match name into ptrs. */ + for (pytype_slotdef *p = slotdefs; p->name_strobj; p++) { + if (p->name_strobj == name) + *pp++ = p; + } + *pp = NULL; +#else /* pname and ptrs act as a little cache */ PyInterpreterState *interp = _PyInterpreterState_GET(); #define pname _Py_INTERP_CACHED_OBJECT(interp, type_slots_pname) #define ptrs _Py_INTERP_CACHED_OBJECT(interp, type_slots_ptrs) pytype_slotdef *p, **pp; - void **res, **ptr; if (pname != name) { /* Collect all slotdefs that match name into ptrs. */ @@ -11124,10 +11133,12 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) } *pp = NULL; } +#endif /* Look in all slots of the type matching the name. If exactly one of these has a filled-in slot, return a pointer to that slot. Otherwise, return NULL. */ + void **res, **ptr; res = NULL; for (pp = ptrs; *pp; pp++) { ptr = slotptr(type, (*pp)->offset); @@ -11137,9 +11148,11 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return NULL; res = ptr; } - return res; +#ifndef Py_GIL_DISABLED #undef pname #undef ptrs +#endif + return res; } #ifdef Py_GIL_DISABLED From 2efac2655b371511e146ae6465666c920dc6ac50 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 3 Apr 2025 12:10:18 -0700 Subject: [PATCH 21/45] Fix comment. --- Objects/typeobject.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b94a811262eb9c..b5cd4b85225464 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3478,11 +3478,9 @@ mro_invoke(PyTypeObject *type) const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { - // Custom mro() method on metaclass. This is potentially - // re-entrant. We are called either from type_ready(), in which case - // the TYPE_LOCK mutex is held, or from type_set_bases() (assignment to - // __bases__), in which case we need to stop-the-world in order to avoid - // data races. + // Custom mro() method on metaclass. This is potentially re-entrant. + // We are called either from type_ready() or from type_set_bases() + // (assignment to __bases__). bool stopped = types_world_is_stopped(); if (stopped) { // Called for type_set_bases(), re-assigment of __bases__ From f7d2d36599e277b144881072f162e9b15800c31f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Apr 2025 19:55:25 -0700 Subject: [PATCH 22/45] Make the init of tp_dict thread-safe. It is unlikely that tp_dict is not allocated when type_setattro() is called. 
However, if it is NULL, stop-the-world so we can safely assign it.
---
 Objects/typeobject.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index b5cd4b85225464..30155f22a45c52 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -6274,16 +6274,14 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value)
 
     PyObject *dict = type->tp_dict;
     if (dict == NULL) {
-        // We don't just do PyType_Ready because we could already be readying
-        dict = type->tp_dict;
-        if (dict == NULL) {
-            // FIXME: this can race with other threads. However, it is rare
-            // that PyType_Ready() is not called before the type is used from
-            // multiple threads. To do this safely, we have to ensure the
-            // type is not revealed to other threads or we stop-the-world before
-            // doing this assignment.
+        // This is an unlikely case. PyType_Ready has not yet been done and
+        // we need to initialize tp_dict. We don't just do PyType_Ready
+        // because we could already be readying.
+        types_stop_world();
+        if (type->tp_dict == NULL) {
             dict = type->tp_dict = PyDict_New();
         }
+        types_start_world();
         if (dict == NULL) {
             res = -1;
             goto done;

From f3fd35a552728ab35b5af9afe34d6199c565cbd8 Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas@arctrix.com>
Date: Tue, 8 Apr 2025 19:57:46 -0700
Subject: [PATCH 23/45] Do some additional locking simplification.

Re-structure type_setattro() into a slot and non-slot case. For the
slot case, we need to stop-the-world. For non-slot, we need to hold
TYPE_LOCK.

Remove unneeded locking in type_mro_modified(). This is only called
if the type is new or if the world is stopped. In both cases,
TYPE_LOCK doesn't need to be held.
---
 Objects/typeobject.c | 81 ++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 51 deletions(-)

diff --git a/Objects/typeobject.c b/Objects/typeobject.c
index 30155f22a45c52..316c214b2946fa 100644
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -144,9 +144,6 @@ types_mutex_set_owned(PyThread_ident_t tid)
 static void
 types_mutex_lock(void)
 {
-    if (types_world_is_stopped()) {
-        return;
-    }
     assert(!types_mutex_is_owned());
     PyMutex_Lock(TYPE_LOCK);
     types_mutex_set_owned(PyThread_get_thread_ident_ex());
@@ -155,15 +152,11 @@ types_mutex_lock(void)
 static void
 types_mutex_unlock(void)
 {
-    if (types_world_is_stopped()) {
-        return;
-    }
     types_mutex_set_owned(0);
     PyMutex_Unlock(TYPE_LOCK);
 }
 
-#define ASSERT_TYPE_LOCK_HELD() \
-    assert(types_world_is_stopped() || types_mutex_is_owned())
+#define ASSERT_TYPE_LOCK_HELD() assert(types_mutex_is_owned())
 
 #else
 
@@ -1094,7 +1087,6 @@ PyType_Unwatch(int watcher_id, PyObject* obj)
 static void
 set_version_unlocked(PyTypeObject *tp, unsigned int version)
 {
-    ASSERT_TYPE_LOCK_HELD();
     assert(version == 0 || (tp->tp_versions_used != _Py_ATTR_CACHE_UNUSED));
 #ifndef Py_GIL_DISABLED
     PyInterpreterState *interp = _PyInterpreterState_GET();
@@ -1142,7 +1134,6 @@ type_modified_unlocked(PyTypeObject *type)
        We don't assign new version tags eagerly, but only as
        needed.
      */
-    ASSERT_TYPE_LOCK_HELD();
     if (type->tp_version_tag == 0) {
         return;
     }
@@ -1233,6 +1224,7 @@ has_custom_mro(PyTypeObject *tp)
 static void
 type_mro_modified(PyTypeObject *type, PyObject *bases)
 {
+    ASSERT_NEW_OR_STOPPED(type);
     /* Check that all base classes or elements of the MRO of type are
        able to be cached.
This function is called after the base @@ -1250,7 +1242,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) #endif bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - types_mutex_lock(); if (is_custom) { goto clear; } @@ -1274,7 +1265,7 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) goto clear; } } - goto done; + return; clear: assert(!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); @@ -1286,9 +1277,6 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) FT_ATOMIC_STORE_PTR_RELAXED( ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); } - -done: - types_mutex_unlock(); } /* @@ -3582,9 +3570,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - types_mutex_lock(); type_modified_unlocked(type); - types_mutex_unlock(); } else { /* For static builtin types, this is only called during init @@ -3693,9 +3679,7 @@ static int update_slot(PyTypeObject *, PyObject *); static void fixup_slot_dispatchers(PyTypeObject *); static int type_new_set_names(PyTypeObject *); static int type_new_init_subclass(PyTypeObject *, PyObject *); -#ifdef Py_GIL_DISABLED static bool has_slotdef(PyObject *); -#endif /* * Helpers for __dict__ descriptor. We don't want to expose the dicts @@ -6167,13 +6151,11 @@ _Py_type_getattro(PyObject *tp, PyObject *name) } // Called by type_setattro(). Updates both the type dict and -// any type slots that correspond to the modified entry. +// the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyObject *value, PyObject **old_value) { - ASSERT_TYPE_LOCK_HELD(); - // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor // of the current value as it can observe the cache in an inconsistent @@ -6195,28 +6177,9 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", ((PyTypeObject*)type)->tp_name, name); - types_mutex_unlock(); - // Note: reentrancy possible - _PyObject_SetAttributeErrorContext((PyObject *)type, name); - types_mutex_lock(); - return -1; + return -2; } -#ifdef Py_GIL_DISABLED - if (is_dunder_name(name) && has_slotdef(name)) { - // update is potentially changing one or more slots - int ret; - types_stop_world(); - ret = update_slot(type, name); - types_start_world(); - return ret; - } -#else - if (is_dunder_name(name)) { - return update_slot(type, name); - } -#endif - return 0; } @@ -6288,13 +6251,31 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } } - // FIXME: I'm not totaly sure this is safe from a deadlock. If so, how to avoid? - types_mutex_lock(); - Py_BEGIN_CRITICAL_SECTION(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); - assert(_PyType_CheckConsistency(type)); - Py_END_CRITICAL_SECTION(); - types_mutex_unlock(); + if (is_dunder_name(name) && has_slotdef(name)) { + // The name corresponds to a type slot. + types_stop_world(); + Py_BEGIN_CRITICAL_SECTION(dict); + res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + if (res == 0) { + res = update_slot(type, name); + } + Py_END_CRITICAL_SECTION(); + types_start_world(); + } else { + // The name is not a slot, hold TYPE_LOCK while updating version tags. 
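+        // Non-slot updates never modify the tp_* slot pointers, so other
+        // threads may keep reading slots concurrently; only the version
+        // tags and caches need mutual exclusion here.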
+ types_mutex_lock(); + Py_BEGIN_CRITICAL_SECTION(dict); + res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + Py_END_CRITICAL_SECTION(); + types_mutex_unlock(); + } + if (res == -2) { + // We handle the attribute error case here because reentrancy possible + // when calling this function. + _PyObject_SetAttributeErrorContext((PyObject *)type, name); + res = -1; + } + assert(res < 0 || _PyType_CheckConsistency(type)); done: Py_DECREF(name); @@ -11151,7 +11132,6 @@ resolve_slotdups(PyTypeObject *type, PyObject *name) return res; } -#ifdef Py_GIL_DISABLED // Return true if "name" corresponds to at least one slot definition. This is // a more accurate but more expensive test compared to is_dunder_name(). static bool @@ -11164,7 +11144,6 @@ has_slotdef(PyObject *name) } return false; } -#endif /* Common code for update_slots_callback() and fixup_slot_dispatchers(). * From 57c2a44171a7c03006234f8f32c8fd6a02336af7 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 10 Apr 2025 16:19:18 -0700 Subject: [PATCH 24/45] Avoid acquiring the types mutex if version is set. This can potentially help quite a bit when the type version tag has already been assigned but the MCACHE is missed. We can complete the lookup without acquiring the types mutex. --- Objects/typeobject.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 316c214b2946fa..132db65625d84a 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5800,16 +5800,18 @@ _PyTypes_AfterFork(void) static unsigned int type_assign_version(PyTypeObject *type) { - unsigned int version; - types_mutex_lock(); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (assign_version_tag(interp, type)) { - version = type->tp_version_tag; - } - else { - version = 0; + unsigned int version = FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag); + if (version == 0) { + types_mutex_lock(); + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (assign_version_tag(interp, type)) { + version = type->tp_version_tag; + } + else { + version = 0; + } + types_mutex_unlock(); } - types_mutex_unlock(); return version; } From 7c0ccf548cba35ef44a4a1dbc1e5015ccc5f3264 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:24:12 -0700 Subject: [PATCH 25/45] Add check_invalid_reentrancy() call. --- Python/ceval.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/Python/ceval.c b/Python/ceval.c index 363f263ad2a083..4d493466169baf 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -138,6 +138,20 @@ #endif +static void +check_invalid_reentrancy(void) +{ +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) + // In the free-threaded build, the interpreter must not be re-entered if + // either the world-is-stopped or if the types mutex is held. If so, + // that's a bug somewhere (likely in the painfully complex typeobject code). 
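+    // (With the world stopped, no other thread can make progress, so any
+    // re-entered code that waits on another thread would deadlock.)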
+ PyInterpreterState *interp = _PyInterpreterState_GET(); + assert(!interp->stoptheworld.world_stopped); + assert(interp->types.mutex_tid != PyThread_get_thread_ident_ex()); +#endif +} + + #ifdef Py_DEBUG static void dump_item(_PyStackRef item) @@ -959,6 +973,7 @@ PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate); + check_invalid_reentrancy(); CALL_STAT_INC(pyeval_calls); #if USE_COMPUTED_GOTOS && !Py_TAIL_CALL_INTERP From 4fa77bb11c40c3c35f47a4f371baed5642dce498 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:25:16 -0700 Subject: [PATCH 26/45] Fix additional re-entrancy issues found. - check_set_special_type_attr() is potentially re-entrant. Fix by refactoring type_set_bases_world_stopped() into two functions, one to check the args and one to actually set tp_base. - the type watcher stuff is re-entrant. Fix by unlocking the mutex before handling those. - add `is_new_type` parameter to mro_internal() and friends. This explicitly tells us if we are initializing a new type or modifying the bases of an existing one. The stop-the-world mechanism is used in only the second case. - rename best_base() to find_best_base() --- Objects/typeobject.c | 92 ++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 34 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 132db65625d84a..0088704767d555 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1158,6 +1158,9 @@ type_modified_unlocked(PyTypeObject *type) // Notify registered type watchers, if any if (type->tp_watched) { + // We must unlock here because at least the PyErr_FormatUnraisable + // call is re-entrant and the watcher callback might be too. 
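+    // NOTE: while the lock is dropped, other threads may mutate the type
+    // again; the watcher loop below must not assume otherwise.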
+ types_mutex_unlock(); PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1174,6 +1177,7 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } + types_mutex_lock(); } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ @@ -1694,8 +1698,8 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) return mro; } -static PyTypeObject *best_base(PyObject *); -static int mro_internal(PyTypeObject *, int, PyObject **); +static PyTypeObject *find_best_base(PyObject *); +static int mro_internal(PyTypeObject *, int, PyObject **, bool); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1711,12 +1715,17 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); static int -mro_hierarchy(PyTypeObject *type, PyObject *temp) +mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) { - ASSERT_NEW_OR_STOPPED(type); - +#ifdef Py_DEBUG + if (is_new_type) { + assert(!TYPE_IS_REVEALED(type)); + } else { + assert(types_world_is_stopped()); + } +#endif PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro); + int res = mro_internal(type, 0, &old_mro, is_new_type); if (res <= 0) { /* error / reentrance */ return res; @@ -1766,7 +1775,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp); + res = mro_hierarchy(subclass, temp, is_new_type); if (res < 0) { break; } @@ -1778,14 +1787,13 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) } static int -type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) +type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base) { - // Check arguments + // Check arguments, this re-entrant due to the PySys_Audit() call if (!check_set_special_type_attr(type, new_bases, "__bases__")) { return -1; } assert(new_bases != NULL); - assert(types_world_is_stopped()); if (!PyTuple_Check(new_bases)) { PyErr_Format(PyExc_TypeError, @@ -1830,26 +1838,34 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) } // Compute the new MRO and the new base class - PyTypeObject *new_base = best_base(new_bases); - if (new_base == NULL) + *best_base = find_best_base(new_bases); + if (*best_base == NULL) return -1; - if (!compatible_for_assignment(type->tp_base, new_base, "__bases__")) { + if (!compatible_for_assignment(type->tp_base, *best_base, "__bases__")) { return -1; } + return 0; +} + +static int +type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +{ + assert(types_world_is_stopped()); + PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; set_tp_bases(type, Py_NewRef(new_bases), 0); - type->tp_base = (PyTypeObject *)Py_NewRef(new_base); + type->tp_base = (PyTypeObject *)Py_NewRef(best_base); PyObject *temp = PyList_New(0); if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp) < 0) { + if (mro_hierarchy(type, temp, false) < 0) { goto undo; } Py_DECREF(temp); @@ -1881,7 +1897,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) return res; undo: - n = PyList_GET_SIZE(temp); + Py_ssize_t n = PyList_GET_SIZE(temp); 
for (Py_ssize_t i = n - 1; i >= 0; i--) { PyTypeObject *cls; PyObject *new_mro, *old_mro = NULL; @@ -1898,13 +1914,13 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases) bail: if (lookup_tp_bases(type) == new_bases) { - assert(type->tp_base == new_base); + assert(type->tp_base == best_base); set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); - Py_DECREF(new_base); + Py_DECREF(best_base); } else { Py_DECREF(old_bases); @@ -1919,9 +1935,13 @@ static int type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); + PyTypeObject *best_base; + if (type_check_new_bases(type, new_bases, &best_base) < 0) { + return -1; + } int res; types_stop_world(); - res = type_set_bases_world_stopped(type, new_bases); + res = type_set_bases_world_stopped(type, new_bases, best_base); types_start_world(); return res; } @@ -3458,7 +3478,7 @@ mro_check(PyTypeObject *type, PyObject *mro) - through a finalizer of the return value of mro(). */ static PyObject * -mro_invoke(PyTypeObject *type) +mro_invoke(PyTypeObject *type, bool is_new_type) { PyObject *mro_result; PyObject *new_mro; @@ -3467,10 +3487,8 @@ mro_invoke(PyTypeObject *type) if (custom) { // Custom mro() method on metaclass. This is potentially re-entrant. - // We are called either from type_ready() or from type_set_bases() - // (assignment to __bases__). - bool stopped = types_world_is_stopped(); - if (stopped) { + // We are called either from type_ready() or from type_set_bases(). + if (!is_new_type) { // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } @@ -3482,7 +3500,7 @@ mro_invoke(PyTypeObject *type) else { new_mro = NULL; } - if (stopped) { + if (!is_new_type) { types_stop_world(); } } @@ -3538,9 +3556,15 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_type) { - ASSERT_NEW_OR_STOPPED(type); +#ifdef Py_DEBUG + if (is_new_type) { + assert(!TYPE_IS_REVEALED(type)); + } else { + assert(types_world_is_stopped()); + } +#endif PyObject *new_mro, *old_mro; int reent; @@ -3549,7 +3573,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. */ old_mro = Py_XNewRef(lookup_tp_mro(type)); - new_mro = mro_invoke(type); /* might cause reentrance */ + new_mro = mro_invoke(type, is_new_type); /* might cause reentrance */ reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -3590,7 +3614,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) This is the first one that's on the path to the "solid base". 
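   For example (illustrative only): with bases (Mapping, dict), the solid
   base of dict is dict itself while Mapping's is object, so dict wins.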
*/ static PyTypeObject * -best_base(PyObject *bases) +find_best_base(PyObject *bases) { Py_ssize_t i, n; PyTypeObject *base, *winner, *candidate; @@ -4725,7 +4749,7 @@ type_new_get_bases(type_new_ctx *ctx, PyObject **type) } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(ctx->bases); + PyTypeObject *base = find_best_base(ctx->bases); if (base == NULL) { return -1; } @@ -5140,12 +5164,12 @@ PyType_FromMetaclass( } /* Calculate best base, and check that all bases are type objects */ - PyTypeObject *base = best_base(bases); // borrowed ref + PyTypeObject *base = find_best_base(bases); // borrowed ref if (base == NULL) { goto finally; } - // best_base should check Py_TPFLAGS_BASETYPE & raise a proper exception, - // here we just check its work + // find_best_base() should check Py_TPFLAGS_BASETYPE & raise a proper + // exception, here we just check its work assert(_PyType_HasFeature(base, Py_TPFLAGS_BASETYPE)); /* Calculate sizes */ @@ -8617,7 +8641,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal(type, initial, NULL) < 0) { + if (mro_internal(type, initial, NULL, true) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); From caf6554415ef00e3f819b49752b50d6d2f5835a8 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 20:37:30 -0700 Subject: [PATCH 27/45] Add comment explaining class_name() code. --- Objects/typeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0088704767d555..93f3d39c5c7faa 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3159,7 +3159,8 @@ static PyObject * class_name(PyObject *cls) { #ifdef Py_GIL_DISABLED - // Avoid re-entrancy and use tp_name + // Avoid re-entrancy and use tp_name. This is only used for the + // mro_implementation() sanity check of the computed mro. return type_name(cls, NULL); #else PyObject *name; From 90ea541dceddfae060ac889ee5d06da68e1e4f1f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 22:39:58 -0700 Subject: [PATCH 28/45] Use atomic load to avoid thread safety issue. --- Python/ceval.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/ceval.c b/Python/ceval.c index 1a1ca5bfb50a45..e57ca0a71fcc89 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -147,7 +147,8 @@ check_invalid_reentrancy(void) // that's a bug somewhere (likely in the painfully complex typeobject code). PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!interp->stoptheworld.world_stopped); - assert(interp->types.mutex_tid != PyThread_get_thread_ident_ex()); + assert(_Py_atomic_load_ullong_relaxed(&interp->types.mutex_tid) != + PyThread_get_thread_ident_ex()); #endif } From 0dc0faf4c71a523a4fcdb962231766ff48afa881 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 22:43:59 -0700 Subject: [PATCH 29/45] Fix default debug build. 
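Without Py_GIL_DISABLED the reveal flag does not exist, so the macro
must still compile to a constant. The intended shape is roughly this
(the free-threaded expansion is paraphrased from this series and shown
only for context; the default-build line is what this patch adds):

    #ifdef Py_GIL_DISABLED
    // Set by type_new() once the type can be seen by other threads.
    #define TYPE_IS_REVEALED(tp) ((tp)->tp_flags & _Py_TYPE_REVEALED_FLAG)
    #else
    #define TYPE_IS_REVEALED(tp) 0
    #endif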
--- Objects/typeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7f5fbd1b77df69..79d394f9d4b30e 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -167,6 +167,7 @@ types_mutex_unlock(void) #define types_start_world() #define ASSERT_TYPE_LOCK_HELD() #define ASSERT_NEW_OR_STOPPED(tp) +#define TYPE_IS_REVEALED(tp) 0 #endif From 956e5d1bb447f64bd54f05183d9fa59226e13354 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 16 Apr 2025 23:14:28 -0700 Subject: [PATCH 30/45] Move declaration to avoid syntax error. --- Objects/typeobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 79d394f9d4b30e..1cb989c698c5b1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1856,6 +1856,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje { assert(types_world_is_stopped()); + Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; @@ -1899,7 +1900,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje return res; undo: - Py_ssize_t n = PyList_GET_SIZE(temp); + n = PyList_GET_SIZE(temp); for (Py_ssize_t i = n - 1; i >= 0; i--) { PyTypeObject *cls; PyObject *new_mro, *old_mro = NULL; From 2bb710c6d7930cc603062a1f40fc568eb3a95a6f Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 11:10:22 -0700 Subject: [PATCH 31/45] Remove _PyType_GetVersionForCurrentState, unused. --- Include/internal/pycore_typeobject.h | 1 - Objects/typeobject.c | 12 ------------ 2 files changed, 13 deletions(-) diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 1a4f89fd2449a0..0ee7d555c56cdd 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -134,7 +134,6 @@ extern int _PyType_AddMethod(PyTypeObject *, PyMethodDef *); extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, unsigned long flags); -extern unsigned int _PyType_GetVersionForCurrentState(PyTypeObject *tp); PyAPI_FUNC(void) _PyType_SetVersion(PyTypeObject *tp, unsigned int version); PyTypeObject *_PyType_LookupByVersion(unsigned int version); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1cb989c698c5b1..b1830ae87d1d12 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1319,18 +1319,6 @@ _PyType_LookupByVersion(unsigned int version) #endif } -unsigned int -_PyType_GetVersionForCurrentState(PyTypeObject *tp) -{ -#ifdef Py_GIL_DISABLED - return _Py_atomic_load_uint32_acquire(&tp->tp_version_tag); -#else - return tp->tp_version_tag; -#endif -} - - - #define MAX_VERSIONS_PER_CLASS 1000 #if _Py_ATTR_CACHE_UNUSED < MAX_VERSIONS_PER_CLASS #error "_Py_ATTR_CACHE_UNUSED must be bigger than max" From 3a9bc96bc5682ff773bb2e6bd88db1fbb48e9775 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:25:47 -0700 Subject: [PATCH 32/45] Fix for possible re-entrancy in has_custom_mro(). 
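The guard is an optimistic version check around the re-entrant call,
roughly (mirroring the diff below):

    unsigned int meta_version = Py_TYPE(type)->tp_version_tag;
    bool is_custom = has_custom_mro(type);  // may execute Python code
    if (meta_version != Py_TYPE(type)->tp_version_tag) {
        goto clear;  // metaclass mutated underneath us; assume the worst
    }
    if (is_custom) {
        goto clear;
    }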
--- Objects/typeobject.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b1830ae87d1d12..19ffc5519b9c72 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1228,7 +1228,7 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases) +type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) { ASSERT_NEW_OR_STOPPED(type); /* @@ -1243,21 +1243,24 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. */ -#ifdef Py_GIL_DISABLED - unsigned int meta_version = FT_ATOMIC_LOAD_UINT32_RELAXED(Py_TYPE(type)->tp_version_tag); -#endif - bool is_custom = !Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type); - - if (is_custom) { - goto clear; - } -#ifdef Py_GIL_DISABLED - if (meta_version != Py_TYPE(type)->tp_version_tag) { - // Raced with another thread mutating the metaclass, possible that mro() - // method was assigned. - goto clear; + if (!Py_IS_TYPE(type, &PyType_Type)) { + unsigned int meta_version = Py_TYPE(type)->tp_version_tag; + if (!is_new_type) { + types_start_world(); + } + // This can be re-entrant. + bool is_custom = has_custom_mro(type); + if (!is_new_type) { + types_stop_world(); + } + if (meta_version != Py_TYPE(type)->tp_version_tag) { + // metaclass changed during call of has_custom_mro() + goto clear; + } + if (is_custom) { + goto clear; + } } -#endif Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); @@ -3598,10 +3601,10 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ set_tp_mro(type, new_mro, initial); - type_mro_modified(type, new_mro); + type_mro_modified(type, new_mro, is_new_type); /* corner case: the super class might have been hidden from the custom MRO */ - type_mro_modified(type, lookup_tp_bases(type)); + type_mro_modified(type, lookup_tp_bases(type), is_new_type); // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { From d742a53d6711deb0d140e29fb9a90c128ff9412b Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:27:27 -0700 Subject: [PATCH 33/45] Use correct FT_ATOMIC_ macro. 
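tp_version_tag is declared as unsigned int, so the load must use the
UINT variant; the UINT32 macro names a mismatched type. The wrappers
involved look roughly like this (shape assumed for illustration, not
copied from the headers):

    #ifdef Py_GIL_DISABLED
    #define FT_ATOMIC_LOAD_UINT_RELAXED(value) \
            _Py_atomic_load_uint_relaxed(&value)
    #else
    #define FT_ATOMIC_LOAD_UINT_RELAXED(value) value
    #endif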
--- Objects/typeobject.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 19ffc5519b9c72..191cafd0332b4f 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -48,7 +48,7 @@ class object "PyObject *" "&PyBaseObject_Type" & ((1 << MCACHE_SIZE_EXP) - 1)) #define MCACHE_HASH_METHOD(type, name) \ - MCACHE_HASH(FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag), \ + MCACHE_HASH(FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag), \ ((Py_ssize_t)(name)) >> 3) #define MCACHE_CACHEABLE_NAME(name) \ PyUnicode_CheckExact(name) && \ @@ -5838,7 +5838,7 @@ _PyTypes_AfterFork(void) static unsigned int type_assign_version(PyTypeObject *type) { - unsigned int version = FT_ATOMIC_LOAD_UINT32_RELAXED((type)->tp_version_tag); + unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag); if (version == 0) { types_mutex_lock(); PyInterpreterState *interp = _PyInterpreterState_GET(); From e7480c348b3b52fc21b9b56261eb1b1d70112213 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:28:13 -0700 Subject: [PATCH 34/45] Remove TSAN suppression for 'assign_version_tag'. --- Tools/tsan/suppressions_free_threading.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 7d6cd6c10f5218..404c30157362aa 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -12,7 +12,6 @@ # These warnings trigger directly in a CPython function. -race_top:assign_version_tag race_top:dump_traceback race_top:fatal_error race_top:_PyFrame_GetCode @@ -41,4 +40,4 @@ race:list_inplace_repeat_lock_held # PyObject_Realloc internally does memcpy which isn't atomic so can race # with non-locking reads. See #132070 -race:PyObject_Realloc \ No newline at end of file +race:PyObject_Realloc From e2ea2819a344982ff8abea479e0f7004b9099690 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 12:39:47 -0700 Subject: [PATCH 35/45] Small efficiency fix for types_mutex_set_owned(). --- Objects/typeobject.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 191cafd0332b4f..8adde94d5244f6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -132,7 +132,6 @@ types_mutex_is_owned(void) PyThread_ident_t tid = PyThread_get_thread_ident_ex(); return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; } -#endif // Set the TID of the thread currently holding TYPE_LOCK. static void @@ -140,19 +139,24 @@ types_mutex_set_owned(PyThread_ident_t tid) { _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); } +#endif static void types_mutex_lock(void) { assert(!types_mutex_is_owned()); PyMutex_Lock(TYPE_LOCK); - types_mutex_set_owned(PyThread_get_thread_ident_ex()); +#ifdef Py_DEBUG + types_mutex_set_owned(_Py_ThreadId()); +#endif } static void types_mutex_unlock(void) { +#ifdef Py_DEBUG types_mutex_set_owned(0); +#endif PyMutex_Unlock(TYPE_LOCK); } From 935bfcacdb6f1dd47804e930515cc9f346683fdd Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 14:53:22 -0700 Subject: [PATCH 36/45] Revert to using critical section with TYPE_LOCK. 
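Critical sections have the property the raw-mutex scheme was missing:
they are implicitly released if the owning thread blocks (including at
a stop-the-world pause) and reacquired afterwards, so the TID-based
deadlock detection can go away. The restored usage pattern is simply:

    BEGIN_TYPE_LOCK();
    // ... read or update tp_version_tag, _spec_cache, tp_mro ...
    END_TYPE_LOCK();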
--- Include/internal/pycore_interp_structs.h | 4 - Objects/typeobject.c | 123 ++++++++--------------- Python/ceval.c | 6 +- 3 files changed, 46 insertions(+), 87 deletions(-) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index f816cdf0a40fcc..cd037d0bf9dc65 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -566,10 +566,6 @@ struct types_state { managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; } for_extensions; PyMutex mutex; -#ifdef Py_GIL_DISABLED - // used to check correct usage of the above mutex - unsigned long long mutex_tid; -#endif // Borrowed references to type objects whose // tp_version_tag % TYPE_VERSION_CACHE_SIZE diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 8adde94d5244f6..9232c11eee1a26 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -73,17 +73,14 @@ class object "PyObject *" "&PyBaseObject_Type" // while the stop-the-world mechanism is active. The slots and flags are read // in many places without holding a lock and without atomics. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex +#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) +#define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() -// Since TYPE_LOCK is used as regular mutex, we must take special care about -// potential re-entrant code paths. We use TYPE_LOCK_TID in debug builds to -// ensure that we are not trying to re-acquire the mutex when it is already -// held by the current thread. There are a couple cases when we release the -// mutex, when we call functions that might re-enter. If we already hold the -// lock and try to acquire it again with the same thread, it is a bug in the -// code. We could just let it deadlock but having an assert failure gives -// a more explicit error. -#define TYPE_LOCK_TID &PyInterpreterState_Get()->types.mutex_tid +#define BEGIN_TYPE_DICT_LOCK(d) \ + Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) +#define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() +#ifdef Py_DEBUG // Return true if the world is currently stopped. static bool types_world_is_stopped(void) @@ -91,6 +88,7 @@ types_world_is_stopped(void) PyInterpreterState *interp = _PyInterpreterState_GET(); return interp->stoptheworld.world_stopped; } +#endif // Checks that the type has not yet been revealed (exposed) to other // threads. The _Py_TYPE_REVEALED_FLAG flag is set by type_new() and @@ -102,6 +100,9 @@ types_world_is_stopped(void) #define TYPE_IS_REVEALED(tp) 0 #endif +#define ASSERT_TYPE_LOCK_HELD() \ + _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + // Checks if we can safely update type slots or tp_flags. #define ASSERT_NEW_OR_STOPPED(tp) \ assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) @@ -124,54 +125,18 @@ types_start_world(void) assert(!types_world_is_stopped()); } -#ifdef Py_DEBUG -// Return true if the TYPE_LOCK mutex is held by the current thread. -static bool -types_mutex_is_owned(void) -{ - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - return _Py_atomic_load_ullong_relaxed(TYPE_LOCK_TID) == tid; -} - -// Set the TID of the thread currently holding TYPE_LOCK. 
-static void -types_mutex_set_owned(PyThread_ident_t tid) -{ - _Py_atomic_store_ullong_relaxed(TYPE_LOCK_TID, tid); -} -#endif - -static void -types_mutex_lock(void) -{ - assert(!types_mutex_is_owned()); - PyMutex_Lock(TYPE_LOCK); -#ifdef Py_DEBUG - types_mutex_set_owned(_Py_ThreadId()); -#endif -} - -static void -types_mutex_unlock(void) -{ -#ifdef Py_DEBUG - types_mutex_set_owned(0); -#endif - PyMutex_Unlock(TYPE_LOCK); -} - -#define ASSERT_TYPE_LOCK_HELD() assert(types_mutex_is_owned()) - #else -#define types_mutex_lock() -#define types_mutex_unlock() +#define BEGIN_TYPE_LOCK() +#define END_TYPE_LOCK() +#define BEGIN_TYPE_DICT_LOCK(d) +#define END_TYPE_DICT_LOCK() +#define ASSERT_TYPE_LOCK_HELD() +#define TYPE_IS_REVEALED(tp) 0 +#define ASSERT_NEW_OR_STOPPED(tp) #define types_world_is_stopped() 1 #define types_stop_world() #define types_start_world() -#define ASSERT_TYPE_LOCK_HELD() -#define ASSERT_NEW_OR_STOPPED(tp) -#define TYPE_IS_REVEALED(tp) 0 #endif @@ -1067,10 +1032,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification - types_mutex_lock(); + BEGIN_TYPE_LOCK(); assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); - types_mutex_unlock(); + END_TYPE_LOCK(); return 0; } @@ -1164,9 +1129,8 @@ type_modified_unlocked(PyTypeObject *type) // Notify registered type watchers, if any if (type->tp_watched) { - // We must unlock here because at least the PyErr_FormatUnraisable - // call is re-entrant and the watcher callback might be too. - types_mutex_unlock(); + // Note that PyErr_FormatUnraisable is re-entrant and the watcher + // callback might be too. PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1183,7 +1147,6 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } - types_mutex_lock(); } set_version_unlocked(type, 0); /* 0 is not a valid version tag */ @@ -1203,9 +1166,9 @@ PyType_Modified(PyTypeObject *type) return; } - types_mutex_lock(); + BEGIN_TYPE_LOCK(); type_modified_unlocked(type); - types_mutex_unlock(); + END_TYPE_LOCK(); } static int @@ -1304,9 +1267,9 @@ This is similar to func_version_cache. 
void _PyType_SetVersion(PyTypeObject *tp, unsigned int version) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); set_version_unlocked(tp, version); - types_mutex_unlock(); + END_TYPE_LOCK(); } PyTypeObject * @@ -1384,9 +1347,9 @@ int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { PyInterpreterState *interp = _PyInterpreterState_GET(); int assigned; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); assigned = assign_version_tag(interp, type); - types_mutex_unlock(); + END_TYPE_LOCK(); return assigned; } @@ -5844,7 +5807,7 @@ type_assign_version(PyTypeObject *type) { unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag); if (version == 0) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { version = type->tp_version_tag; @@ -5852,7 +5815,7 @@ type_assign_version(PyTypeObject *type) else { version = 0; } - types_mutex_unlock(); + END_TYPE_LOCK(); } return version; } @@ -5985,7 +5948,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, return 0; } int can_cache; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; #ifdef Py_GIL_DISABLED can_cache = can_cache && _PyObject_HasDeferredRefcount(init); @@ -5993,7 +5956,7 @@ _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, if (can_cache) { FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); } - types_mutex_unlock(); + END_TYPE_LOCK(); return can_cache; } @@ -6004,7 +5967,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor return 0; } int can_cache; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); can_cache = ((PyTypeObject*)ht)->tp_version_tag == tp_version; // This pointer is invalidated by PyType_Modified (see the comment on // struct _specialization_cache): @@ -6018,7 +5981,7 @@ _PyType_CacheGetItemForSpecialization(PyHeapTypeObject *ht, PyObject *descriptor FT_ATOMIC_STORE_PTR_RELEASE(ht->_spec_cache.getitem, descriptor); FT_ATOMIC_STORE_UINT32_RELAXED(ht->_spec_cache.getitem_version, version); } - types_mutex_unlock(); + END_TYPE_LOCK(); return can_cache; } @@ -6038,7 +6001,7 @@ int _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_version) { int err; - types_mutex_lock(); + BEGIN_TYPE_LOCK(); err = validate(ty); if (!err) { if(assign_version_tag(_PyInterpreterState_GET(), ty)) { @@ -6048,7 +6011,7 @@ _PyType_Validate(PyTypeObject *ty, _py_validate_type validate, unsigned int *tp_ err = -1; } } - types_mutex_unlock(); + END_TYPE_LOCK(); return err; } @@ -6298,6 +6261,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) if (is_dunder_name(name) && has_slotdef(name)) { // The name corresponds to a type slot. types_stop_world(); + // Since the world is stopped, we actually don't need this critical + // section. However, this internally calls dict methods that assert + // that the dict object mutex is held. So, we acquire it here to + // avoid those assert failing. Py_BEGIN_CRITICAL_SECTION(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); if (res == 0) { @@ -6306,12 +6273,10 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) Py_END_CRITICAL_SECTION(); types_start_world(); } else { - // The name is not a slot, hold TYPE_LOCK while updating version tags. - types_mutex_lock(); - Py_BEGIN_CRITICAL_SECTION(dict); + // The name is not a slot. 
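+        // BEGIN_TYPE_DICT_LOCK acquires both TYPE_LOCK and the dict's own
+        // mutex as a single critical section (see its definition above).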
+ BEGIN_TYPE_DICT_LOCK(dict); res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); - Py_END_CRITICAL_SECTION(); - types_mutex_unlock(); + END_TYPE_DICT_LOCK(); } if (res == -2) { // We handle the attribute error case here because reentrancy possible @@ -6415,10 +6380,10 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, clear_static_type_objects(interp, type, isbuiltin, final); if (final) { - types_mutex_lock(); + BEGIN_TYPE_LOCK(); type_clear_flags(type, Py_TPFLAGS_READY); set_version_unlocked(type, 0); - types_mutex_unlock(); + END_TYPE_LOCK(); } _PyStaticType_ClearWeakRefs(interp, type); diff --git a/Python/ceval.c b/Python/ceval.c index e57ca0a71fcc89..68991f72395e3b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -143,12 +143,10 @@ check_invalid_reentrancy(void) { #if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) // In the free-threaded build, the interpreter must not be re-entered if - // either the world-is-stopped or if the types mutex is held. If so, - // that's a bug somewhere (likely in the painfully complex typeobject code). + // the world-is-stopped. If so, that's a bug somewhere (quite likely in + // the painfully complex typeobject code). PyInterpreterState *interp = _PyInterpreterState_GET(); assert(!interp->stoptheworld.world_stopped); - assert(_Py_atomic_load_ullong_relaxed(&interp->types.mutex_tid) != - PyThread_get_thread_ident_ex()); #endif } From a81e9e395335c1cdf9e9ec41dcb1808d92836fba Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 16:45:10 -0700 Subject: [PATCH 37/45] Invalidate type cache before calling watchers. This avoids the watcher callback potentially using invalid cache entries. --- Objects/typeobject.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1f5269ddcbb8e3..366705d883d5c1 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1127,6 +1127,14 @@ type_modified_unlocked(PyTypeObject *type) } } + set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + // This field *must* be invalidated if the type is modified (see the + // comment on struct _specialization_cache): + FT_ATOMIC_STORE_PTR_RELAXED( + ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); + } + // Notify registered type watchers, if any if (type->tp_watched) { // Note that PyErr_FormatUnraisable is re-entrant and the watcher @@ -1148,14 +1156,6 @@ type_modified_unlocked(PyTypeObject *type) bits >>= 1; } } - - set_version_unlocked(type, 0); /* 0 is not a valid version tag */ - if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - // This field *must* be invalidated if the type is modified (see the - // comment on struct _specialization_cache): - FT_ATOMIC_STORE_PTR_RELAXED( - ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); - } } void From f5df0c3a05afdff894d431b47c2d03c49f6cac94 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 21 Apr 2025 21:09:04 -0700 Subject: [PATCH 38/45] Fixes for type_modified_unlocked(). * Since type_modified_unlocked() can be called with the world stopped then we need to re-start if making re-entrant calls (like calling type watcher callbacks). * Re-order code before type_modified_unlocked(). This fixes a potential bug if called functions end up re-assigning the type version tag. If they do, the cache can contain invalid entries. We should do the dictionary update first then set the version to zero. 
* Replace 'is_new_type' with 'world_stopped' boolean parameter. This seems a bit more clear. --- Objects/typeobject.c | 91 ++++++++++++++++++++++++++------------------ 1 file changed, 54 insertions(+), 37 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 366705d883d5c1..c785817c31c131 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1088,7 +1088,7 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) } static void -type_modified_unlocked(PyTypeObject *type) +type_modified_unlocked(PyTypeObject *type, bool world_stopped) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -1105,6 +1105,15 @@ type_modified_unlocked(PyTypeObject *type) We don't assign new version tags eagerly, but only as needed. */ +#ifdef Py_DEBUG + if (world_stopped) { + assert(types_world_is_stopped()); + } else { + if (TYPE_IS_REVEALED(type)) { + ASSERT_TYPE_LOCK_HELD(); + } + } +#endif if (type->tp_version_tag == 0) { return; } @@ -1122,7 +1131,7 @@ type_modified_unlocked(PyTypeObject *type) if (subclass == NULL) { continue; } - type_modified_unlocked(subclass); + type_modified_unlocked(subclass, world_stopped); Py_DECREF(subclass); } } @@ -1139,6 +1148,9 @@ type_modified_unlocked(PyTypeObject *type) if (type->tp_watched) { // Note that PyErr_FormatUnraisable is re-entrant and the watcher // callback might be too. + if (world_stopped) { + types_start_world(); + } PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; @@ -1155,6 +1167,9 @@ type_modified_unlocked(PyTypeObject *type) i++; bits >>= 1; } + if (world_stopped) { + types_stop_world(); + } } } @@ -1167,7 +1182,7 @@ PyType_Modified(PyTypeObject *type) } BEGIN_TYPE_LOCK(); - type_modified_unlocked(type); + type_modified_unlocked(type, false); END_TYPE_LOCK(); } @@ -1195,7 +1210,7 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) +type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) { ASSERT_NEW_OR_STOPPED(type); /* @@ -1212,12 +1227,12 @@ type_mro_modified(PyTypeObject *type, PyObject *bases, bool is_new_type) */ if (!Py_IS_TYPE(type, &PyType_Type)) { unsigned int meta_version = Py_TYPE(type)->tp_version_tag; - if (!is_new_type) { + if (world_stopped) { types_start_world(); } // This can be re-entrant. 
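+        // (A metaclass may define mro() in Python, so this lookup can
+        // run arbitrary code.)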
bool is_custom = has_custom_mro(type); - if (!is_new_type) { + if (world_stopped) { types_stop_world(); } if (meta_version != Py_TYPE(type)->tp_version_tag) { @@ -1621,7 +1636,7 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) } types_stop_world(); - type_modified_unlocked(type); + type_modified_unlocked(type, true); if (abstract) type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else @@ -1675,17 +1690,17 @@ static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); static int -mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) +mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) { #ifdef Py_DEBUG - if (is_new_type) { + if (!world_stopped) { assert(!TYPE_IS_REVEALED(type)); } else { assert(types_world_is_stopped()); } #endif PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro, is_new_type); + int res = mro_internal(type, 0, &old_mro, world_stopped); if (res <= 0) { /* error / reentrance */ return res; @@ -1735,7 +1750,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp, bool is_new_type) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp, is_new_type); + res = mro_hierarchy(subclass, temp, world_stopped); if (res < 0) { break; } @@ -1826,7 +1841,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp, false) < 0) { + if (mro_hierarchy(type, temp, true) < 0) { goto undo; } Py_DECREF(temp); @@ -3464,7 +3479,7 @@ mro_check(PyTypeObject *type, PyObject *mro) - through a finalizer of the return value of mro(). */ static PyObject * -mro_invoke(PyTypeObject *type, bool is_new_type) +mro_invoke(PyTypeObject *type, bool world_stopped) { PyObject *mro_result; PyObject *new_mro; @@ -3474,7 +3489,7 @@ mro_invoke(PyTypeObject *type, bool is_new_type) if (custom) { // Custom mro() method on metaclass. This is potentially re-entrant. // We are called either from type_ready() or from type_set_bases(). - if (!is_new_type) { + if (world_stopped) { // Called for type_set_bases(), re-assigment of __bases__ types_start_world(); } @@ -3486,7 +3501,7 @@ mro_invoke(PyTypeObject *type, bool is_new_type) else { new_mro = NULL; } - if (!is_new_type) { + if (world_stopped) { types_stop_world(); } } @@ -3542,10 +3557,10 @@ mro_invoke(PyTypeObject *type, bool is_new_type) - Returns -1 in case of an error. */ static int -mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_type) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_stopped) { #ifdef Py_DEBUG - if (is_new_type) { + if (!world_stopped) { assert(!TYPE_IS_REVEALED(type)); } else { assert(types_world_is_stopped()); @@ -3559,7 +3574,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. 
*/ old_mro = Py_XNewRef(lookup_tp_mro(type)); - new_mro = mro_invoke(type, is_new_type); /* might cause reentrance */ + new_mro = mro_invoke(type, world_stopped); /* might cause reentrance */ reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -3573,14 +3588,14 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool is_new_ set_tp_mro(type, new_mro, initial); - type_mro_modified(type, new_mro, is_new_type); + type_mro_modified(type, new_mro, world_stopped); /* corner case: the super class might have been hidden from the custom MRO */ - type_mro_modified(type, lookup_tp_bases(type), is_new_type); + type_mro_modified(type, lookup_tp_bases(type), world_stopped); // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - type_modified_unlocked(type); + type_modified_unlocked(type, world_stopped); } else { /* For static builtin types, this is only called during init @@ -6162,7 +6177,7 @@ _Py_type_getattro(PyObject *tp, PyObject *name) // the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, - PyObject *value, PyObject **old_value) + PyObject *value, PyObject **old_value, bool world_stopped) { // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor @@ -6174,13 +6189,6 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } - /* Clear the VALID_VERSION flag of 'type' and all its - subclasses. This could possibly be unified with the - update_subclasses() recursion in update_slot(), but carefully: - they each have their own conditions on which to stop - recursing into subclasses. */ - type_modified_unlocked(type); - if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) { PyErr_Format(PyExc_AttributeError, "type object '%.50s' has no attribute '%U'", @@ -6188,6 +6196,13 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -2; } + /* Clear the VALID_VERSION flag of 'type' and all its + subclasses. This could possibly be unified with the + update_subclasses() recursion in update_slot(), but carefully: + they each have their own conditions on which to stop + recursing into subclasses. */ + type_modified_unlocked(type, world_stopped); + return 0; } @@ -6267,7 +6282,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) // that the dict object mutex is held. So, we acquire it here to // avoid those assert failing. Py_BEGIN_CRITICAL_SECTION(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + res = type_update_dict(type, (PyDictObject *)dict, name, value, + &old_value, true); if (res == 0) { res = update_slot(type, name); } @@ -6276,7 +6292,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } else { // The name is not a slot. 
BEGIN_TYPE_DICT_LOCK(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + res = type_update_dict(type, (PyDictObject *)dict, name, value, + &old_value, false); END_TYPE_DICT_LOCK(); } if (res == -2) { @@ -8625,7 +8642,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal(type, initial, NULL, true) < 0) { + if (mro_internal(type, initial, NULL, false) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -11434,15 +11451,15 @@ update_all_slots(PyTypeObject* type) { pytype_slotdef *p; - ASSERT_NEW_OR_STOPPED(type); - - /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - type_modified_unlocked(type); + assert(types_world_is_stopped()); for (p = slotdefs; p->name; p++) { /* update_slot returns int but can't actually fail */ update_slot(type, p->name_strobj); } + + /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ + type_modified_unlocked(type, true); } @@ -11715,7 +11732,7 @@ PyType_Freeze(PyTypeObject *type) types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); - type_modified_unlocked(type); + type_modified_unlocked(type, true); types_start_world(); return 0; From 7db281c7e04c0ab4e722089910021e98b5c65a3e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 22 Apr 2025 16:18:36 -0700 Subject: [PATCH 39/45] Major re-work, TYPE_LOCK protects more things. Rather than trying to update tp_base, tp_bases, and tp_mro while the world is stopped, hold TYPE_LOCK when updating them. This is much more similar to how the base version of the code works and requires many fewer changes. Using stop-the-world turned out to be difficult to make work correctly, mostly due to custom mro() methods but also for some other reasons (for example, functions that potentially re-enter the interpreter). This re-work requires that we can call stop-the-world inside a critical section and have the mutex still held after re-starting. This seems to work and I've added asserts to confirm it. Stop-the-world is now only used when updating either tp_flags or other type slots and not the three type members listed above. --- Objects/typeobject.c | 338 ++++++++++++++++++++++--------------------- 1 file changed, 175 insertions(+), 163 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index c785817c31c131..0eaff5009ff092 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -61,13 +61,14 @@ class object "PyObject *" "&PyBaseObject_Type" #ifdef Py_GIL_DISABLED // There's a global lock for types that ensures that tp_version_tag and -// _spec_cache are correctly updated if the type is modified. This avoids -// having to take additional locks while doing various subclass processing -// which may result in odd behaviors w.r.t. running with the GIL as the outer -// type lock could be released and reacquired during a subclass update if -// there's contention on the subclass lock. +// _spec_cache are correctly updated if the type is modified. It also protects +// tp_mro, tp_bases, and tp_base. This avoids having to take additional locks +// while doing various subclass processing which may result in odd behaviors +// w.r.t. running with the GIL as the outer type lock could be released and +// reacquired during a subclass update if there's contention on the subclass +// lock. // -// Note that this lock does not protect when updating type slots or the +// Note that this lock does not protect updates of other type slots or the // tp_flags member. 
Instead, we either ensure those updates are done before // the type has been revealed to other threads or we only do those updates // while the stop-the-world mechanism is active. The slots and flags are read @@ -78,6 +79,7 @@ class object "PyObject *" "&PyBaseObject_Type" #define BEGIN_TYPE_DICT_LOCK(d) \ Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) + #define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() #ifdef Py_DEBUG @@ -101,12 +103,15 @@ types_world_is_stopped(void) #endif #define ASSERT_TYPE_LOCK_HELD() \ - _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) + if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } // Checks if we can safely update type slots or tp_flags. #define ASSERT_NEW_OR_STOPPED(tp) \ assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped()) +#define ASSERT_NEW_OR_LOCKED(tp) \ + if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); } + static void types_stop_world(void) { @@ -134,6 +139,7 @@ types_start_world(void) #define ASSERT_TYPE_LOCK_HELD() #define TYPE_IS_REVEALED(tp) 0 #define ASSERT_NEW_OR_STOPPED(tp) +#define ASSERT_NEW_OR_LOCKED(tp) #define types_world_is_stopped() 1 #define types_stop_world() #define types_start_world() @@ -532,8 +538,10 @@ _PyType_GetBases(PyTypeObject *self) { PyObject *res; + BEGIN_TYPE_LOCK(); res = lookup_tp_bases(self); Py_INCREF(res); + END_TYPE_LOCK(); return res; } @@ -542,7 +550,7 @@ static inline void set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_Check(bases)); - ASSERT_NEW_OR_STOPPED(self); + ASSERT_NEW_OR_LOCKED(self); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. @@ -587,21 +595,35 @@ clear_tp_bases(PyTypeObject *self, int final) static inline PyObject * lookup_tp_mro(PyTypeObject *self) { + ASSERT_NEW_OR_LOCKED(self); return self->tp_mro; } PyObject * _PyType_GetMRO(PyTypeObject *self) { - // This is safe in the free-threaded build because tp_mro cannot be - // assigned unless the type is new or if world is stopped. +#ifdef Py_GIL_DISABLED + PyObject *mro = _Py_atomic_load_ptr_relaxed(&self->tp_mro); + if (mro == NULL) { + return NULL; + } + if (_Py_TryIncrefCompare(&self->tp_mro, mro)) { + return mro; + } + + BEGIN_TYPE_LOCK(); + mro = lookup_tp_mro(self); + Py_XINCREF(mro); + END_TYPE_LOCK(); + return mro; +#else return Py_XNewRef(lookup_tp_mro(self)); +#endif } static inline void set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { - ASSERT_NEW_OR_STOPPED(self); if (mro != NULL) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { @@ -1088,7 +1110,7 @@ set_version_unlocked(PyTypeObject *tp, unsigned int version) } static void -type_modified_unlocked(PyTypeObject *type, bool world_stopped) +type_modified_unlocked(PyTypeObject *type) { /* Invalidate any cached data for the specified type and all subclasses. This function is called after the base @@ -1105,15 +1127,7 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) We don't assign new version tags eagerly, but only as needed. 
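   For example, after "C.__len__ = f" every cached lookup keyed on the
   old tag of C or of one of its subclasses must miss and re-resolve.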
*/ -#ifdef Py_DEBUG - if (world_stopped) { - assert(types_world_is_stopped()); - } else { - if (TYPE_IS_REVEALED(type)) { - ASSERT_TYPE_LOCK_HELD(); - } - } -#endif + ASSERT_NEW_OR_LOCKED(type); if (type->tp_version_tag == 0) { return; } @@ -1131,7 +1145,7 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) if (subclass == NULL) { continue; } - type_modified_unlocked(subclass, world_stopped); + type_modified_unlocked(subclass); Py_DECREF(subclass); } } @@ -1146,17 +1160,14 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) // Notify registered type watchers, if any if (type->tp_watched) { - // Note that PyErr_FormatUnraisable is re-entrant and the watcher - // callback might be too. - if (world_stopped) { - types_start_world(); - } PyInterpreterState *interp = _PyInterpreterState_GET(); int bits = type->tp_watched; int i = 0; while (bits) { assert(i < TYPE_MAX_WATCHERS); if (bits & 1) { + // Note that PyErr_FormatUnraisable is potentially re-entrant + // and the watcher callback might be too. PyType_WatchCallback cb = interp->type_watchers[i]; if (cb && (cb(type) < 0)) { PyErr_FormatUnraisable( @@ -1167,9 +1178,6 @@ type_modified_unlocked(PyTypeObject *type, bool world_stopped) i++; bits >>= 1; } - if (world_stopped) { - types_stop_world(); - } } } @@ -1182,7 +1190,7 @@ PyType_Modified(PyTypeObject *type) } BEGIN_TYPE_LOCK(); - type_modified_unlocked(type, false); + type_modified_unlocked(type); END_TYPE_LOCK(); } @@ -1210,9 +1218,8 @@ has_custom_mro(PyTypeObject *tp) } static void -type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) +type_mro_modified(PyTypeObject *type, PyObject *bases) { - ASSERT_NEW_OR_STOPPED(type); /* Check that all base classes or elements of the MRO of type are able to be cached. This function is called after the base @@ -1225,16 +1232,11 @@ type_mro_modified(PyTypeObject *type, PyObject *bases, bool world_stopped) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. */ + ASSERT_TYPE_LOCK_HELD(); if (!Py_IS_TYPE(type, &PyType_Type)) { unsigned int meta_version = Py_TYPE(type)->tp_version_tag; - if (world_stopped) { - types_start_world(); - } // This can be re-entrant. 
bool is_custom = has_custom_mro(type); - if (world_stopped) { - types_stop_world(); - } if (meta_version != Py_TYPE(type)->tp_version_tag) { // metaclass changed during call of has_custom_mro() goto clear; @@ -1635,13 +1637,16 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure) return -1; } + BEGIN_TYPE_LOCK(); + type_modified_unlocked(type); types_stop_world(); - type_modified_unlocked(type, true); if (abstract) type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT); else type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0); types_start_world(); + ASSERT_TYPE_LOCK_HELD(); + END_TYPE_LOCK(); return 0; } @@ -1663,6 +1668,7 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) PyTypeObject *type = PyTypeObject_CAST(tp); PyObject *mro; + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(type); if (mro == NULL) { mro = Py_None; @@ -1670,11 +1676,12 @@ type_get_mro(PyObject *tp, void *Py_UNUSED(closure)) Py_INCREF(mro); } + END_TYPE_LOCK(); return mro; } static PyTypeObject *find_best_base(PyObject *); -static int mro_internal(PyTypeObject *, int, PyObject **, bool); +static int mro_internal(PyTypeObject *, int, PyObject **); static int type_is_subtype_base_chain(PyTypeObject *, PyTypeObject *); static int compatible_for_assignment(PyTypeObject *, PyTypeObject *, const char *); static int add_subclass(PyTypeObject*, PyTypeObject*); @@ -1689,18 +1696,15 @@ static int update_subclasses(PyTypeObject *type, PyObject *attr_name, static int recurse_down_subclasses(PyTypeObject *type, PyObject *name, update_callback callback, void *data); +// Compute tp_mro for this type and all of its subclasses. This +// is called after __bases__ is assigned to an existing type. static int -mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) +mro_hierarchy(PyTypeObject *type, PyObject *temp) { -#ifdef Py_DEBUG - if (!world_stopped) { - assert(!TYPE_IS_REVEALED(type)); - } else { - assert(types_world_is_stopped()); - } -#endif + ASSERT_TYPE_LOCK_HELD(); + PyObject *old_mro; - int res = mro_internal(type, 0, &old_mro, world_stopped); + int res = mro_internal(type, 0, &old_mro); if (res <= 0) { /* error / reentrance */ return res; @@ -1750,7 +1754,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp, bool world_stopped) Py_ssize_t n = PyList_GET_SIZE(subclasses); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *subclass = _PyType_CAST(PyList_GET_ITEM(subclasses, i)); - res = mro_hierarchy(subclass, temp, world_stopped); + res = mro_hierarchy(subclass, temp); if (res < 0) { break; } @@ -1825,9 +1829,9 @@ type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **bes } static int -type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) +type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, PyTypeObject *best_base) { - assert(types_world_is_stopped()); + ASSERT_TYPE_LOCK_HELD(); Py_ssize_t n; PyObject *old_bases = lookup_tp_bases(type); @@ -1841,7 +1845,7 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje if (temp == NULL) { goto bail; } - if (mro_hierarchy(type, temp, true) < 0) { + if (mro_hierarchy(type, temp) < 0) { goto undo; } Py_DECREF(temp); @@ -1859,7 +1863,10 @@ type_set_bases_world_stopped(PyTypeObject *type, PyObject *new_bases, PyTypeObje add to all new_bases */ remove_all_subclasses(type, old_bases); res = add_all_subclasses(type, new_bases); + types_stop_world(); update_all_slots(type); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); } else { res = 0; @@ 
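+                // The version tag was zeroed earlier in this function, so
+                // a callback that looks attributes up on this type cannot
+                // hit stale method-cache entries.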
-1912,13 +1919,13 @@ type_set_bases(PyObject *tp, PyObject *new_bases, void *Py_UNUSED(closure)) { PyTypeObject *type = PyTypeObject_CAST(tp); PyTypeObject *best_base; - if (type_check_new_bases(type, new_bases, &best_base) < 0) { - return -1; - } int res; - types_stop_world(); - res = type_set_bases_world_stopped(type, new_bases, best_base); - types_start_world(); + BEGIN_TYPE_LOCK(); + res = type_check_new_bases(type, new_bases, &best_base); + if (res == 0) { + res = type_set_bases_unlocked(type, new_bases, best_base); + } + END_TYPE_LOCK(); return res; } @@ -3158,17 +3165,12 @@ tail_contains(PyObject *tuple, Py_ssize_t whence, PyObject *o) static PyObject * class_name(PyObject *cls) { -#ifdef Py_GIL_DISABLED - // Avoid re-entrancy and use tp_name. This is only used for the - // mro_implementation() sanity check of the computed mro. - return type_name(cls, NULL); -#else PyObject *name; + // Note that this is potentially re-entrant. if (PyObject_GetOptionalAttr(cls, &_Py_ID(__name__), &name) == 0) { name = PyObject_Repr(cls); } return name; -#endif } static int @@ -3333,8 +3335,10 @@ pmerge(PyObject *acc, PyObject **to_merge, Py_ssize_t to_merge_size) } static PyObject * -mro_implementation(PyTypeObject *type) +mro_implementation_unlocked(PyTypeObject *type) { + ASSERT_TYPE_LOCK_HELD(); + if (!_PyType_IsReady(type)) { if (PyType_Ready(type) < 0) return NULL; @@ -3414,6 +3418,16 @@ mro_implementation(PyTypeObject *type) return result; } +static PyObject * +mro_implementation(PyTypeObject *type) +{ + PyObject *mro; + BEGIN_TYPE_LOCK(); + mro = mro_implementation_unlocked(type); + END_TYPE_LOCK(); + return mro; +} + /*[clinic input] type.mro @@ -3479,44 +3493,30 @@ mro_check(PyTypeObject *type, PyObject *mro) - through a finalizer of the return value of mro(). */ static PyObject * -mro_invoke(PyTypeObject *type, bool world_stopped) +mro_invoke(PyTypeObject *type) { PyObject *mro_result; PyObject *new_mro; + ASSERT_TYPE_LOCK_HELD(); + const int custom = !Py_IS_TYPE(type, &PyType_Type); if (custom) { // Custom mro() method on metaclass. This is potentially re-entrant. // We are called either from type_ready() or from type_set_bases(). - if (world_stopped) { - // Called for type_set_bases(), re-assigment of __bases__ - types_start_world(); - } mro_result = call_method_noarg((PyObject *)type, &_Py_ID(mro)); - if (mro_result != NULL) { - new_mro = PySequence_Tuple(mro_result); - Py_DECREF(mro_result); - } - else { - new_mro = NULL; - } - if (world_stopped) { - types_stop_world(); - } } else { // In this case, the mro() method on the type object is being used and // we know that these calls are not re-entrant. - mro_result = mro_implementation(type); - if (mro_result != NULL) { - new_mro = PySequence_Tuple(mro_result); - Py_DECREF(mro_result); - } - else { - new_mro = NULL; - } + mro_result = mro_implementation_unlocked(type); } + if (mro_result == NULL) + return NULL; + + new_mro = PySequence_Tuple(mro_result); + Py_DECREF(mro_result); if (new_mro == NULL) { return NULL; } @@ -3557,15 +3557,9 @@ mro_invoke(PyTypeObject *type, bool world_stopped) - Returns -1 in case of an error. 
*/ static int -mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_stopped) +mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro) { -#ifdef Py_DEBUG - if (!world_stopped) { - assert(!TYPE_IS_REVEALED(type)); - } else { - assert(types_world_is_stopped()); - } -#endif + ASSERT_TYPE_LOCK_HELD(); PyObject *new_mro, *old_mro; int reent; @@ -3574,7 +3568,7 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_s Don't let old_mro be GC'ed and its address be reused for another object, like (suddenly!) a new tp_mro. */ old_mro = Py_XNewRef(lookup_tp_mro(type)); - new_mro = mro_invoke(type, world_stopped); /* might cause reentrance */ + new_mro = mro_invoke(type); /* might cause reentrance */ reent = (lookup_tp_mro(type) != old_mro); Py_XDECREF(old_mro); if (new_mro == NULL) { @@ -3588,14 +3582,14 @@ mro_internal(PyTypeObject *type, int initial, PyObject **p_old_mro, bool world_s set_tp_mro(type, new_mro, initial); - type_mro_modified(type, new_mro, world_stopped); + type_mro_modified(type, new_mro); /* corner case: the super class might have been hidden from the custom MRO */ - type_mro_modified(type, lookup_tp_bases(type), world_stopped); + type_mro_modified(type, lookup_tp_bases(type)); // XXX Expand this to Py_TPFLAGS_IMMUTABLETYPE? if (!(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)) { - type_modified_unlocked(type, world_stopped); + type_modified_unlocked(type); } else { /* For static builtin types, this is only called during init @@ -5535,6 +5529,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) } PyObject *res = NULL; + BEGIN_TYPE_LOCK(); PyObject *mro = lookup_tp_mro(type); // The type must be ready @@ -5561,6 +5556,7 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) break; } } + END_TYPE_LOCK(); if (res == NULL) { PyErr_Format( @@ -5770,14 +5766,21 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio } static void -maybe_update_cache(struct type_cache_entry *entry, PyObject *name, - unsigned int version_tag, PyObject *value) +maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry, + PyObject *name, unsigned int version_tag, PyObject *value) { if (version_tag == 0) { return; // 0 is not a valid version } + // Calling find_name_in_mro() might cause the type version to change. + // For example, if a __hash__ or __eq__ method mutates the types. + // This case is expected to be rare but we check for it here and avoid + // replacing a valid cache entry with a known to be stale one. 
+ if (FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag) != version_tag) { + return; // version changed during lookup + } PyObject *old_value; -#if Py_GIL_DISABLED +#ifdef Py_GIL_DISABLED _PySeqLock_LockWrite(&entry->sequence); // update the entry @@ -5821,9 +5824,8 @@ _PyTypes_AfterFork(void) static unsigned int type_assign_version(PyTypeObject *type) { - unsigned int version = FT_ATOMIC_LOAD_UINT_RELAXED((type)->tp_version_tag); + unsigned int version = type->tp_version_tag; if (version == 0) { - BEGIN_TYPE_LOCK(); PyInterpreterState *interp = _PyInterpreterState_GET(); if (assign_version_tag(interp, type)) { version = type->tp_version_tag; @@ -5831,7 +5833,6 @@ type_assign_version(PyTypeObject *type) else { version = 0; } - END_TYPE_LOCK(); } return version; } @@ -5885,17 +5886,14 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef assert(!PyErr_Occurred()); PyObject *res; + int error; unsigned int assigned_version = 0; // 0 is not a valid version + BEGIN_TYPE_LOCK(); if (MCACHE_CACHEABLE_NAME(name)) { assigned_version = type_assign_version(type); } - // Calling find_name_in_mro() might cause the type version to change. For - // example, if a __hash__ or __eq__ method mutates the types. Since this - // case is expected to be rare, it seems better to not explicitly check - // for it (by checking if the version changed) and to do the useless cache - // update anyhow. - int error; res = find_name_in_mro(type, name, &error); + END_TYPE_LOCK(); /* Only put NULL results into cache if there was no error. */ if (error) { @@ -5913,7 +5911,7 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out = PyStackRef_NULL; return 0; } - maybe_update_cache(entry, name, assigned_version, res); + maybe_update_cache(type, entry, name, assigned_version, res); *out = res ? PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL; return assigned_version; } @@ -6177,7 +6175,7 @@ _Py_type_getattro(PyObject *tp, PyObject *name) // the type versions. static int type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, - PyObject *value, PyObject **old_value, bool world_stopped) + PyObject *value, PyObject **old_value) { // We don't want any re-entrancy between when we update the dict // and call type_modified_unlocked, including running the destructor @@ -6189,19 +6187,20 @@ type_update_dict(PyTypeObject *type, PyDictObject *dict, PyObject *name, return -1; } - if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) { - PyErr_Format(PyExc_AttributeError, - "type object '%.50s' has no attribute '%U'", - ((PyTypeObject*)type)->tp_name, name); - return -2; - } - /* Clear the VALID_VERSION flag of 'type' and all its subclasses. This could possibly be unified with the update_subclasses() recursion in update_slot(), but carefully: they each have their own conditions on which to stop recursing into subclasses. */ - type_modified_unlocked(type, world_stopped); + type_modified_unlocked(type); + + if (_PyDict_SetItem_LockHeld(dict, name, value) < 0) { + PyErr_Format(PyExc_AttributeError, + "type object '%.50s' has no attribute '%U'", + ((PyTypeObject*)type)->tp_name, name); + _PyObject_SetAttributeErrorContext((PyObject *)type, name); + return -1; + } return 0; } @@ -6263,45 +6262,30 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) // This is an unlikely case. PyType_Ready has not yet been done and // we need to initialize tp_dict. We don't just do PyType_Ready // because we could already be readying. 
- types_stop_world(); + BEGIN_TYPE_LOCK(); if (type->tp_dict == NULL) { dict = type->tp_dict = PyDict_New(); } - types_start_world(); + END_TYPE_LOCK(); if (dict == NULL) { res = -1; goto done; } } - if (is_dunder_name(name) && has_slotdef(name)) { - // The name corresponds to a type slot. - types_stop_world(); - // Since the world is stopped, we actually don't need this critical - // section. However, this internally calls dict methods that assert - // that the dict object mutex is held. So, we acquire it here to - // avoid those assert failing. - Py_BEGIN_CRITICAL_SECTION(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, - &old_value, true); - if (res == 0) { + BEGIN_TYPE_DICT_LOCK(dict); + res = type_update_dict(type, (PyDictObject *)dict, name, value, + &old_value); + if (res == 0) { + if (is_dunder_name(name) && has_slotdef(name)) { + // The name corresponds to a type slot. + types_stop_world(); res = update_slot(type, name); + types_start_world(); + ASSERT_TYPE_LOCK_HELD(); } - Py_END_CRITICAL_SECTION(); - types_start_world(); - } else { - // The name is not a slot. - BEGIN_TYPE_DICT_LOCK(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, - &old_value, false); - END_TYPE_DICT_LOCK(); - } - if (res == -2) { - // We handle the attribute error case here because reentrancy possible - // when calling this function. - _PyObject_SetAttributeErrorContext((PyObject *)type, name); - res = -1; } + END_TYPE_DICT_LOCK(); assert(res < 0 || _PyType_CheckConsistency(type)); done: @@ -8631,7 +8615,7 @@ type_ready_preheader(PyTypeObject *type) static int type_ready_mro(PyTypeObject *type, int initial) { - ASSERT_NEW_OR_STOPPED(type); + ASSERT_TYPE_LOCK_HELD(); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { if (!initial) { @@ -8642,7 +8626,7 @@ type_ready_mro(PyTypeObject *type, int initial) } /* Calculate method resolution order */ - if (mro_internal(type, initial, NULL, false) < 0) { + if (mro_internal(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -8706,7 +8690,7 @@ inherit_patma_flags(PyTypeObject *type, PyTypeObject *base) { static int type_ready_inherit(PyTypeObject *type) { - ASSERT_NEW_OR_STOPPED(type); + ASSERT_TYPE_LOCK_HELD(); /* Inherit special flags from dominant base */ PyTypeObject *base = type->tp_base; @@ -8903,7 +8887,7 @@ type_ready_post_checks(PyTypeObject *type) static int type_ready(PyTypeObject *type, int initial) { - ASSERT_NEW_OR_STOPPED(type); + ASSERT_TYPE_LOCK_HELD(); _PyObject_ASSERT((PyObject *)type, !is_readying(type)); start_readying(type); @@ -8995,12 +8979,14 @@ PyType_Ready(PyTypeObject *type) } int res; + BEGIN_TYPE_LOCK(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } + END_TYPE_LOCK(); return res; } @@ -9034,7 +9020,9 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, managed_static_type_state_init(interp, self, isbuiltin, initial); int res; + BEGIN_TYPE_LOCK(); res = type_ready(self, initial); + END_TYPE_LOCK(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -9485,13 +9473,12 @@ wrap_delitem(PyObject *self, PyObject *args, void *wrapped) https://mail.python.org/pipermail/python-dev/2003-April/034535.html */ static int -hackcheck(PyObject *self, setattrofunc func, const char *what) +hackcheck_unlocked(PyObject *self, setattrofunc func, const char *what) { - if (!PyType_Check(self)) { - return 1; - } 
PyTypeObject *type = Py_TYPE(self); + ASSERT_TYPE_LOCK_HELD(); + PyObject *mro = lookup_tp_mro(type); if (!mro) { /* Probably ok not to check the call in this case. */ @@ -9533,6 +9520,20 @@ hackcheck(PyObject *self, setattrofunc func, const char *what) return 1; } +static int +hackcheck(PyObject *self, setattrofunc func, const char *what) +{ + if (!PyType_Check(self)) { + return 1; + } + + int res; + BEGIN_TYPE_LOCK(); + res = hackcheck_unlocked(self, func, what); + END_TYPE_LOCK(); + return res; +} + static PyObject * wrap_setattr(PyObject *self, PyObject *args, void *wrapped) { @@ -10680,7 +10681,7 @@ slot_bf_getbuffer(PyObject *self, Py_buffer *buffer, int flags) } static releasebufferproc -type_lookup_base_releasebuffer(PyObject *self, Py_buffer *buffer) +releasebuffer_maybe_call_super_unlocked(PyObject *self, Py_buffer *buffer) { PyTypeObject *self_type = Py_TYPE(self); PyObject *mro = lookup_tp_mro(self_type); @@ -10722,7 +10723,9 @@ releasebuffer_maybe_call_super(PyObject *self, Py_buffer *buffer) { releasebufferproc base_releasebuffer; - base_releasebuffer = type_lookup_base_releasebuffer(self, buffer); + BEGIN_TYPE_LOCK(); + base_releasebuffer = releasebuffer_maybe_call_super_unlocked(self, buffer); + END_TYPE_LOCK(); if (base_releasebuffer != NULL) { base_releasebuffer(self, buffer); @@ -11459,7 +11462,7 @@ update_all_slots(PyTypeObject* type) } /* Clear the VALID_VERSION flag of 'type' and all its subclasses. */ - type_modified_unlocked(type, true); + type_modified_unlocked(type); } @@ -11730,10 +11733,13 @@ PyType_Freeze(PyTypeObject *type) return -1; } + BEGIN_TYPE_LOCK(); types_stop_world(); type_add_flags(type, Py_TPFLAGS_IMMUTABLETYPE); - type_modified_unlocked(type, true); types_start_world(); + ASSERT_TYPE_LOCK_HELD(); + type_modified_unlocked(type); + END_TYPE_LOCK(); return 0; } @@ -11798,8 +11804,14 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * PyObject *mro, *res; Py_ssize_t i, n; + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(su_obj_type); + /* keep a strong reference to mro because su_obj_type->tp_mro can be + replaced during PyDict_GetItemRef(dict, name, &res) and because + another thread can modify it after we end the critical section + below */ Py_XINCREF(mro); + END_TYPE_LOCK(); if (mro == NULL) return NULL; From 986f23a7e959a3037a500a6c5c1d0e0fea0a9da7 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 22 Apr 2025 17:14:07 -0700 Subject: [PATCH 40/45] Fix non-debug build. --- Objects/typeobject.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0eaff5009ff092..e2d33af1bc7fa2 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -102,6 +102,7 @@ types_world_is_stopped(void) #define TYPE_IS_REVEALED(tp) 0 #endif +#ifdef Py_DEBUG #define ASSERT_TYPE_LOCK_HELD() \ if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } @@ -111,6 +112,11 @@ types_world_is_stopped(void) #define ASSERT_NEW_OR_LOCKED(tp) \ if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); } +#else +#define ASSERT_TYPE_LOCK_HELD() +#define ASSERT_NEW_OR_STOPPED(tp) +#define ASSERT_NEW_OR_LOCKED(tp) +#endif static void types_stop_world(void) From c404ed4b19c3b85e82c1b8e6812940e96a6e5a40 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 23 Apr 2025 12:52:16 -0700 Subject: [PATCH 41/45] Revert unneeded code changes. 
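For context on the path this touches: a dunder store on a class still goes
through type_setattro(), where the dict update happens under the type dict
lock and only a store that actually names a slot (is_dunder_name() &&
has_slotdef()) pays for a stop-the-world while update_slot() runs. A minimal
Python-level sketch of the observable effect (class C and the lambda are
invented for illustration):

    class C:
        pass

    c = C()
    # Assigning a dunder on the class updates C's __dict__ and refreshes
    # the matching C-level type slot, so existing instances pick it up.
    C.__getitem__ = lambda self, key: key * 2
    assert c[21] == 42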
--- Objects/typeobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e2d33af1bc7fa2..d591c5873f82e6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -6269,7 +6269,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) // we need to initialize tp_dict. We don't just do PyType_Ready // because we could already be readying. BEGIN_TYPE_LOCK(); - if (type->tp_dict == NULL) { + dict = type->tp_dict; + if (dict == NULL) { dict = type->tp_dict = PyDict_New(); } END_TYPE_LOCK(); @@ -6280,8 +6281,8 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } BEGIN_TYPE_DICT_LOCK(dict); - res = type_update_dict(type, (PyDictObject *)dict, name, value, - &old_value); + res = type_update_dict(type, (PyDictObject *)dict, name, value, &old_value); + assert(_PyType_CheckConsistency(type)); if (res == 0) { if (is_dunder_name(name) && has_slotdef(name)) { // The name corresponds to a type slot. @@ -6292,7 +6293,6 @@ type_setattro(PyObject *self, PyObject *name, PyObject *value) } } END_TYPE_DICT_LOCK(); - assert(res < 0 || _PyType_CheckConsistency(type)); done: Py_DECREF(name); From 0eb77da532b67d760c76213228e815a395453be3 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 23 Apr 2025 17:00:04 -0700 Subject: [PATCH 42/45] Restore comment --- Objects/typeobject.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d591c5873f82e6..fbb57f96102392 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5891,6 +5891,9 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef /* We may end up clearing live exceptions below, so make sure it's ours. */ assert(!PyErr_Occurred()); + // We need to atomically do the lookup and capture the version before + // anyone else can modify our mro or mutate the type. + PyObject *res; int error; unsigned int assigned_version = 0; // 0 is not a valid version From 16f15b2e3b62c8c95694796293927bfbbbacf183 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 23 Apr 2025 17:15:45 -0700 Subject: [PATCH 43/45] Revert more changes. These are not required as part of this PR and can be reviewed separately. 
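The invariant this keeps intact: the MRO lookup and the version-tag capture
happen together under the type lock, and a cache entry is only published for
a successfully assigned (nonzero) tag, so a mutated type is never served a
stale entry. Roughly, at the Python level (class C and method f are invented
for illustration):

    class C:
        def f(self):
            return 1

    obj = C()
    assert obj.f() == 1   # lookup result may be cached under C's version tag
    C.f = lambda self: 2  # mutating C zeroes its tag, invalidating the cache
    assert obj.f() == 2   # a fresh lookup runs; the stale entry is not reused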
--- Objects/typeobject.c | 128 ++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 76 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index fbb57f96102392..e088f11112854d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1156,14 +1156,6 @@ type_modified_unlocked(PyTypeObject *type) } } - set_version_unlocked(type, 0); /* 0 is not a valid version tag */ - if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - // This field *must* be invalidated if the type is modified (see the - // comment on struct _specialization_cache): - FT_ATOMIC_STORE_PTR_RELAXED( - ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); - } - // Notify registered type watchers, if any if (type->tp_watched) { PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -1185,6 +1177,14 @@ type_modified_unlocked(PyTypeObject *type) bits >>= 1; } } + + set_version_unlocked(type, 0); /* 0 is not a valid version tag */ + if (PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { + // This field *must* be invalidated if the type is modified (see the + // comment on struct _specialization_cache): + FT_ATOMIC_STORE_PTR_RELAXED( + ((PyHeapTypeObject *)type)->_spec_cache.getitem, NULL); + } } void @@ -1238,21 +1238,14 @@ type_mro_modified(PyTypeObject *type, PyObject *bases) Called from mro_internal, which will subsequently be called on each subclass when their mro is recursively updated. */ + Py_ssize_t i, n; + ASSERT_TYPE_LOCK_HELD(); - if (!Py_IS_TYPE(type, &PyType_Type)) { - unsigned int meta_version = Py_TYPE(type)->tp_version_tag; - // This can be re-entrant. - bool is_custom = has_custom_mro(type); - if (meta_version != Py_TYPE(type)->tp_version_tag) { - // metaclass changed during call of has_custom_mro() - goto clear; - } - if (is_custom) { - goto clear; - } + if (!Py_IS_TYPE(type, &PyType_Type) && has_custom_mro(type)) { + goto clear; } - Py_ssize_t n = PyTuple_GET_SIZE(bases); - for (Py_ssize_t i = 0; i < n; i++) { + n = PyTuple_GET_SIZE(bases); + for (i = 0; i < n; i++) { PyObject *b = PyTuple_GET_ITEM(bases, i); PyTypeObject *cls = _PyType_CAST(b); @@ -5771,22 +5764,12 @@ update_cache(struct type_cache_entry *entry, PyObject *name, unsigned int versio return old_name; } +#if Py_GIL_DISABLED + static void -maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry, - PyObject *name, unsigned int version_tag, PyObject *value) +update_cache_gil_disabled(struct type_cache_entry *entry, PyObject *name, + unsigned int version_tag, PyObject *value) { - if (version_tag == 0) { - return; // 0 is not a valid version - } - // Calling find_name_in_mro() might cause the type version to change. - // For example, if a __hash__ or __eq__ method mutates the types. - // This case is expected to be rare but we check for it here and avoid - // replacing a valid cache entry with a known to be stale one. 
- if (FT_ATOMIC_LOAD_UINT_RELAXED(type->tp_version_tag) != version_tag) { - return; // version changed during lookup - } - PyObject *old_value; -#ifdef Py_GIL_DISABLED _PySeqLock_LockWrite(&entry->sequence); // update the entry @@ -5798,16 +5781,16 @@ maybe_update_cache(PyTypeObject *type, struct type_cache_entry *entry, return; } - old_value = update_cache(entry, name, version_tag, value); + PyObject *old_value = update_cache(entry, name, version_tag, value); // Then update sequence to the next valid value _PySeqLock_UnlockWrite(&entry->sequence); -#else - old_value = update_cache(entry, name, version_tag, value); -#endif + Py_DECREF(old_value); } +#endif + void _PyTypes_AfterFork(void) { @@ -5825,22 +5808,23 @@ _PyTypes_AfterFork(void) #endif } -// Try to assign a new type version tag, return it if successful. Return 0 -// if no version was assigned. -static unsigned int -type_assign_version(PyTypeObject *type) +/* Internal API to look for a name through the MRO. + This returns a strong reference, and doesn't set an exception! + If nonzero, version is set to the value of type->tp_version at the time of + the lookup. +*/ +PyObject * +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) { - unsigned int version = type->tp_version_tag; - if (version == 0) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (assign_version_tag(interp, type)) { - version = type->tp_version_tag; - } - else { - version = 0; - } + _PyStackRef out; + unsigned int ver = _PyType_LookupStackRefAndVersion(type, name, &out); + if (version) { + *version = ver; } - return version; + if (PyStackRef_IsNull(out)) { + return NULL; + } + return PyStackRef_AsPyObjectSteal(out); } unsigned int @@ -5896,12 +5880,15 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef PyObject *res; int error; - unsigned int assigned_version = 0; // 0 is not a valid version + PyInterpreterState *interp = _PyInterpreterState_GET(); + int has_version = 0; + unsigned int assigned_version = 0; BEGIN_TYPE_LOCK(); + res = find_name_in_mro(type, name, &error); if (MCACHE_CACHEABLE_NAME(name)) { - assigned_version = type_assign_version(type); + has_version = assign_version_tag(interp, type); + assigned_version = type->tp_version_tag; } - res = find_name_in_mro(type, name, &error); END_TYPE_LOCK(); /* Only put NULL results into cache if there was no error. */ @@ -5920,28 +5907,17 @@ _PyType_LookupStackRefAndVersion(PyTypeObject *type, PyObject *name, _PyStackRef *out = PyStackRef_NULL; return 0; } - maybe_update_cache(type, entry, name, assigned_version, res); - *out = res ? PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL; - return assigned_version; -} -/* Internal API to look for a name through the MRO. - This returns a strong reference, and doesn't set an exception! - If nonzero, version is set to the value of type->tp_version at the time of - the lookup. -*/ -PyObject * -_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) -{ - _PyStackRef out; - unsigned int ver = _PyType_LookupStackRefAndVersion(type, name, &out); - if (version) { - *version = ver; - } - if (PyStackRef_IsNull(out)) { - return NULL; + if (has_version) { +#if Py_GIL_DISABLED + update_cache_gil_disabled(entry, name, assigned_version, res); +#else + PyObject *old_value = update_cache(entry, name, assigned_version, res); + Py_DECREF(old_value); +#endif } - return PyStackRef_AsPyObjectSteal(out); + *out = res ? 
PyStackRef_FromPyObjectSteal(res) : PyStackRef_NULL; + return has_version ? assigned_version : 0; } /* Internal API to look for a name through the MRO. From 64547e9b7fb4a9605062fdcbe026474581e0c198 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 24 Apr 2025 20:44:27 -0700 Subject: [PATCH 44/45] Reduce item list size for a few tests. Now that these slot updates use stop-the-world, these two tests are quite a lot slower. Reduce size of the items list from 1000 to 100. --- Lib/test/test_opcache.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index ac132465b227ad..f06f7ce1a7d49f 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -566,6 +566,7 @@ class TestRacesDoNotCrash(TestBase): # Careful with these. Bigger numbers have a higher chance of catching bugs, # but you can also burn through a *ton* of type/dict/function versions: ITEMS = 1000 + SMALL_ITEMS = 100 LOOPS = 4 WRITERS = 2 @@ -609,7 +610,7 @@ class C: __getitem__ = lambda self, item: None items = [] - for _ in range(self.ITEMS): + for _ in range(self.SMALL_ITEMS): item = C() items.append(item) return items @@ -780,7 +781,7 @@ class C: __getattribute__ = lambda self, name: None items = [] - for _ in range(self.ITEMS): + for _ in range(self.SMALL_ITEMS): item = C() items.append(item) return items From 5672352f32bf90fd1a9ff3e2a0bc0c4101c95915 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 28 Apr 2025 13:02:52 -0700 Subject: [PATCH 45/45] Minor code tidy. --- Objects/typeobject.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index e088f11112854d..811a27c6405ce2 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -107,15 +107,15 @@ types_world_is_stopped(void) if (!types_world_is_stopped()) { _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK); } // Checks if we can safely update type slots or tp_flags. 
-#define ASSERT_NEW_OR_STOPPED(tp) \
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp) \
     assert(!TYPE_IS_REVEALED(tp) || types_world_is_stopped())
 
-#define ASSERT_NEW_OR_LOCKED(tp) \
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp) \
     if (TYPE_IS_REVEALED(tp)) { ASSERT_TYPE_LOCK_HELD(); }
 #else
 #define ASSERT_TYPE_LOCK_HELD()
-#define ASSERT_NEW_OR_STOPPED(tp)
-#define ASSERT_NEW_OR_LOCKED(tp)
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp)
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp)
 #endif
 
 static void
@@ -144,8 +144,8 @@ types_start_world(void)
 #define END_TYPE_DICT_LOCK()
 #define ASSERT_TYPE_LOCK_HELD()
 #define TYPE_IS_REVEALED(tp) 0
-#define ASSERT_NEW_OR_STOPPED(tp)
-#define ASSERT_NEW_OR_LOCKED(tp)
+#define ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp)
+#define ASSERT_NEW_TYPE_OR_LOCKED(tp)
 #define types_world_is_stopped() 1
 #define types_stop_world()
 #define types_start_world()
@@ -411,14 +411,14 @@ _PyStaticType_GetBuiltins(void)
 static void
 type_set_flags(PyTypeObject *tp, unsigned long flags)
 {
-    ASSERT_NEW_OR_STOPPED(tp);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp);
     tp->tp_flags = flags;
 }
 
 static void
 type_set_flags_with_mask(PyTypeObject *tp, unsigned long mask, unsigned long flags)
 {
-    ASSERT_NEW_OR_STOPPED(tp);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(tp);
     unsigned long new_flags = (tp->tp_flags & ~mask) | flags;
     type_set_flags(tp, new_flags);
 }
@@ -556,7 +556,7 @@ static inline void
 set_tp_bases(PyTypeObject *self, PyObject *bases, int initial)
 {
     assert(PyTuple_Check(bases));
-    ASSERT_NEW_OR_LOCKED(self);
+    ASSERT_NEW_TYPE_OR_LOCKED(self);
     if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) {
         // XXX tp_bases can probably be statically allocated for each
         // static builtin type.
@@ -601,7 +601,7 @@ clear_tp_bases(PyTypeObject *self, int final)
 static inline PyObject *
 lookup_tp_mro(PyTypeObject *self)
 {
-    ASSERT_NEW_OR_LOCKED(self);
+    ASSERT_NEW_TYPE_OR_LOCKED(self);
     return self->tp_mro;
 }
 
@@ -1133,7 +1133,7 @@ type_modified_unlocked(PyTypeObject *type)
    We don't assign new version tags eagerly, but only as
    needed.
  */
-    ASSERT_NEW_OR_LOCKED(type);
+    ASSERT_NEW_TYPE_OR_LOCKED(type);
     if (type->tp_version_tag == 0) {
         return;
     }
@@ -1640,9 +1640,9 @@ type_set_abstractmethods(PyObject *tp, PyObject *value, void *Py_UNUSED(closure)
     type_modified_unlocked(type);
     types_stop_world();
     if (abstract)
-        type_set_flags_with_mask(type, 0, Py_TPFLAGS_IS_ABSTRACT);
+        type_add_flags(type, Py_TPFLAGS_IS_ABSTRACT);
     else
-        type_set_flags_with_mask(type, Py_TPFLAGS_IS_ABSTRACT, 0);
+        type_clear_flags(type, Py_TPFLAGS_IS_ABSTRACT);
     types_start_world();
     ASSERT_TYPE_LOCK_HELD();
     END_TYPE_LOCK();
@@ -1767,7 +1767,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp)
 static int
 type_check_new_bases(PyTypeObject *type, PyObject *new_bases, PyTypeObject **best_base)
 {
-    // Check arguments, this re-entrant due to the PySys_Audit() call
+    // Check arguments, this is re-entrant due to the PySys_Audit() call
     if (!check_set_special_type_attr(type, new_bases, "__bases__")) {
         return -1;
     }
@@ -11252,7 +11252,7 @@ has_slotdef(PyObject *name)
 static pytype_slotdef *
 update_one_slot(PyTypeObject *type, pytype_slotdef *p)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(type);
 
     PyObject *descr;
     PyWrapperDescrObject *d;
@@ -11375,7 +11375,7 @@ update_one_slot(PyTypeObject *type, pytype_slotdef *p)
 static int
 update_slots_callback(PyTypeObject *type, void *data)
 {
-    ASSERT_NEW_OR_STOPPED(type);
+    ASSERT_WORLD_STOPPED_OR_NEW_TYPE(type);
 
     pytype_slotdef **pp = (pytype_slotdef **)data;
     for (; *pp; pp++) {
