From 6a1b24b0742d7c7931a45a76a5741401522a011d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 30 Jun 2025 15:22:17 -0700 Subject: [PATCH 1/2] Clear weakrefs created by finalizers. Weakrefs to unreachable garbage that are created during running of finalizers need to be cleared. This avoids exposing objects that have `tp_clear` called on them to Python-level code. --- Lib/test/test_gc.py | 20 ++++-- ...5-07-07-17-26-06.gh-issue-91636.GyHU72.rst | 3 + Python/gc.c | 64 +++++++++++++++++++ Python/gc_free_threading.c | 50 ++++++++++++++- 4 files changed, 127 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b4cbfb6d774080..85c43055d0dcec 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -262,9 +262,11 @@ class Cyclic(tuple): # finalizer. def __del__(self): - # 5. Create a weakref to `func` now. If we had created - # it earlier, it would have been cleared by the - # garbage collector before calling the finalizers. + # 5. Create a weakref to `func` now. In previous + # versions of Python, this would avoid having it + # cleared by the garbage collector before calling + # the finalizers. Now, weakrefs get cleared after + # calling finalizers. self[1].ref = weakref.ref(self[0]) # 6. Drop the global reference to `latefin`. The only @@ -293,14 +295,18 @@ def func(): # which will find `cyc` and `func` as garbage. gc.collect() - # 9. Previously, this would crash because `func_qualname` - # had been NULL-ed out by func_clear(). + # 9. Previously, this would crash because the weakref + # created in the finalizer revealed the function after + # `tp_clear` was called and `func_qualname` + # had been NULL-ed out by func_clear(). Now, we clear + # weakrefs to unreachable objects before calling `tp_clear` + # but after calling finalizers. print(f"{func=}") """ - # We're mostly just checking that this doesn't crash. rc, stdout, stderr = assert_python_ok("-c", code) self.assertEqual(rc, 0) - self.assertRegex(stdout, rb"""\A\s*func=\s*\z""") + # The `func` global is None because the weakref was cleared. + self.assertRegex(stdout, rb"""\A\s*func=None""") self.assertFalse(stderr) @refcount_test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst new file mode 100644 index 00000000000000..09c192f9c5657e --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-07-07-17-26-06.gh-issue-91636.GyHU72.rst @@ -0,0 +1,3 @@ +While performing garbage collection, clear weakrefs to unreachable objects +that are created during running of finalizers. If those weakrefs were are +not cleared, they could reveal unreachable objects. diff --git a/Python/gc.c b/Python/gc.c index 02135a3fb442d6..775b7cff9232b3 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1037,6 +1037,62 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } +/* Clear all weakrefs to unreachable objects. When this returns, no object in + * `unreachable` is weakly referenced anymore. + */ +static void +clear_weakrefs(PyGC_Head *unreachable) +{ + PyGC_Head *gc; + PyGC_Head *next; + + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + PyObject *op = FROM_GC(gc); + next = GC_NEXT(gc); + + if (PyWeakref_Check(op)) { + /* A weakref inside the unreachable set must be cleared. If we + * allow its callback to execute inside delete_garbage(), it + * could expose objects that have tp_clear already called on + * them. Or, it could resurrect unreachable objects. One way + * this can happen is if some container objects do not implement + * tp_traverse. Then, wr_object can be outside the unreachable + * set but can be deallocated as a result of breaking the + * reference cycle. If we don't clear the weakref, the callback + * will run and potentially cause a crash. See bpo-38006 for + * one example. + */ + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + /* It supports weakrefs. Does it have any? + * + * This is never triggered for static types so we can avoid the + * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + */ + wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs. + */ + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void debug_cycle(const char *msg, PyObject *op) { @@ -1751,6 +1807,14 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&final_unreachable); handle_resurrected_objects(&unreachable, &final_unreachable, to); + /* Clear weakrefs to objects in the unreachable set. No Python-level + * code must be allowed to access those unreachable objects. During + * delete_garbage(), finalizers outside the unreachable set might run + * and create new weakrefs. If those weakrefs were not cleared, they + * could reveal unreachable objects. + */ + clear_weakrefs(&final_unreachable); + /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 5aaa68c5b51f95..910b6435294451 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1494,7 +1494,7 @@ move_legacy_finalizer_reachable(struct collection_state *state) // Clear all weakrefs to unreachable objects. Weakrefs with callbacks are // enqueued in `wrcb_to_call`, but not invoked yet. static void -clear_weakrefs(struct collection_state *state) +handle_weakrefs(struct collection_state *state) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { @@ -1545,6 +1545,42 @@ clear_weakrefs(struct collection_state *state) } } +// Clear all weakrefs to unreachable objects. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable to ensure their + // callbacks will not be executed later from a `tp_clear()` + // inside delete_garbage(). That would be unsafe: it could + // resurrect a dead object or access a an already cleared object. + // See bpo-38006 for one example. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. March over the list, clear + // all the weakrefs. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + } + } +} + static void call_weakref_callbacks(struct collection_state *state) { @@ -2211,7 +2247,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, interp->gc.long_lived_total = state->long_lived_total; // Clear weakrefs and enqueue callbacks (but do not call them). - clear_weakrefs(state); + handle_weakrefs(state); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2222,11 +2258,19 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, call_weakref_callbacks(state); finalize_garbage(state); - // Handle any objects that may have resurrected after the finalization. _PyEval_StopTheWorld(interp); + // Handle any objects that may have resurrected after the finalization. err = handle_resurrected_objects(state); // Clear free lists in all threads _PyGC_ClearAllFreeLists(interp); + if (err == 0) { + // Clear weakrefs to objects in the unreachable set. No Python-level + // code must be allowed to access those unreachable objects. During + // delete_garbage(), finalizers outside the unreachable set might + // run and create new weakrefs. If those weakrefs were not cleared, + // they could reveal unreachable objects. + clear_weakrefs(state); + } _PyEval_StartTheWorld(interp); if (err < 0) { From 412bc2af33132aaf048a157636a20b0c909e1d94 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 8 Jul 2025 09:28:17 -0700 Subject: [PATCH 2/2] Reduce code duplication. Add a boolean flag to indicate that callbacks should be enabled. This allows the same function to be used before and after finalizer execution. --- Python/gc.c | 77 ++++++++------------------------------ Python/gc_free_threading.c | 48 ++++-------------------- 2 files changed, 24 insertions(+), 101 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 775b7cff9232b3..88849a43680d2e 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -870,7 +870,7 @@ move_legacy_finalizer_reachable(PyGC_Head *finalizers) * no object in `unreachable` is weakly referenced anymore. */ static int -handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old, bool allow_callbacks) { PyGC_Head *gc; PyObject *op; /* generally FROM_GC(gc) */ @@ -879,7 +879,9 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) PyGC_Head *next; int num_freed = 0; - gc_list_init(&wrcb_to_call); + if (allow_callbacks) { + gc_list_init(&wrcb_to_call); + } /* Clear all weakrefs to the objects in unreachable. If such a weakref * also has a callback, move it into `wrcb_to_call` if the callback @@ -935,6 +937,11 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + + if (!allow_callbacks) { + continue; + } + if (wr->wr_callback == NULL) { /* no callback */ continue; @@ -987,6 +994,10 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) } } + if (!allow_callbacks) { + return 0; + } + /* Invoke the callbacks we decided to honor. It's safe to invoke them * because they can't reference unreachable objects. */ @@ -1037,62 +1048,6 @@ handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) return num_freed; } -/* Clear all weakrefs to unreachable objects. When this returns, no object in - * `unreachable` is weakly referenced anymore. - */ -static void -clear_weakrefs(PyGC_Head *unreachable) -{ - PyGC_Head *gc; - PyGC_Head *next; - - for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { - PyWeakReference **wrlist; - - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - - if (PyWeakref_Check(op)) { - /* A weakref inside the unreachable set must be cleared. If we - * allow its callback to execute inside delete_garbage(), it - * could expose objects that have tp_clear already called on - * them. Or, it could resurrect unreachable objects. One way - * this can happen is if some container objects do not implement - * tp_traverse. Then, wr_object can be outside the unreachable - * set but can be deallocated as a result of breaking the - * reference cycle. If we don't clear the weakref, the callback - * will run and potentially cause a crash. See bpo-38006 for - * one example. - */ - _PyWeakref_ClearRef((PyWeakReference *)op); - } - - if (! _PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { - continue; - } - - /* It supports weakrefs. Does it have any? - * - * This is never triggered for static types so we can avoid the - * (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). - */ - wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - - /* `op` may have some weakrefs. March over the list, clear - * all the weakrefs. - */ - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - /* _PyWeakref_ClearRef clears the weakref but leaves - * the callback pointer intact. Obscure: it also - * changes *wrlist. - */ - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - } - } -} - static void debug_cycle(const char *msg, PyObject *op) { @@ -1793,7 +1748,7 @@ gc_collect_region(PyThreadState *tstate, } /* Clear weakrefs and invoke callbacks as necessary. */ - stats->collected += handle_weakrefs(&unreachable, to); + stats->collected += handle_weakrefs(&unreachable, to, true); gc_list_validate_space(to, gcstate->visited_space); validate_list(to, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); @@ -1811,9 +1766,9 @@ gc_collect_region(PyThreadState *tstate, * code must be allowed to access those unreachable objects. During * delete_garbage(), finalizers outside the unreachable set might run * and create new weakrefs. If those weakrefs were not cleared, they - * could reveal unreachable objects. + * could reveal unreachable objects. Callbacks are not executed. */ - clear_weakrefs(&final_unreachable); + handle_weakrefs(&final_unreachable, NULL, false); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 910b6435294451..d46598b23b3b2f 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1492,9 +1492,9 @@ move_legacy_finalizer_reachable(struct collection_state *state) } // Clear all weakrefs to unreachable objects. Weakrefs with callbacks are -// enqueued in `wrcb_to_call`, but not invoked yet. +// optionally enqueued in `wrcb_to_call`, but not invoked yet. static void -handle_weakrefs(struct collection_state *state) +clear_weakrefs(struct collection_state *state, bool enqueue_callbacks) { PyObject *op; WORKSTACK_FOR_EACH(&state->unreachable, op) { @@ -1526,6 +1526,10 @@ handle_weakrefs(struct collection_state *state) _PyWeakref_ClearRef(wr); _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + if (!enqueue_callbacks) { + continue; + } + // We do not invoke callbacks for weakrefs that are themselves // unreachable. This is partly for historical reasons: weakrefs // predate safe object finalization, and a weakref that is itself @@ -1545,42 +1549,6 @@ handle_weakrefs(struct collection_state *state) } } -// Clear all weakrefs to unreachable objects. -static void -clear_weakrefs(struct collection_state *state) -{ - PyObject *op; - WORKSTACK_FOR_EACH(&state->unreachable, op) { - if (PyWeakref_Check(op)) { - // Clear weakrefs that are themselves unreachable to ensure their - // callbacks will not be executed later from a `tp_clear()` - // inside delete_garbage(). That would be unsafe: it could - // resurrect a dead object or access a an already cleared object. - // See bpo-38006 for one example. - _PyWeakref_ClearRef((PyWeakReference *)op); - } - - if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { - continue; - } - - // NOTE: This is never triggered for static types so we can avoid the - // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). - PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); - - // `op` may have some weakrefs. March over the list, clear - // all the weakrefs. - for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { - // _PyWeakref_ClearRef clears the weakref but leaves - // the callback pointer intact. Obscure: it also - // changes *wrlist. - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); - _PyWeakref_ClearRef(wr); - _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); - } - } -} - static void call_weakref_callbacks(struct collection_state *state) { @@ -2247,7 +2215,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, interp->gc.long_lived_total = state->long_lived_total; // Clear weakrefs and enqueue callbacks (but do not call them). - handle_weakrefs(state); + clear_weakrefs(state, true); _PyEval_StartTheWorld(interp); // Deallocate any object from the refcount merge step @@ -2269,7 +2237,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state, // delete_garbage(), finalizers outside the unreachable set might // run and create new weakrefs. If those weakrefs were not cleared, // they could reveal unreachable objects. - clear_weakrefs(state); + clear_weakrefs(state, false); } _PyEval_StartTheWorld(interp); pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy