Skip to content

Commit 59470ef

Browse files
markshannon authored and srinivasreddy committed
pythonGH-126491: GC: Mark objects reachable from roots before doing cycle collection (pythonGH-127110)
* Mark almost all reachable objects before doing collection phase
* Add stats for objects marked
* Visit new frames before each increment
* Update docs
* Clearer calculation of work to do.
1 parent 43ceb14 commit 59470ef

File tree

14 files changed

+355
-103
lines changed

14 files changed

+355
-103
lines changed

Include/cpython/pystats.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ typedef struct _gc_stats {
9999
uint64_t collections;
100100
uint64_t object_visits;
101101
uint64_t objects_collected;
102+
uint64_t objects_transitively_reachable;
103+
uint64_t objects_not_transitively_reachable;
102104
} GCStats;
103105

104106
typedef struct _uop_stats {

Include/internal/pycore_frame.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame {
7575
_PyStackRef *stackpointer;
7676
uint16_t return_offset; /* Only relevant during a function call */
7777
char owner;
78+
char visited;
7879
/* Locals and stack */
7980
_PyStackRef localsplus[1];
8081
} _PyInterpreterFrame;
@@ -207,6 +208,7 @@ _PyFrame_Initialize(
207208
#endif
208209
frame->return_offset = 0;
209210
frame->owner = FRAME_OWNED_BY_THREAD;
211+
frame->visited = 0;
210212

211213
for (int i = null_locals_from; i < code->co_nlocalsplus; i++) {
212214
frame->localsplus[i] = PyStackRef_NULL;
@@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int
389391
frame->instr_ptr = _PyCode_CODE(code);
390392
#endif
391393
frame->owner = FRAME_OWNED_BY_THREAD;
394+
frame->visited = 0;
392395
frame->return_offset = 0;
393396

394397
#ifdef Py_GIL_DISABLED

Include/internal/pycore_gc.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ extern "C" {
1010

1111
/* GC information is stored BEFORE the object structure. */
1212
typedef struct {
13-
// Pointer to next object in the list.
13+
// Tagged pointer to next object in the list.
1414
// 0 means the object is not tracked
1515
uintptr_t _gc_next;
1616

17-
// Pointer to previous object in the list.
17+
// Tagged pointer to previous object in the list.
1818
// Lowest two bits are used for flags documented later.
1919
uintptr_t _gc_prev;
2020
} PyGC_Head;
@@ -284,6 +284,11 @@ struct gc_generation_stats {
284284
Py_ssize_t uncollectable;
285285
};
286286

287+
enum _GCPhase {
288+
GC_PHASE_MARK = 0,
289+
GC_PHASE_COLLECT = 1
290+
};
291+
287292
struct _gc_runtime_state {
288293
/* List of objects that still need to be cleaned up, singly linked
289294
* via their gc headers' gc_prev pointers. */
@@ -311,6 +316,7 @@ struct _gc_runtime_state {
311316
Py_ssize_t work_to_do;
312317
/* Which of the old spaces is the visited space */
313318
int visited_space;
319+
int phase;
314320

315321
#ifdef Py_GIL_DISABLED
316322
/* This is the number of objects that survived the last full

Include/internal/pycore_object.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,8 @@ static inline void _PyObject_GC_TRACK(
471471
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
472472
_PyGCHead_SET_NEXT(last, gc);
473473
_PyGCHead_SET_PREV(gc, last);
474-
/* Young objects will be moved into the visited space during GC, so set the bit here */
475-
gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space;
474+
uintptr_t not_visited = 1 ^ interp->gc.visited_space;
475+
gc->_gc_next = ((uintptr_t)generation0) | not_visited;
476476
generation0->_gc_prev = (uintptr_t)gc;
477477
#endif
478478
}

Include/internal/pycore_runtime_init.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError;
137137
{ .threshold = 0, }, \
138138
}, \
139139
.work_to_do = -5000, \
140+
.phase = GC_PHASE_MARK, \
140141
}, \
141142
.qsbr = { \
142143
.wr_seq = QSBR_INITIAL, \

InternalDocs/garbage_collector.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`.
477477
```
478478

479479

480+
Optimization: visiting reachable objects
481+
========================================
482+
483+
An object cannot be garbage if it can be reached.
484+
485+
To avoid having to identify reference cycles across the whole heap, we can
486+
reduce the amount of work done considerably by first moving most reachable objects
487+
to the `visited` space. Empirically, most reachable objects can be reached from a
488+
small set of global objects and local variables.
489+
This step does much less work per object, so reduces the time spent
490+
performing garbage collection by at least half.
491+
492+
> [!NOTE]
493+
> Objects that are not determined to be reachable by this pass are not necessarily
494+
> unreachable. We still need to perform the main algorithm to determine which objects
495+
> are actually unreachable.
496+
We use the same technique of forming a transitive closure as the incremental
497+
collector does to find reachable objects, seeding the list with some global
498+
objects and the currently executing frames.
499+
500+
This phase moves objects to the `visited` space, as follows:
501+
502+
1. All objects directly referred to by any builtin class, the `sys` module, the `builtins`
503+
module and all objects directly referred to from stack frames are added to a working
504+
set of reachable objects.
505+
2. Until this working set is empty:
506+
1. Pop an object from the set and move it to the `visited` space
507+
2. For each object directly reachable from that object:
508+
* If it is not already in `visited` space and it is a GC object,
509+
add it to the working set
510+
511+
512+
Before each increment of collection is performed, the stacks are scanned
513+
to check for any new stack frames that have been created since the last
514+
increment. All objects directly referred to from those stack frames are
515+
added to the working set.
516+
Then the above algorithm is repeated, starting from step 2.
517+
518+
480519
Optimization: reusing fields to save memory
481520
===========================================
482521

Lib/test/libregrtest/refleak.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ def get_pooled_int(value):
123123
xml_filename = 'refleak-xml.tmp'
124124
result = None
125125
dash_R_cleanup(fs, ps, pic, zdc, abcs)
126-
support.gc_collect()
127126

128127
for i in rep_range:
128+
support.gc_collect()
129129
current = refleak_helper._hunting_for_refleaks
130130
refleak_helper._hunting_for_refleaks = True
131131
try:

Lib/test/test_gc.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs):
3131
return C
3232
ContainerNoGC = None
3333

34+
try:
35+
import _testinternalcapi
36+
except ImportError:
37+
_testinternalcapi = None
38+
3439
### Support code
3540
###############################################################################
3641

@@ -1130,6 +1135,7 @@ def setUp(self):
11301135
def tearDown(self):
11311136
gc.disable()
11321137

1138+
@unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi")
11331139
@requires_gil_enabled("Free threading does not support incremental GC")
11341140
# Use small increments to emulate longer running process in a shorter time
11351141
@gc_threshold(200, 10)
@@ -1167,20 +1173,15 @@ def make_ll(depth):
11671173
enabled = gc.isenabled()
11681174
gc.enable()
11691175
olds = []
1176+
initial_heap_size = _testinternalcapi.get_tracked_heap_size()
11701177
for i in range(20_000):
11711178
newhead = make_ll(20)
11721179
count += 20
11731180
newhead.surprise = head
11741181
olds.append(newhead)
11751182
if len(olds) == 20:
1176-
stats = gc.get_stats()
1177-
young = stats[0]
1178-
incremental = stats[1]
1179-
old = stats[2]
1180-
collected = young['collected'] + incremental['collected'] + old['collected']
1181-
count += CORRECTION
1182-
live = count - collected
1183-
self.assertLess(live, 25000)
1183+
new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size
1184+
self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations")
11841185
del olds[:]
11851186
if not enabled:
11861187
gc.disable()
@@ -1322,7 +1323,8 @@ def test_refcount_errors(self):
13221323
from test.support import gc_collect, SuppressCrashReport
13231324
13241325
a = [1, 2, 3]
1325-
b = [a]
1326+
b = [a, a]
1327+
a.append(b)
13261328
13271329
# Avoid coredump when Py_FatalError() calls abort()
13281330
SuppressCrashReport().__enter__()
@@ -1332,6 +1334,8 @@ def test_refcount_errors(self):
13321334
# (to avoid deallocating it):
13331335
import ctypes
13341336
ctypes.pythonapi.Py_DecRef(ctypes.py_object(a))
1337+
del a
1338+
del b
13351339
13361340
# The garbage collector should now have a fatal error
13371341
# when it reaches the broken object
@@ -1360,7 +1364,7 @@ def test_refcount_errors(self):
13601364
self.assertRegex(stderr,
13611365
br'object type name: list')
13621366
self.assertRegex(stderr,
1363-
br'object repr : \[1, 2, 3\]')
1367+
br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]')
13641368

13651369

13661370
class GCTogglingTests(unittest.TestCase):
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Add a marking phase to the GC. All objects that can be transitively reached
2+
from builtin modules or the stacks are marked as reachable before cycle
3+
detection. This reduces the amount of work done by the GC by approximately
4+
half.

Modules/_testinternalcapi.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op)
20762076
return PyBool_FromLong(_PyObject_HasDeferredRefcount(op));
20772077
}
20782078

2079+
static PyObject *
2080+
get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored))
2081+
{
2082+
return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size);
2083+
}
20792084

20802085
static PyMethodDef module_functions[] = {
20812086
{"get_configs", get_configs, METH_NOARGS},
@@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = {
21742179
{"get_static_builtin_types", get_static_builtin_types, METH_NOARGS},
21752180
{"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS},
21762181
{"has_deferred_refcount", has_deferred_refcount, METH_O},
2182+
{"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS},
21772183
{NULL, NULL} /* sentinel */
21782184
};
21792185

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy