Skip to content

Commit 36518e6

Browse files
authored
GH-108362: Incremental GC implementation (GH-108038)
1 parent b4ba0f7 commit 36518e6

File tree

13 files changed

+647
-392
lines changed

13 files changed

+647
-392
lines changed

Doc/whatsnew/3.13.rst

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,10 @@ Interpreter improvements:
9292
New Features
9393
============
9494

95+
* The cyclic garbage collector is now incremental.
96+
This means that maximum pause times are reduced,
97+
by an order of magnitude or more for larger heaps.
98+
9599
Improved Error Messages
96100
-----------------------
97101

@@ -101,6 +105,13 @@ Improved Error Messages
101105
variables. See also :ref:`using-on-controlling-color`.
102106
(Contributed by Pablo Galindo Salgado in :gh:`112730`.)
103107

108+
Incremental Garbage Collection
109+
------------------------------
110+
111+
* The cyclic garbage collector is now incremental.
112+
This means that maximum pause times are reduced
113+
by an order of magnitude or more for larger heaps.
114+
104115
Other Language Changes
105116
======================
106117

@@ -232,6 +243,29 @@ fractions
232243
sign handling, minimum width and grouping. (Contributed by Mark Dickinson
233244
in :gh:`111320`.)
234245

246+
gc
247+
--
248+
* The cyclic garbage collector is now incremental, which changes the meanings
249+
of the results of :meth:`gc.get_threshold` and :meth:`gc.set_threshold` as
250+
well as :meth:`gc.get_count` and :meth:`gc.get_stats`.
251+
* :meth:`gc.get_threshold` returns a three-tuple for backwards compatibility,
252+
the first value is the threshold for young collections, as before, the second
253+
value determines the rate at which the old collection is scanned; the
254+
default is 10 and higher values mean that the old collection is scanned more slowly.
255+
The third value is meaningless and is always zero.
256+
* :meth:`gc.set_threshold` ignores any items after the second.
257+
* :meth:`gc.get_count` and :meth:`gc.get_stats`.
258+
These functions return the same format of results as before.
259+
The only difference is that instead of the results referring to
260+
the young, aging and old generations, the results refer to the
261+
young generation and the aging and collecting spaces of the old generation.
262+
263+
In summary, code that attempted to manipulate the behavior of the cycle GC may
264+
not work as well as intended, but it is very unlikely to be harmful.
265+
All other code will work just fine.
266+
Users should avoid calling :meth:`gc.collect` unless their workload is episodic,
267+
but that has always been the case to some extent.
268+
235269
glob
236270
----
237271

Include/internal/pycore_gc.h

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,15 @@ static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) {
7171

7272
/* Bit flags for _gc_prev */
7373
/* Bit 0 is set when tp_finalize is called */
74-
#define _PyGC_PREV_MASK_FINALIZED (1)
74+
#define _PyGC_PREV_MASK_FINALIZED 1
7575
/* Bit 1 is set when the object is in generation which is GCed currently. */
76-
#define _PyGC_PREV_MASK_COLLECTING (2)
76+
#define _PyGC_PREV_MASK_COLLECTING 2
77+
78+
/* Bit 0 is set if the object belongs to old space 1 */
79+
#define _PyGC_NEXT_MASK_OLD_SPACE_1 1
80+
7781
/* The (N-2) most significant bits contain the real address. */
78-
#define _PyGC_PREV_SHIFT (2)
82+
#define _PyGC_PREV_SHIFT 2
7983
#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT)
8084

8185
/* set for debugging information */
@@ -101,18 +105,21 @@ typedef enum {
101105
// Lowest bit of _gc_next is used for flags only in GC.
102106
// But it is always 0 for normal code.
103107
static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) {
104-
uintptr_t next = gc->_gc_next;
108+
uintptr_t next = gc->_gc_next & _PyGC_PREV_MASK;
105109
return (PyGC_Head*)next;
106110
}
107111
static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) {
108-
gc->_gc_next = (uintptr_t)next;
112+
uintptr_t unext = (uintptr_t)next;
113+
assert((unext & ~_PyGC_PREV_MASK) == 0);
114+
gc->_gc_next = (gc->_gc_next & ~_PyGC_PREV_MASK) | unext;
109115
}
110116

111117
// Lowest two bits of _gc_prev are used for _PyGC_PREV_MASK_* flags.
112118
static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) {
113119
uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK);
114120
return (PyGC_Head*)prev;
115121
}
122+
116123
static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) {
117124
uintptr_t uprev = (uintptr_t)prev;
118125
assert((uprev & ~_PyGC_PREV_MASK) == 0);
@@ -198,6 +205,13 @@ struct gc_generation {
198205
generations */
199206
};
200207

208+
struct gc_collection_stats {
209+
/* number of collected objects */
210+
Py_ssize_t collected;
211+
/* total number of uncollectable objects (put into gc.garbage) */
212+
Py_ssize_t uncollectable;
213+
};
214+
201215
/* Running stats per generation */
202216
struct gc_generation_stats {
203217
/* total number of collections */
@@ -219,8 +233,8 @@ struct _gc_runtime_state {
219233
int enabled;
220234
int debug;
221235
/* linked lists of container objects */
222-
struct gc_generation generations[NUM_GENERATIONS];
223-
PyGC_Head *generation0;
236+
struct gc_generation young;
237+
struct gc_generation old[2];
224238
/* a permanent generation which won't be collected */
225239
struct gc_generation permanent_generation;
226240
struct gc_generation_stats generation_stats[NUM_GENERATIONS];
@@ -233,22 +247,20 @@ struct _gc_runtime_state {
233247
/* This is the number of objects that survived the last full
234248
collection. It approximates the number of long lived objects
235249
tracked by the GC.
236-
237250
(by "full collection", we mean a collection of the oldest
238251
generation). */
239252
Py_ssize_t long_lived_total;
240-
/* This is the number of objects that survived all "non-full"
241-
collections, and are awaiting to undergo a full collection for
242-
the first time. */
243-
Py_ssize_t long_lived_pending;
253+
254+
Py_ssize_t work_to_do;
255+
/* Which of the old spaces is the visited space */
256+
int visited_space;
244257
};
245258

246259

247260
extern void _PyGC_InitState(struct _gc_runtime_state *);
248261

249-
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation,
250-
_PyGC_Reason reason);
251-
extern Py_ssize_t _PyGC_CollectNoFail(PyThreadState *tstate);
262+
extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason);
263+
extern void _PyGC_CollectNoFail(PyThreadState *tstate);
252264

253265
/* Freeze objects tracked by the GC and ignore them in future collections. */
254266
extern void _PyGC_Freeze(PyInterpreterState *interp);

Include/internal/pycore_object.h

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -125,19 +125,7 @@ static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
125125
}
126126
#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
127127

128-
static inline void _Py_SetImmortal(PyObject *op)
129-
{
130-
if (op) {
131-
#ifdef Py_GIL_DISABLED
132-
op->ob_tid = _Py_UNOWNED_TID;
133-
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
134-
op->ob_ref_shared = 0;
135-
#else
136-
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
137-
#endif
138-
}
139-
}
140-
#define _Py_SetImmortal(op) _Py_SetImmortal(_PyObject_CAST(op))
128+
extern void _Py_SetImmortal(PyObject *op);
141129

142130
// Makes an immortal object mortal again with the specified refcnt. Should only
143131
// be used during runtime finalization.
@@ -325,11 +313,12 @@ static inline void _PyObject_GC_TRACK(
325313
filename, lineno, __func__);
326314

327315
PyInterpreterState *interp = _PyInterpreterState_GET();
328-
PyGC_Head *generation0 = interp->gc.generation0;
316+
PyGC_Head *generation0 = &interp->gc.young.head;
329317
PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
330318
_PyGCHead_SET_NEXT(last, gc);
331319
_PyGCHead_SET_PREV(gc, last);
332320
_PyGCHead_SET_NEXT(gc, generation0);
321+
assert((gc->_gc_next & _PyGC_NEXT_MASK_OLD_SPACE_1) == 0);
333322
generation0->_gc_prev = (uintptr_t)gc;
334323
#endif
335324
}

Include/internal/pycore_runtime_init.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,12 @@ extern PyTypeObject _PyExc_MemoryError;
160160
}, \
161161
.gc = { \
162162
.enabled = 1, \
163-
.generations = { \
164-
/* .head is set in _PyGC_InitState(). */ \
165-
{ .threshold = 700, }, \
166-
{ .threshold = 10, }, \
163+
.young = { .threshold = 2000, }, \
164+
.old = { \
167165
{ .threshold = 10, }, \
166+
{ .threshold = 0, }, \
168167
}, \
168+
.work_to_do = -5000, \
169169
}, \
170170
.object_state = _py_object_state_INIT(INTERP), \
171171
.dtoa = _dtoa_state_INIT(&(INTERP)), \

Lib/test/test_gc.py

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -383,19 +383,11 @@ def test_collect_generations(self):
383383
# each call to collect(N)
384384
x = []
385385
gc.collect(0)
386-
# x is now in gen 1
386+
# x is now in the old gen
387387
a, b, c = gc.get_count()
388-
gc.collect(1)
389-
# x is now in gen 2
390-
d, e, f = gc.get_count()
391-
gc.collect(2)
392-
# x is now in gen 3
393-
g, h, i = gc.get_count()
394-
# We don't check a, d, g since their exact values depends on
388+
# We don't check a since its exact value depends on
395389
# internal implementation details of the interpreter.
396390
self.assertEqual((b, c), (1, 0))
397-
self.assertEqual((e, f), (0, 1))
398-
self.assertEqual((h, i), (0, 0))
399391

400392
def test_trashcan(self):
401393
class Ouch:
@@ -846,16 +838,6 @@ def test_get_objects_generations(self):
846838
self.assertFalse(
847839
any(l is element for element in gc.get_objects(generation=2))
848840
)
849-
gc.collect(generation=1)
850-
self.assertFalse(
851-
any(l is element for element in gc.get_objects(generation=0))
852-
)
853-
self.assertFalse(
854-
any(l is element for element in gc.get_objects(generation=1))
855-
)
856-
self.assertTrue(
857-
any(l is element for element in gc.get_objects(generation=2))
858-
)
859841
gc.collect(generation=2)
860842
self.assertFalse(
861843
any(l is element for element in gc.get_objects(generation=0))
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
Implements an incremental cyclic garbage collector. By collecting the old
2+
generation in increments, there is no need for a full heap scan. This can
3+
hugely reduce maximum pause time for programs with large heaps.
4+
5+
Reduces the number of generations from three to two. The old generation is
6+
split into two spaces, "aging" and "collecting".
7+
8+
Collection happens in two steps: first, the young generation is scanned
9+
and the survivors moved to the end of the aging space. Then, objects are
10+
taken from the collecting space, at such a rate that all cycles are
11+
collected eventually. Those objects are then scanned and the survivors
12+
moved to the end of the aging space. When the collecting space becomes
13+
empty, the two spaces are swapped.

Modules/gcmodule.c

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,12 @@ gc_set_threshold_impl(PyObject *module, int threshold0, int group_right_1,
158158
{
159159
GCState *gcstate = get_gc_state();
160160

161-
gcstate->generations[0].threshold = threshold0;
161+
gcstate->young.threshold = threshold0;
162162
if (group_right_1) {
163-
gcstate->generations[1].threshold = threshold1;
163+
gcstate->old[0].threshold = threshold1;
164164
}
165165
if (group_right_2) {
166-
gcstate->generations[2].threshold = threshold2;
167-
168-
/* generations higher than 2 get the same threshold */
169-
for (int i = 3; i < NUM_GENERATIONS; i++) {
170-
gcstate->generations[i].threshold = gcstate->generations[2].threshold;
171-
}
166+
gcstate->old[1].threshold = threshold2;
172167
}
173168
Py_RETURN_NONE;
174169
}
@@ -185,9 +180,9 @@ gc_get_threshold_impl(PyObject *module)
185180
{
186181
GCState *gcstate = get_gc_state();
187182
return Py_BuildValue("(iii)",
188-
gcstate->generations[0].threshold,
189-
gcstate->generations[1].threshold,
190-
gcstate->generations[2].threshold);
183+
gcstate->young.threshold,
184+
gcstate->old[0].threshold,
185+
0);
191186
}
192187

193188
/*[clinic input]
@@ -202,9 +197,9 @@ gc_get_count_impl(PyObject *module)
202197
{
203198
GCState *gcstate = get_gc_state();
204199
return Py_BuildValue("(iii)",
205-
gcstate->generations[0].count,
206-
gcstate->generations[1].count,
207-
gcstate->generations[2].count);
200+
gcstate->young.count,
201+
gcstate->old[gcstate->visited_space].count,
202+
gcstate->old[gcstate->visited_space^1].count);
208203
}
209204

210205
/*[clinic input]

Objects/object.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2387,6 +2387,21 @@ _Py_NewReferenceNoTotal(PyObject *op)
23872387
new_reference(op);
23882388
}
23892389

2390+
void
2391+
_Py_SetImmortal(PyObject *op)
2392+
{
2393+
if (PyObject_IS_GC(op) && _PyObject_GC_IS_TRACKED(op)) {
2394+
_PyObject_GC_UNTRACK(op);
2395+
}
2396+
#ifdef Py_GIL_DISABLED
2397+
op->ob_tid = _Py_UNOWNED_TID;
2398+
op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL;
2399+
op->ob_ref_shared = 0;
2400+
#else
2401+
op->ob_refcnt = _Py_IMMORTAL_REFCNT;
2402+
#endif
2403+
}
2404+
23902405
void
23912406
_Py_ResurrectReference(PyObject *op)
23922407
{

Objects/structseq.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,9 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
603603
PyStructSequence_Desc *desc,
604604
unsigned long tp_flags)
605605
{
606+
if (Py_TYPE(type) == NULL) {
607+
Py_SET_TYPE(type, &PyType_Type);
608+
}
606609
Py_ssize_t n_unnamed_members;
607610
Py_ssize_t n_members = count_members(desc, &n_unnamed_members);
608611
PyMemberDef *members = NULL;
@@ -618,7 +621,7 @@ _PyStructSequence_InitBuiltinWithFlags(PyInterpreterState *interp,
618621
}
619622
initialize_static_fields(type, desc, members, tp_flags);
620623

621-
_Py_SetImmortal(type);
624+
_Py_SetImmortal((PyObject *)type);
622625
}
623626
#ifndef NDEBUG
624627
else {

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy