Commit 416abe3
Introduce a long lived section of the heap.
This adapts the allocation process to start from either end of the heap when searching for free space. The default behavior is identical to the existing behavior: it starts with the lowest block and looks higher. Now it can also start from the highest block and look lower, depending on the long_lived parameter to gc_alloc. As the heap fills, the two sections may overlap. When they overlap, a collect may be triggered in order to keep the long lived section compact. However, free space is always eligible for each type of allocation.

By starting from either end of the heap we gain the ability to separate short lived objects from long lived ones. This separation reduces heap fragmentation because long lived objects are easy to pack densely.

Most objects are short lived initially but may be made long lived when they are referenced by a type or module. This involves copying the memory and then letting the collect phase free the old portion. QSTR pools and chunks are always long lived because they are never freed.

The reallocation, collection and free processes are largely unchanged. They simply also maintain an index to the highest free block in addition to the lowest. These indices are used to speed up the allocation search until the next collect.

In practice, this change may slightly slow down import statements, with the benefit that memory is much less fragmented afterwards. For example, a test import into a 20k heap that leaves ~6k free previously had a largest contiguous free space of ~400 bytes. After this change, the largest contiguous free space is over 3400 bytes.
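To make the scheme concrete, here is a small standalone model of two-ended allocation (illustrative only, not code from this commit; HEAP_BLOCKS, used and model_alloc are invented names):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define HEAP_BLOCKS 64

static bool used[HEAP_BLOCKS]; // one flag per block; the real ATB packs four 2-bit states per byte

// Find and claim a run of n free blocks. Short lived requests scan up from the
// bottom of the heap; long lived requests scan down from the top, so the two
// kinds of object accumulate at opposite ends, as gc_alloc does below.
static ptrdiff_t model_alloc(size_t n, bool long_lived) {
    size_t run = 0;
    if (!long_lived) {
        for (size_t i = 0; i < HEAP_BLOCKS; i++) {
            run = used[i] ? 0 : run + 1;
            if (run == n) {
                size_t start = i - n + 1;
                for (size_t b = start; b <= i; b++) used[b] = true;
                return (ptrdiff_t)start;
            }
        }
    } else {
        for (ptrdiff_t i = HEAP_BLOCKS - 1; i >= 0; i--) {
            run = used[i] ? 0 : run + 1;
            if (run == n) { // when scanning down, i is the lowest block of the run
                for (size_t b = 0; b < n; b++) used[(size_t)i + b] = true;
                return i;
            }
        }
    }
    return -1; // no room; the real allocator would trigger a collect and retry
}

int main(void) {
    printf("short lived run starts at block %td\n", model_alloc(2, false)); // blocks 0-1
    printf("long lived run starts at block %td\n", model_alloc(3, true));   // blocks 61-63
    return 0;
}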
Parent commit: 56bd078

File tree

14 files changed: +378, -69 lines


py/builtinimport.c

Lines changed: 8 additions & 0 deletions
@@ -30,6 +30,7 @@
 #include <assert.h>
 
 #include "py/compile.h"
+#include "py/gc_long_lived.h"
 #include "py/objmodule.h"
 #include "py/persistentcode.h"
 #include "py/runtime.h"
@@ -144,6 +145,7 @@ STATIC void do_load_from_lexer(mp_obj_t module_obj, mp_lexer_t *lex) {
     // parse, compile and execute the module in its context
     mp_obj_dict_t *mod_globals = mp_obj_module_get_globals(module_obj);
     mp_parse_compile_execute(lex, MP_PARSE_FILE_INPUT, mod_globals, mod_globals);
+    mp_obj_module_set_globals(module_obj, make_dict_long_lived(mod_globals, 10));
 }
 #endif
 
@@ -173,6 +175,8 @@ STATIC void do_execute_raw_code(mp_obj_t module_obj, mp_raw_code_t *raw_code) {
 
         // finish nlr block, restore context
         nlr_pop();
+        mp_obj_module_set_globals(module_obj,
+            make_dict_long_lived(mp_obj_module_get_globals(module_obj), 10));
         mp_globals_set(old_globals);
         mp_locals_set(old_locals);
     } else {
@@ -468,6 +472,10 @@ mp_obj_t mp_builtin___import__(size_t n_args, const mp_obj_t *args) {
             if (outer_module_obj != MP_OBJ_NULL) {
                 qstr s = qstr_from_strn(mod_str + last, i - last);
                 mp_store_attr(outer_module_obj, s, module_obj);
+                // The above store can cause a dictionary rehash and new allocation. So,
+                // let's make sure the globals dictionary is still long lived.
+                mp_obj_module_set_globals(outer_module_obj,
+                    make_dict_long_lived(mp_obj_module_get_globals(outer_module_obj), 10));
             }
             outer_module_obj = module_obj;
             if (top_module_obj == MP_OBJ_NULL) {
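The make_dict_long_lived helper comes from the new py/gc_long_lived.h, which is among the 14 changed files but is not shown in this excerpt. Judging from the call sites above, it relocates a dictionary (and, up to the given depth, its contents) into the long lived section. A hedged sketch of the idea; only the two-argument shape is taken from the calls above, the body is an assumption:

#include "py/gc.h"
#include "py/obj.h"

// Sketch only: move a dict's storage to the long lived end of the heap.
// gc_make_long_lived returns the old pointer whenever moving is impossible
// or unhelpful, so unconditionally adopting its return value is safe.
mp_obj_dict_t *make_dict_long_lived(mp_obj_dict_t *dict, uint8_t max_depth) {
    if (dict == NULL || max_depth == 0) {
        return dict;
    }
    // A fuller version would also walk the values and promote nested
    // containers, recursing with max_depth - 1 (hence the depth argument).
    dict->map.table = gc_make_long_lived(dict->map.table);
    return gc_make_long_lived(dict);
}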

py/gc.c

Lines changed: 140 additions & 39 deletions
@@ -41,6 +41,9 @@
 #define DEBUG_printf(...) (void)0
 #endif
 
+// Uncomment this if you want to use a debugger to capture state at every allocation and free.
+// #define LOG_HEAP_ACTIVITY 1
+
 // make this 1 to dump the heap each time it changes
 #define EXTENSIVE_HEAP_PROFILING (0)

@@ -59,15 +62,6 @@
 #define AT_MARK (3)
 
 #define BLOCKS_PER_ATB (4)
-#define ATB_MASK_0 (0x03)
-#define ATB_MASK_1 (0x0c)
-#define ATB_MASK_2 (0x30)
-#define ATB_MASK_3 (0xc0)
-
-#define ATB_0_IS_FREE(a) (((a) & ATB_MASK_0) == 0)
-#define ATB_1_IS_FREE(a) (((a) & ATB_MASK_1) == 0)
-#define ATB_2_IS_FREE(a) (((a) & ATB_MASK_2) == 0)
-#define ATB_3_IS_FREE(a) (((a) & ATB_MASK_3) == 0)
 
 #define BLOCK_SHIFT(block) (2 * ((block) & (BLOCKS_PER_ATB - 1)))
 #define ATB_GET_KIND(block) ((MP_STATE_MEM(gc_alloc_table_start)[(block) / BLOCKS_PER_ATB] >> BLOCK_SHIFT(block)) & 3)
@@ -152,14 +146,19 @@ void gc_init(void *start, void *end) {
     memset(MP_STATE_MEM(gc_finaliser_table_start), 0, gc_finaliser_table_byte_len);
     #endif
 
-    // set last free ATB index to start of heap
-    MP_STATE_MEM(gc_last_free_atb_index) = 0;
+    // Set first free ATB index to the start of the heap.
+    MP_STATE_MEM(gc_first_free_atb_index) = 0;
+    // Set last free ATB index to the end of the heap.
+    MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
+    // Set the lowest long lived ptr to the end of the heap to start. This will be lowered as long
+    // lived objects are allocated.
+    MP_STATE_MEM(gc_lowest_long_lived_ptr) = (void*) PTR_FROM_BLOCK(MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB);
 
     // unlock the GC
     MP_STATE_MEM(gc_lock_depth) = 0;
 
     // allow auto collection
-    MP_STATE_MEM(gc_auto_collect_enabled) = 1;
+    MP_STATE_MEM(gc_auto_collect_enabled) = true;
 
     #if MICROPY_GC_ALLOC_THRESHOLD
     // by default, maxuint for gc threshold, effectively turning gc-by-threshold off
@@ -288,6 +287,7 @@ STATIC void gc_sweep(void) {
                 }
                 #endif
                 free_tail = 1;
+                ATB_ANY_TO_FREE(block);
                 DEBUG_printf("gc_sweep(%x)\n", PTR_FROM_BLOCK(block));
 
                 #ifdef LOG_HEAP_ACTIVITY
@@ -296,7 +296,7 @@
                 #if MICROPY_PY_GC_COLLECT_RETVAL
                 MP_STATE_MEM(gc_collected)++;
                 #endif
-                // fall through to free the head
+                break;
 
             case AT_TAIL:
                 if (free_tail) {
@@ -338,7 +338,8 @@ void gc_collect_root(void **ptrs, size_t len) {
 void gc_collect_end(void) {
     gc_deal_with_stack_overflow();
     gc_sweep();
-    MP_STATE_MEM(gc_last_free_atb_index) = 0;
+    MP_STATE_MEM(gc_first_free_atb_index) = 0;
+    MP_STATE_MEM(gc_last_free_atb_index) = MP_STATE_MEM(gc_alloc_table_byte_len) - 1;
     MP_STATE_MEM(gc_lock_depth)--;
     GC_EXIT();
 }
@@ -407,7 +408,9 @@ void gc_info(gc_info_t *info) {
     GC_EXIT();
 }
 
-void *gc_alloc(size_t n_bytes, bool has_finaliser) {
+// We place long lived objects at the end of the heap rather than the start. This reduces
+// fragmentation by localizing the heap churn to one portion of memory (the start of the heap.)
+void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived) {
     size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK;
     DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks);
 
@@ -424,29 +427,62 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) {
         return NULL;
     }
 
-    size_t i;
+    size_t found_block = 0xffffffff;
     size_t end_block;
     size_t start_block;
-    size_t n_free = 0;
-    int collected = !MP_STATE_MEM(gc_auto_collect_enabled);
+    size_t n_free;
+    bool collected = !MP_STATE_MEM(gc_auto_collect_enabled);
 
     #if MICROPY_GC_ALLOC_THRESHOLD
     if (!collected && MP_STATE_MEM(gc_alloc_amount) >= MP_STATE_MEM(gc_alloc_threshold)) {
         GC_EXIT();
         gc_collect();
         GC_ENTER();
+        collected = true;
     }
     #endif
 
-    for (;;) {
-
+    bool keep_looking = true;
+
+    // When we start searching on the other side of the crossover block we make sure to
+    // perform a collect. That way we'll get the closest free block in our section.
+    size_t crossover_block = BLOCK_FROM_PTR(MP_STATE_MEM(gc_lowest_long_lived_ptr));
+    while (keep_looking) {
+        int8_t direction = 1;
+        size_t start = MP_STATE_MEM(gc_first_free_atb_index);
+        if (long_lived) {
+            direction = -1;
+            start = MP_STATE_MEM(gc_last_free_atb_index);
+        }
+        n_free = 0;
         // look for a run of n_blocks available blocks
-        for (i = MP_STATE_MEM(gc_last_free_atb_index); i < MP_STATE_MEM(gc_alloc_table_byte_len); i++) {
+        for (size_t i = start; keep_looking && MP_STATE_MEM(gc_first_free_atb_index) <= i && i <= MP_STATE_MEM(gc_last_free_atb_index); i += direction) {
             byte a = MP_STATE_MEM(gc_alloc_table_start)[i];
-            if (ATB_0_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 0; goto found; } } else { n_free = 0; }
-            if (ATB_1_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 1; goto found; } } else { n_free = 0; }
-            if (ATB_2_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 2; goto found; } } else { n_free = 0; }
-            if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; }
+            // Four ATB states are packed into a single byte.
+            int j = 0;
+            if (direction == -1) {
+                j = 3;
+            }
+            for (; keep_looking && 0 <= j && j <= 3; j += direction) {
+                if ((a & (0x3 << (j * 2))) == 0) {
+                    if (++n_free >= n_blocks) {
+                        found_block = i * BLOCKS_PER_ATB + j;
+                        keep_looking = false;
+                    }
+                } else {
+                    if (!collected) {
+                        size_t block = i * BLOCKS_PER_ATB + j;
+                        if ((direction == 1 && block >= crossover_block) ||
+                            (direction == -1 && block < crossover_block)) {
+                            keep_looking = false;
+                        }
+                    }
+                    n_free = 0;
+                }
+            }
+        }
+        if (n_free >= n_blocks) {
+            break;
         }
 
         GC_EXIT();
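The deleted ATB_x_IS_FREE macros special-cased each of the four 2-bit fields packed into an allocation table byte; the new inner loop indexes the same fields with j. A small standalone check (mask values copied from the definitions removed above) that the new expression is equivalent for every byte:

#include <assert.h>

int main(void) {
    // The old masks from the removed ATB_MASK_x defines: field j lives in
    // bits 2j..2j+1 of the byte, with 0 meaning "free".
    static const unsigned char masks[4] = { 0x03, 0x0c, 0x30, 0xc0 };
    for (int a = 0; a < 256; a++) {
        for (int j = 0; j < 4; j++) {
            // New form used by the loop above vs. old ATB_x_IS_FREE form.
            assert(((a & (0x3 << (j * 2))) == 0) == ((a & masks[j]) == 0));
        }
    }
    return 0;
}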
@@ -456,23 +492,31 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) {
         }
         DEBUG_printf("gc_alloc(" UINT_FMT "): no free mem, triggering GC\n", n_bytes);
         gc_collect();
-        collected = 1;
+        collected = true;
+        // Try again since we've hopefully freed up space.
+        keep_looking = true;
         GC_ENTER();
     }
+    assert(found_block != 0xffffffff);
 
-    // found, ending at block i inclusive
-found:
-    // get starting and end blocks, both inclusive
-    end_block = i;
-    start_block = i - n_free + 1;
-
-    // Set last free ATB index to block after last block we found, for start of
+    // Found free space ending at found_block inclusive.
+    // Also, set last free ATB index to block after last block we found, for start of
     // next scan. To reduce fragmentation, we only do this if we were looking
     // for a single free block, which guarantees that there are no free blocks
-    // before this one. Also, whenever we free or shink a block we must check
+    // before this one. Also, whenever we free or shrink a block we must check
     // if this index needs adjusting (see gc_realloc and gc_free).
-    if (n_free == 1) {
-        MP_STATE_MEM(gc_last_free_atb_index) = (i + 1) / BLOCKS_PER_ATB;
+    if (!long_lived) {
+        end_block = found_block;
+        start_block = found_block - n_free + 1;
+        if (n_blocks == 1) {
+            MP_STATE_MEM(gc_first_free_atb_index) = (found_block + 1) / BLOCKS_PER_ATB;
+        }
+    } else {
+        start_block = found_block;
+        end_block = found_block + n_free - 1;
+        if (n_blocks == 1) {
+            MP_STATE_MEM(gc_last_free_atb_index) = (found_block - 1) / BLOCKS_PER_ATB;
+        }
     }
 
     #ifdef LOG_HEAP_ACTIVITY
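The asymmetry in the hunk above follows from the search direction: an upward scan ends on the highest block of a free run, while a downward scan ends on its lowest block. A tiny standalone worked example of the arithmetic (illustrative values):

#include <assert.h>
#include <stddef.h>

int main(void) {
    // A run of 4 free blocks whose scan stopped at block 100.
    size_t found_block = 100, n_free = 4;

    // Short lived (upward) search: found_block is the run's top, so the
    // allocation spans blocks 97..100.
    assert(found_block - n_free + 1 == 97);

    // Long lived (downward) search: found_block is the run's bottom, so the
    // allocation spans blocks 100..103.
    assert(found_block + n_free - 1 == 103);
    return 0;
}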
@@ -493,6 +537,13 @@ void *gc_alloc(size_t n_bytes, bool has_finaliser) {
     void *ret_ptr = (void*)(MP_STATE_MEM(gc_pool_start) + start_block * BYTES_PER_BLOCK);
     DEBUG_printf("gc_alloc(%p)\n", ret_ptr);
 
+    // If the allocation was long lived then update the lowest value. It's used to trigger early
+    // collects when allocations fail in their respective section. It's also used to ignore calls
+    // to gc_make_long_lived where the pointer is already in the long lived section.
+    if (long_lived && ret_ptr < MP_STATE_MEM(gc_lowest_long_lived_ptr)) {
+        MP_STATE_MEM(gc_lowest_long_lived_ptr) = ret_ptr;
+    }
+
     #if MICROPY_GC_ALLOC_THRESHOLD
     MP_STATE_MEM(gc_alloc_amount) += n_blocks;
     #endif
@@ -566,7 +617,10 @@ void gc_free(void *ptr) {
         #endif
 
         // set the last_free pointer to this block if it's earlier in the heap
-        if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) {
+        if (block / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
+            MP_STATE_MEM(gc_first_free_atb_index) = block / BLOCKS_PER_ATB;
+        }
+        if (block / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
             MP_STATE_MEM(gc_last_free_atb_index) = block / BLOCKS_PER_ATB;
         }

@@ -607,6 +661,50 @@ size_t gc_nbytes(const void *ptr) {
     return 0;
 }
 
+bool gc_has_finaliser(const void *ptr) {
+    #if MICROPY_ENABLE_FINALISER
+    GC_ENTER();
+    if (VERIFY_PTR(ptr)) {
+        bool has_finaliser = FTB_GET(BLOCK_FROM_PTR(ptr));
+        GC_EXIT();
+        return has_finaliser;
+    }
+
+    // invalid pointer
+    GC_EXIT();
+    #else
+    (void) ptr;
+    #endif
+    return false;
+}
+
+void *gc_make_long_lived(void *old_ptr) {
+    // If it's already in the long lived section then don't bother moving it.
+    if (old_ptr >= MP_STATE_MEM(gc_lowest_long_lived_ptr)) {
+        return old_ptr;
+    }
+    size_t n_bytes = gc_nbytes(old_ptr);
+    if (n_bytes == 0) {
+        return old_ptr;
+    }
+    bool has_finaliser = gc_has_finaliser(old_ptr);
+
+    // Try to find a new area in the long lived section to copy the memory to.
+    void* new_ptr = gc_alloc(n_bytes, has_finaliser, true);
+    if (new_ptr == NULL) {
+        return old_ptr;
+    } else if (old_ptr > new_ptr) {
+        // Return the old pointer if the new one is lower in the heap and free the new space.
+        gc_free(new_ptr);
+        return old_ptr;
+    }
+    // We copy everything over and let the garbage collection process delete the old copy. That
+    // way we ensure we don't delete memory that has a second reference. (Though if there is one
+    // we may confuse things when it's mutable.)
+    memcpy(new_ptr, old_ptr, n_bytes);
+    return new_ptr;
+}
+
 #if 0
 // old, simple realloc that didn't expand memory in place
 void *gc_realloc(void *ptr, mp_uint_t n_bytes) {
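Two design points in gc_make_long_lived above are worth calling out: it refuses a move that would land lower in the heap (that would spread the long lived section out rather than compact it), and it never frees the original block, leaving reclamation to the next collect so any second reference stays valid until then. The caller contract is simply to adopt the returned pointer; a minimal illustrative use (the helper name is invented):

// Sketch of a typical call site: promote an object once it becomes reachable
// from something permanent, keeping whichever pointer comes back. If a copy
// was made, the old block becomes garbage at the next gc_collect().
static void promote_slot(void **slot) {
    *slot = gc_make_long_lived(*slot);
}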
@@ -639,7 +737,7 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) {
 void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
     // check for pure allocation
     if (ptr_in == NULL) {
-        return gc_alloc(n_bytes, false);
+        return gc_alloc(n_bytes, false, false);
     }
 
     // check for pure free
@@ -714,7 +812,10 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
     }
 
     // set the last_free pointer to end of this block if it's earlier in the heap
-    if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_last_free_atb_index)) {
+    if ((block + new_blocks) / BLOCKS_PER_ATB < MP_STATE_MEM(gc_first_free_atb_index)) {
+        MP_STATE_MEM(gc_first_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
+    }
+    if ((block + new_blocks) / BLOCKS_PER_ATB > MP_STATE_MEM(gc_last_free_atb_index)) {
         MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
     }
 
@@ -774,7 +875,7 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
     }
 
     // can't resize inplace; try to find a new contiguous chain
-    void *ptr_out = gc_alloc(n_bytes, ftb_state);
+    void *ptr_out = gc_alloc(n_bytes, ftb_state, false);
 
     // check that the alloc succeeded
     if (ptr_out == NULL) {

py/gc.h

Lines changed: 3 additions & 1 deletion
@@ -45,9 +45,11 @@ void gc_collect_start(void);
 void gc_collect_root(void **ptrs, size_t len);
 void gc_collect_end(void);
 
-void *gc_alloc(size_t n_bytes, bool has_finaliser);
+void *gc_alloc(size_t n_bytes, bool has_finaliser, bool long_lived);
 void gc_free(void *ptr); // does not call finaliser
 size_t gc_nbytes(const void *ptr);
+bool gc_has_finaliser(const void *ptr);
+void *gc_make_long_lived(void *old_ptr);
 void *gc_realloc(void *ptr, size_t n_bytes, bool allow_move);
 
 typedef struct _gc_info_t {
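For reference, a hedged sketch of how the widened gc_alloc signature is meant to be used (illustrative wrappers; the commit message, not this excerpt, is the source for QSTR pools and chunks always being long lived):

#include <stddef.h>
#include "py/gc.h"

// Ordinary allocations keep the old behavior by passing false and are packed
// from the bottom of the heap; allocations that will never be freed pass true
// and are packed from the top, keeping the long lived section dense.
void *alloc_short_lived(size_t n) {
    return gc_alloc(n, false, false);
}

void *alloc_long_lived(size_t n) {
    return gc_alloc(n, false, true);
}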
