Skip to content

Commit 5255255

Browse files
committed
py: Create str/bytes objects in the parser, not the compiler.
Prior to this patch, any non-interned str/bytes object would create a special parse node that held a copy of the str/bytes data. Then, in the compiler, this data would be turned into a str/bytes object. This actually led to 2 copies of the data, one in the parse node and one in the object. The parse node's copy of the data would be freed at the end of the compile stage, but nevertheless it meant that the peak memory usage of the parse/compile stage was higher than it needed to be (by an amount equal to the number of bytes in all the non-interned str/bytes objects). This patch changes the behaviour so that str/bytes objects are created directly in the parser and the object is stored in a const-object parse node (which already exists for bignum, float and complex const objects). This reduces peak RAM usage of the parse/compile stage, simplifies the parser and compiler, and reduces code size by about 170 bytes on Thumb2 archs, and by about 300 bytes on Xtensa archs.
1 parent f62503d commit 5255255

File tree

4 files changed

+24
-66
lines changed

4 files changed

+24
-66
lines changed

py/compile.c

Lines changed: 9 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ typedef enum {
4747
#include "py/grammar.h"
4848
#undef DEF_RULE
4949
#undef DEF_RULE_NC
50-
PN_string, // special node for non-interned string
51-
PN_bytes, // special node for non-interned bytes
5250
PN_const_object, // special node for a constant, generic Python object
5351
// define rules without a compile function
5452
#define DEF_RULE(rule, comp, kind, ...)
@@ -1880,8 +1878,6 @@ STATIC void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
18801878
} else {
18811879
// for non-REPL, evaluate then discard the expression
18821880
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
1883-
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)
1884-
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_bytes)
18851881
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)) {
18861882
// do nothing with a lonely constant
18871883
} else {
@@ -2600,33 +2596,19 @@ STATIC void compile_atom_expr_await(compiler_t *comp, mp_parse_node_struct_t *pn
26002596
}
26012597
#endif
26022598

2603-
STATIC void compile_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
2604-
// only create and load the actual str object on the last pass
2605-
if (comp->pass != MP_PASS_EMIT) {
2606-
EMIT_ARG(load_const_obj, mp_const_none);
2607-
} else {
2608-
EMIT_ARG(load_const_obj, mp_obj_new_str((const char*)pns->nodes[0], pns->nodes[1], false));
2609-
}
2610-
}
2611-
2612-
STATIC void compile_bytes(compiler_t *comp, mp_parse_node_struct_t *pns) {
2613-
// only create and load the actual bytes object on the last pass
2614-
if (comp->pass != MP_PASS_EMIT) {
2615-
EMIT_ARG(load_const_obj, mp_const_none);
2616-
} else {
2617-
EMIT_ARG(load_const_obj, mp_obj_new_bytes((const byte*)pns->nodes[0], pns->nodes[1]));
2618-
}
2619-
}
2620-
2621-
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
2599+
STATIC mp_obj_t get_const_object(mp_parse_node_struct_t *pns) {
26222600
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
26232601
// nodes are 32-bit pointers, but need to extract 64-bit object
2624-
EMIT_ARG(load_const_obj, (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
2602+
return (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
26252603
#else
2626-
EMIT_ARG(load_const_obj, (mp_obj_t)pns->nodes[0]);
2604+
return (mp_obj_t)pns->nodes[0];
26272605
#endif
26282606
}
26292607

2608+
STATIC void compile_const_object(compiler_t *comp, mp_parse_node_struct_t *pns) {
2609+
EMIT_ARG(load_const_obj, get_const_object(pns));
2610+
}
2611+
26302612
typedef void (*compile_function_t)(compiler_t*, mp_parse_node_struct_t*);
26312613
STATIC const compile_function_t compile_function[] = {
26322614
// only define rules with a compile function
@@ -2637,8 +2619,6 @@ STATIC const compile_function_t compile_function[] = {
26372619
#undef c
26382620
#undef DEF_RULE
26392621
#undef DEF_RULE_NC
2640-
compile_string,
2641-
compile_bytes,
26422622
compile_const_object,
26432623
};
26442624

@@ -2891,7 +2871,8 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
28912871
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
28922872
if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0])
28932873
&& MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING)
2894-
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
2874+
|| (MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_const_object)
2875+
&& MP_OBJ_IS_STR(get_const_object((mp_parse_node_struct_t*)pns->nodes[0])))) {
28952876
// compile the doc string
28962877
compile_node(comp, pns->nodes[0]);
28972878
// store the doc string

py/emitinlinethumb.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@ typedef enum {
4242
#include "py/grammar.h"
4343
#undef DEF_RULE
4444
#undef DEF_RULE_NC
45-
PN_string, // special node for non-interned string
46-
PN_bytes, // special node for non-interned bytes
4745
PN_const_object, // special node for a constant, generic Python object
4846
// define rules without a compile function
4947
#define DEF_RULE(rule, comp, kind, ...)

py/parse.c

Lines changed: 13 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "py/runtime0.h"
3939
#include "py/runtime.h"
4040
#include "py/objint.h"
41+
#include "py/objstr.h"
4142
#include "py/builtin.h"
4243

4344
#if MICROPY_ENABLE_COMPILER
@@ -75,8 +76,6 @@ enum {
7576
#include "py/grammar.h"
7677
#undef DEF_RULE
7778
#undef DEF_RULE_NC
78-
RULE_string, // special node for non-interned string
79-
RULE_bytes, // special node for non-interned bytes
8079
RULE_const_object, // special node for a constant, generic Python object
8180

8281
// define rules without a compile function
@@ -123,8 +122,6 @@ STATIC const rule_t *const rules[] = {
123122
#include "py/grammar.h"
124123
#undef DEF_RULE
125124
#undef DEF_RULE_NC
126-
NULL, // RULE_string
127-
NULL, // RULE_bytes
128125
NULL, // RULE_const_object
129126

130127
// define rules without a compile function
@@ -326,11 +323,7 @@ void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
326323
} else {
327324
// node must be a mp_parse_node_struct_t
328325
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
329-
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
330-
printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
331-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
332-
printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
333-
} else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
326+
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
334327
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
335328
printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
336329
#else
@@ -392,21 +385,6 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
392385
parser->result_stack[parser->result_stack_top++] = pn;
393386
}
394387

395-
STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
396-
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
397-
if (pn == NULL) {
398-
parser->parse_error = PARSE_ERROR_MEMORY;
399-
return MP_PARSE_NODE_NULL;
400-
}
401-
pn->source_line = src_line;
402-
pn->kind_num_nodes = rule_kind | (2 << 8);
403-
char *p = m_new(char, len);
404-
memcpy(p, str, len);
405-
pn->nodes[0] = (uintptr_t)p;
406-
pn->nodes[1] = len;
407-
return (mp_parse_node_t)pn;
408-
}
409-
410388
STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
411389
mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
412390
if (pn == NULL) {
@@ -473,8 +451,11 @@ STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
473451
// qstr exists, make a leaf node
474452
pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
475453
} else {
476-
// not interned, make a node holding a pointer to the string/bytes data
477-
pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
454+
// not interned, make a node holding a pointer to the string/bytes object
455+
mp_obj_t o = mp_obj_new_str_of_type(
456+
lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes,
457+
(const byte*)lex->vstr.buf, lex->vstr.len);
458+
pn = make_node_const_object(parser, lex->tok_line, o);
478459
}
479460
} else {
480461
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
@@ -934,15 +915,13 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
934915
// this code discards lonely statements, such as doc strings
935916
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
936917
mp_parse_node_t p = peek_result(&parser, 1);
937-
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
918+
if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p))
919+
|| MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
938920
pop_result(&parser); // MP_PARSE_NODE_NULL
939-
mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
940-
if (MP_PARSE_NODE_IS_STRUCT(pn)) {
941-
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
942-
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
943-
m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
944-
}
945-
}
921+
pop_result(&parser); // const expression (leaf or RULE_const_object)
922+
// Pushing the "pass" rule here will overwrite any RULE_const_object
923+
// entry that was on the result stack, allowing the GC to reclaim
924+
// the memory from the const object when needed.
946925
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
947926
break;
948927
}

tests/cmdline/cmd_parsetree.py.exp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515
str(str)
1616
[ 8] rule(5) (n=2)
1717
id(c)
18-
[ 8] literal str(a very long str that will not be interned)
18+
[ 8] literal \.\+
1919
[ 9] rule(5) (n=2)
2020
id(d)
2121
bytes(bytes)
2222
[ 10] rule(5) (n=2)
2323
id(e)
24-
[ 10] literal bytes(a very long bytes that will not be interned)
24+
[ 10] literal \.\+
2525
[ 11] rule(5) (n=2)
2626
id(f)
2727
[ 11] literal \.\+

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy