From c3e40d50abdf46a3a4d03f330909bcb70687a1c0 Mon Sep 17 00:00:00 2001 From: Artyom Skrobov Date: Tue, 6 Apr 2021 12:33:03 -0400 Subject: [PATCH] [qstr] Separate hash and len from string data This allows the compiler to merge strings: e.g. "update", "difference_update" and "symmetric_difference_update" will all point to the same memory. Shaves ~1KB off the image size, and potentially allows bigger savings if qstr attrs are initialized in qstr_init(), and not stored in the image. --- py/makeqstrdata.py | 28 +++----- py/mpstate.h | 2 +- py/qstr.c | 132 +++++++++++++++++----------------- py/qstr.h | 20 +++++- supervisor/shared/translate.c | 2 +- tools/mpy-tool.py | 27 ++++--- 6 files changed, 113 insertions(+), 98 deletions(-) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 0ad749005cc11..2de42b14d538f 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -456,27 +456,24 @@ def parse_input_headers(infiles): return qcfgs, qstrs, i18ns +def escape_bytes(qstr): + if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): + # qstr is all printable ASCII so render it as-is (for easier debugging) + return qstr + else: + # qstr contains non-printable codes so render entire thing as hex pairs + qbytes = bytes_cons(qstr, "utf8") + return "".join(("\\x%02x" % b) for b in qbytes) def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): qbytes = bytes_cons(qstr, "utf8") qlen = len(qbytes) qhash = compute_hash(qbytes, cfg_bytes_hash) - if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): - # qstr is all printable ASCII so render it as-is (for easier debugging) - qdata = qstr - else: - # qstr contains non-printable codes so render entire thing as hex pairs - qdata = "".join(("\\x%02x" % b) for b in qbytes) if qlen >= (1 << (8 * cfg_bytes_len)): print("qstr is too long:", qstr) assert False - qlen_str = ("\\x%02x" * cfg_bytes_len) % tuple( - ((qlen >> (8 * i)) & 0xFF) for i in range(cfg_bytes_len) - ) - qhash_str = ("\\x%02x" * cfg_bytes_hash) % tuple( - ((qhash >> (8 * i)) & 0xFF) for i in range(cfg_bytes_hash) - ) - return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata) + qdata = escape_bytes(qstr) + return '%d, %d, "%s"' % (qhash, qlen, qdata) def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns): @@ -489,10 +486,7 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns): print("") # add NULL qstr with no hash or data - print( - 'QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' - % ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len) - ) + print('QDEF(MP_QSTR_NULL, 0, 0, "")') total_qstr_size = 0 total_qstr_compressed_size = 0 diff --git a/py/mpstate.h b/py/mpstate.h index 139f23cb4f2b8..1f6a2d00a141f 100644 --- a/py/mpstate.h +++ b/py/mpstate.h @@ -197,7 +197,7 @@ typedef struct _mp_state_vm_t { // pointer and sizes to store interned string data // (qstr_last_chunk can be root pointer but is also stored in qstr pool) - byte *qstr_last_chunk; + char *qstr_last_chunk; size_t qstr_last_alloc; size_t qstr_last_used; diff --git a/py/qstr.c b/py/qstr.c index 2ae65379c749f..af5ea58686540 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -37,7 +37,6 @@ // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings) // ultimately we will replace this with a static hash table of some kind -// also probably need to include the length in the string data, to allow null bytes in the string #if MICROPY_DEBUG_VERBOSE // print debugging info #define DEBUG_printf DEBUG_printf @@ -46,34 +45,9 @@ #endif // A qstr is an index into the qstr pool. -// The data for a qstr contains (hash, length, data): -// - hash (configurable number of bytes) -// - length (configurable number of bytes) -// - data ("length" number of bytes) -// - \0 terminated (so they can be printed using printf) - -#if MICROPY_QSTR_BYTES_IN_HASH == 1 - #define Q_HASH_MASK (0xff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0]) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0) -#elif MICROPY_QSTR_BYTES_IN_HASH == 2 - #define Q_HASH_MASK (0xffff) - #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8)) - #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0) -#else - #error unimplemented qstr hash decoding -#endif -#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1) -#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN) -#if MICROPY_QSTR_BYTES_IN_LEN == 1 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH]) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0) -#elif MICROPY_QSTR_BYTES_IN_LEN == 2 - #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8)) - #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0) -#else - #error unimplemented qstr length decoding -#endif +// The data for a qstr is \0 terminated (so they can be printed using printf) + +#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1) #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1) @@ -98,14 +72,25 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) { return hash; } +const qstr_attr_t mp_qstr_const_attr[] = { + #ifndef NO_QSTR +#define QDEF(id, hash, len, str) { hash, len }, +#define TRANSLATION(id, length, compressed ...) + #include "genhdr/qstrdefs.generated.h" +#undef TRANSLATION +#undef QDEF + #endif +}; + const qstr_pool_t mp_qstr_const_pool = { NULL, // no previous pool 0, // no previous pool 10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below) MP_QSTRnumber_of, // corresponds to number of strings in array just below + (qstr_attr_t *)mp_qstr_const_attr, { #ifndef NO_QSTR -#define QDEF(id, str) str, +#define QDEF(id, hash, len, str) str, #define TRANSLATION(id, length, compressed ...) #include "genhdr/qstrdefs.generated.h" #undef TRANSLATION @@ -130,20 +115,22 @@ void qstr_init(void) { #endif } -STATIC const byte *find_qstr(qstr q) { +STATIC const char *find_qstr(qstr q, qstr_attr_t *attr) { // search pool for this qstr // total_prev_len==0 in the final pool, so the loop will always terminate qstr_pool_t *pool = MP_STATE_VM(last_pool); while (q < pool->total_prev_len) { pool = pool->prev; } - assert(q - pool->total_prev_len < pool->len); - return pool->qstrs[q - pool->total_prev_len]; + q -= pool->total_prev_len; + assert(q < pool->len); + *attr = pool->attrs[q]; + return pool->qstrs[q]; } // qstr_mutex must be taken while in this function -STATIC qstr qstr_add(const byte *q_ptr) { - DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr)); +STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { + DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr); // make sure we have room in the pool for a new qstr if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) { @@ -151,11 +138,14 @@ STATIC qstr qstr_add(const byte *q_ptr) { if (new_pool_length > MICROPY_QSTR_POOL_MAX_ENTRIES) { new_pool_length = MICROPY_QSTR_POOL_MAX_ENTRIES; } - qstr_pool_t *pool = m_new_ll_obj_var_maybe(qstr_pool_t, const char *, new_pool_length); - if (pool == NULL) { + mp_uint_t pool_size = sizeof(qstr_pool_t) + sizeof(const char *) * new_pool_length; + void *chunk = m_malloc_maybe(pool_size + sizeof(qstr_attr_t) * new_pool_length, true); + if (chunk == NULL) { QSTR_EXIT(); m_malloc_fail(new_pool_length); } + qstr_pool_t *pool = (qstr_pool_t *)chunk; + pool->attrs = (qstr_attr_t *)(void *)((char *)chunk + pool_size); pool->prev = MP_STATE_VM(last_pool); pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_pool_length; @@ -165,10 +155,14 @@ STATIC qstr qstr_add(const byte *q_ptr) { } // add the new qstr - MP_STATE_VM(last_pool)->qstrs[MP_STATE_VM(last_pool)->len++] = q_ptr; + mp_uint_t at = MP_STATE_VM(last_pool)->len; + MP_STATE_VM(last_pool)->attrs[at].hash = hash; + MP_STATE_VM(last_pool)->attrs[at].len = len; + MP_STATE_VM(last_pool)->qstrs[at] = q_ptr; + MP_STATE_VM(last_pool)->len++; // return id for the newly-added qstr - return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1; + return MP_STATE_VM(last_pool)->total_prev_len + at; } qstr qstr_find_strn(const char *str, size_t str_len) { @@ -177,9 +171,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) { - return pool->total_prev_len + (q - pool->qstrs); + qstr_attr_t *attrs = pool->attrs; + for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + if (attrs[at].hash == str_hash && attrs[at].len == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { + return pool->total_prev_len + at; } } } @@ -200,14 +195,14 @@ qstr qstr_from_strn(const char *str, size_t len) { // qstr does not exist in interned pool so need to add it // compute number of bytes needed to intern this string - size_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1; + size_t n_bytes = len + 1; if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) { // not enough room at end of previously interned string so try to grow - byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); + char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false); if (new_p == NULL) { // could not grow existing memory; shrink it to fit previous - (void)m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); + (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false); MP_STATE_VM(qstr_last_chunk) = NULL; } else { // could grow existing memory @@ -221,10 +216,10 @@ qstr qstr_from_strn(const char *str, size_t len) { if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) { al = MICROPY_ALLOC_QSTR_CHUNK_INIT; } - MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, al); + MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(char, al); if (MP_STATE_VM(qstr_last_chunk) == NULL) { // failed to allocate a large chunk so try with exact size - MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, n_bytes); + MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(char, n_bytes); if (MP_STATE_VM(qstr_last_chunk) == NULL) { QSTR_EXIT(); m_malloc_fail(n_bytes); @@ -236,39 +231,41 @@ qstr qstr_from_strn(const char *str, size_t len) { } // allocate memory from the chunk for this new interned string's data - byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); + char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used); MP_STATE_VM(qstr_last_used) += n_bytes; // store the interned strings' data mp_uint_t hash = qstr_compute_hash((const byte *)str, len); - Q_SET_HASH(q_ptr, hash); - Q_SET_LENGTH(q_ptr, len); - memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len); - q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0'; - q = qstr_add(q_ptr); + memcpy(q_ptr, str, len); + q_ptr[len] = '\0'; + q = qstr_add(hash, len, q_ptr); } QSTR_EXIT(); return q; } mp_uint_t PLACE_IN_ITCM(qstr_hash)(qstr q) { - return Q_GET_HASH(find_qstr(q)); + qstr_attr_t attr; + find_qstr(q, &attr); + return attr.hash; } size_t qstr_len(qstr q) { - const byte *qd = find_qstr(q); - return Q_GET_LENGTH(qd); + qstr_attr_t attr; + find_qstr(q, &attr); + return attr.len; } const char *qstr_str(qstr q) { - const byte *qd = find_qstr(q); - return (const char *)Q_GET_DATA(qd); + qstr_attr_t attr; + return find_qstr(q, &attr); } const byte *qstr_data(qstr q, size_t *len) { - const byte *qd = find_qstr(q); - *len = Q_GET_LENGTH(qd); - return Q_GET_DATA(qd); + qstr_attr_t attr; + const char *qd = find_qstr(q, &attr); + *len = attr.len; + return (byte *)qd; } void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) { @@ -280,13 +277,14 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { *n_pool += 1; *n_qstr += pool->len; - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - *n_str_data_bytes += Q_GET_ALLOC(*q); + for (const qstr_attr_t *q = pool->attrs, *q_top = pool->attrs + pool->len; q < q_top; q++) { + *n_str_data_bytes += sizeof(*q) + q->len + 1; } #if MICROPY_ENABLE_GC - *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap + // this counts actual bytes used in heap + *n_total_bytes += gc_nbytes(pool) - sizeof(qstr_attr_t) * pool->alloc; #else - *n_total_bytes += sizeof(qstr_pool_t) + sizeof(qstr) * pool->alloc; + *n_total_bytes += sizeof(qstr_pool_t) + sizeof(const char *) * pool->alloc; #endif } *n_total_bytes += *n_str_data_bytes; @@ -297,8 +295,8 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si void qstr_dump_data(void) { QSTR_ENTER(); for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) { - for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { - mp_printf(&mp_plat_print, "Q(%s)\n", Q_GET_DATA(*q)); + for (const char **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) { + mp_printf(&mp_plat_print, "Q(%s)\n", *q); } } QSTR_EXIT(); diff --git a/py/qstr.h b/py/qstr.h index c86e74324cc62..288bf5dbac061 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -47,12 +47,30 @@ enum { typedef size_t qstr; +typedef struct _qstr_attr_t { + #if MICROPY_QSTR_BYTES_IN_HASH == 1 + uint8_t hash; + #elif MICROPY_QSTR_BYTES_IN_HASH == 2 + uint16_t hash; + #else + #error unimplemented qstr hash decoding + #endif + #if MICROPY_QSTR_BYTES_IN_LEN == 1 + uint8_t len; + #elif MICROPY_QSTR_BYTES_IN_LEN == 2 + uint16_t len; + #else + #error unimplemented qstr length decoding + #endif +} qstr_attr_t; + typedef struct _qstr_pool_t { struct _qstr_pool_t *prev; size_t total_prev_len; size_t alloc; size_t len; - const byte *qstrs[]; + qstr_attr_t *attrs; + const char *qstrs[]; } qstr_pool_t; #define QSTR_FROM_STR_STATIC(s) (qstr_from_strn((s), strlen(s))) diff --git a/supervisor/shared/translate.c b/supervisor/shared/translate.c index c99cbf180d773..7afbd43e2159a 100644 --- a/supervisor/shared/translate.c +++ b/supervisor/shared/translate.c @@ -129,7 +129,7 @@ __attribute__((always_inline)) #endif const compressed_string_t *translate(const char *original) { #ifndef NO_QSTR - #define QDEF(id, str) + #define QDEF(id, hash, len, str) #define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else #include "genhdr/qstrdefs.generated.h" #undef TRANSLATION diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index c989b63007091..42d62fdf118bb 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -607,6 +607,20 @@ def freeze_mpy(base_qstrs, raw_codes): print(" MP_QSTR_%s," % new[i][1]) print("};") + print() + print("const qstr_attr_t mp_qstr_frozen_const_attr[] = {") + qstr_size = {"metadata": 0, "data": 0} + for _, _, qstr in new: + qbytes = qstrutil.bytes_cons(qstr, "utf8") + print(" {%d, %d}," % ( + qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH), + len(qbytes) + )) + qstr_size["metadata"] += ( + config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + ) + qstr_size["data"] += len(qbytes) + print("};") print() print("extern const qstr_pool_t mp_qstr_const_pool;") print("const qstr_pool_t mp_qstr_frozen_const_pool = {") @@ -614,19 +628,10 @@ def freeze_mpy(base_qstrs, raw_codes): print(" MP_QSTRnumber_of, // previous pool size") print(" %u, // allocated entries" % len(new)) print(" %u, // used entries" % len(new)) + print(" (qstr_attr_t *)mp_qstr_frozen_const_attr,") print(" {") - qstr_size = {"metadata": 0, "data": 0} for _, _, qstr in new: - qstr_size["metadata"] += ( - config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH - ) - qstr_size["data"] += len(qstr) - print( - " %s," - % qstrutil.make_bytes( - config.MICROPY_QSTR_BYTES_IN_LEN, config.MICROPY_QSTR_BYTES_IN_HASH, qstr - ) - ) + print(" \"%s\"," % qstrutil.escape_bytes(qstr)) print(" },") print("};") pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy