Skip to content

[qstr] Separate hash and len from string data #4583

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 11 additions & 17 deletions py/makeqstrdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,27 +456,24 @@ def parse_input_headers(infiles):

return qcfgs, qstrs, i18ns

def escape_bytes(qstr):
    """Render ``qstr`` as text suitable for the inside of a C string literal.

    Returns ``qstr`` unchanged when every character is printable ASCII
    (code points 32..126) and is neither a backslash nor a double quote.
    Otherwise the whole string is encoded as UTF-8 and every byte is
    rendered as a ``\\xNN`` hex escape.
    """
    if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        return qstr
    # qstr contains non-printable codes so render entire thing as hex pairs
    qbytes = bytes_cons(qstr, "utf8")
    return "".join(("\\x%02x" % b) for b in qbytes)

def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Build the comma-separated hash, length and quoted data text for a qstr.

    cfg_bytes_len  -- number of bytes available to store the length
    cfg_bytes_hash -- number of bytes of hash, passed on to compute_hash
    qstr           -- the string to describe

    The length and hash are computed over the UTF-8 encoding of ``qstr``.
    Aborts via a failed assertion (after printing the offending string) when
    the encoded length does not fit in ``cfg_bytes_len`` bytes.
    """
    qbytes = bytes_cons(qstr, "utf8")
    qlen = len(qbytes)
    qhash = compute_hash(qbytes, cfg_bytes_hash)
    # the encoded length must be representable in cfg_bytes_len bytes
    if qlen >= (1 << (8 * cfg_bytes_len)):
        print("qstr is too long:", qstr)
        assert False
    qdata = escape_bytes(qstr)
    return '%d, %d, "%s"' % (qhash, qlen, qdata)


def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
Expand All @@ -489,10 +486,7 @@ def print_qstr_data(encoding_table, qcfgs, qstrs, i18ns):
print("")

# add NULL qstr with no hash or data
print(
'QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")'
% ("\\x00" * cfg_bytes_hash, "\\x00" * cfg_bytes_len)
)
print('QDEF(MP_QSTR_NULL, 0, 0, "")')

total_qstr_size = 0
total_qstr_compressed_size = 0
Expand Down
2 changes: 1 addition & 1 deletion py/mpstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ typedef struct _mp_state_vm_t {

// pointer and sizes to store interned string data
// (qstr_last_chunk can be root pointer but is also stored in qstr pool)
byte *qstr_last_chunk;
char *qstr_last_chunk;
size_t qstr_last_alloc;
size_t qstr_last_used;

Expand Down
132 changes: 65 additions & 67 deletions py/qstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@

// NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings)
// ultimately we will replace this with a static hash table of some kind
// also probably need to include the length in the string data, to allow null bytes in the string

#if MICROPY_DEBUG_VERBOSE // print debugging info
#define DEBUG_printf DEBUG_printf
Expand All @@ -46,34 +45,9 @@
#endif

// A qstr is an index into the qstr pool.
// The data for a qstr contains (hash, length, data):
// - hash (configurable number of bytes)
// - length (configurable number of bytes)
// - data ("length" number of bytes)
// - \0 terminated (so they can be printed using printf)

// Accessors for a qstr whose hash is stored in the first
// MICROPY_QSTR_BYTES_IN_HASH bytes of its data (low byte first when the
// hash is two bytes wide).
#if MICROPY_QSTR_BYTES_IN_HASH == 1
#define Q_HASH_MASK (0xff)
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0])
#define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0)
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
#define Q_HASH_MASK (0xffff)
#define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
#define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0)
#else
#error unimplemented qstr hash decoding
#endif
// Total bytes occupied by one entry: hash bytes + length bytes + string
// bytes + 1 for the terminating \0.
#define Q_GET_ALLOC(q) (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
// Pointer to the string bytes, which follow the hash and length fields.
#define Q_GET_DATA(q) ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN)
// The length field starts right after the hash bytes (low byte first when
// the length is two bytes wide).
#if MICROPY_QSTR_BYTES_IN_LEN == 1
#define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH])
#define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0)
#elif MICROPY_QSTR_BYTES_IN_LEN == 2
#define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8))
#define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0)
#else
#error unimplemented qstr length decoding
#endif
// The data for a qstr is \0 terminated (so they can be printed using printf)

// Mask selecting the low MICROPY_QSTR_BYTES_IN_HASH bytes of a hash value.
// NOTE(review): Q_HASH_MASK is also defined inside the #if chain above; both
// definitions are present in this listing -- confirm which one is intended.
#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)

#if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
#define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
Expand All @@ -98,14 +72,25 @@ mp_uint_t qstr_compute_hash(const byte *data, size_t len) {
return hash;
}

// Table of { hash, len } records built from genhdr/qstrdefs.generated.h.
// Each QDEF(id, hash, len, str) line of that header expands to one
// initializer here; TRANSLATION(...) lines expand to nothing.
// The table has no entries when NO_QSTR is defined.
const qstr_attr_t mp_qstr_const_attr[] = {
#ifndef NO_QSTR
#define QDEF(id, hash, len, str) { hash, len },
#define TRANSLATION(id, length, compressed ...)
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
#undef QDEF
#endif
};

const qstr_pool_t mp_qstr_const_pool = {
NULL, // no previous pool
0, // no previous pool
10, // set so that the first dynamically allocated pool is twice this size; must be <= the len (just below)
MP_QSTRnumber_of, // corresponds to number of strings in array just below
(qstr_attr_t *)mp_qstr_const_attr,
{
#ifndef NO_QSTR
#define QDEF(id, str) str,
#define QDEF(id, hash, len, str) str,
#define TRANSLATION(id, length, compressed ...)
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
Expand All @@ -130,32 +115,37 @@ void qstr_init(void) {
#endif
}

STATIC const byte *find_qstr(qstr q) {
STATIC const char *find_qstr(qstr q, qstr_attr_t *attr) {
// search pool for this qstr
// total_prev_len==0 in the final pool, so the loop will always terminate
qstr_pool_t *pool = MP_STATE_VM(last_pool);
while (q < pool->total_prev_len) {
pool = pool->prev;
}
assert(q - pool->total_prev_len < pool->len);
return pool->qstrs[q - pool->total_prev_len];
q -= pool->total_prev_len;
assert(q < pool->len);
*attr = pool->attrs[q];
return pool->qstrs[q];
}

// qstr_mutex must be taken while in this function
STATIC qstr qstr_add(const byte *q_ptr) {
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);

// make sure we have room in the pool for a new qstr
if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
uint32_t new_pool_length = MP_STATE_VM(last_pool)->alloc * 2;
if (new_pool_length > MICROPY_QSTR_POOL_MAX_ENTRIES) {
new_pool_length = MICROPY_QSTR_POOL_MAX_ENTRIES;
}
qstr_pool_t *pool = m_new_ll_obj_var_maybe(qstr_pool_t, const char *, new_pool_length);
if (pool == NULL) {
mp_uint_t pool_size = sizeof(qstr_pool_t) + sizeof(const char *) * new_pool_length;
void *chunk = m_malloc_maybe(pool_size + sizeof(qstr_attr_t) * new_pool_length, true);
if (chunk == NULL) {
QSTR_EXIT();
m_malloc_fail(new_pool_length);
}
qstr_pool_t *pool = (qstr_pool_t *)chunk;
pool->attrs = (qstr_attr_t *)(void *)((char *)chunk + pool_size);
pool->prev = MP_STATE_VM(last_pool);
pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
pool->alloc = new_pool_length;
Expand All @@ -165,10 +155,14 @@ STATIC qstr qstr_add(const byte *q_ptr) {
}

// add the new qstr
MP_STATE_VM(last_pool)->qstrs[MP_STATE_VM(last_pool)->len++] = q_ptr;
mp_uint_t at = MP_STATE_VM(last_pool)->len;
MP_STATE_VM(last_pool)->attrs[at].hash = hash;
MP_STATE_VM(last_pool)->attrs[at].len = len;
MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
MP_STATE_VM(last_pool)->len++;

// return id for the newly-added qstr
return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1;
return MP_STATE_VM(last_pool)->total_prev_len + at;
}

qstr qstr_find_strn(const char *str, size_t str_len) {
Expand All @@ -177,9 +171,10 @@ qstr qstr_find_strn(const char *str, size_t str_len) {

// search pools for the data
for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) {
return pool->total_prev_len + (q - pool->qstrs);
qstr_attr_t *attrs = pool->attrs;
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
if (attrs[at].hash == str_hash && attrs[at].len == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) {
return pool->total_prev_len + at;
}
}
}
Expand All @@ -200,14 +195,14 @@ qstr qstr_from_strn(const char *str, size_t len) {
// qstr does not exist in interned pool so need to add it

// compute number of bytes needed to intern this string
size_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
size_t n_bytes = len + 1;

if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
// not enough room at end of previously interned string so try to grow
byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
if (new_p == NULL) {
// could not grow existing memory; shrink it to fit previous
(void)m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
(void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
MP_STATE_VM(qstr_last_chunk) = NULL;
} else {
// could grow existing memory
Expand All @@ -221,10 +216,10 @@ qstr qstr_from_strn(const char *str, size_t len) {
if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
}
MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, al);
MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(char, al);
if (MP_STATE_VM(qstr_last_chunk) == NULL) {
// failed to allocate a large chunk so try with exact size
MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(byte, n_bytes);
MP_STATE_VM(qstr_last_chunk) = m_new_ll_maybe(char, n_bytes);
if (MP_STATE_VM(qstr_last_chunk) == NULL) {
QSTR_EXIT();
m_malloc_fail(n_bytes);
Expand All @@ -236,39 +231,41 @@ qstr qstr_from_strn(const char *str, size_t len) {
}

// allocate memory from the chunk for this new interned string's data
byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
MP_STATE_VM(qstr_last_used) += n_bytes;

// store the interned strings' data
mp_uint_t hash = qstr_compute_hash((const byte *)str, len);
Q_SET_HASH(q_ptr, hash);
Q_SET_LENGTH(q_ptr, len);
memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len);
q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
q = qstr_add(q_ptr);
memcpy(q_ptr, str, len);
q_ptr[len] = '\0';
q = qstr_add(hash, len, q_ptr);
}
QSTR_EXIT();
return q;
}

// Return the hash recorded for qstr q.
mp_uint_t PLACE_IN_ITCM(qstr_hash)(qstr q) {
    qstr_attr_t attr;
    // only the attribute record is needed; the string pointer is ignored
    find_qstr(q, &attr);
    return attr.hash;
}

// Return the length recorded for qstr q.
size_t qstr_len(qstr q) {
    qstr_attr_t attr;
    // only the attribute record is needed; the string pointer is ignored
    find_qstr(q, &attr);
    return attr.len;
}

// Return a pointer to the string data of qstr q.
const char *qstr_str(qstr q) {
    // find_qstr always writes an attribute record; it is not used here
    qstr_attr_t attr;
    return find_qstr(q, &attr);
}

// Return a pointer to the string data of qstr q viewed as bytes, and store
// the recorded length of the qstr in *len.
const byte *qstr_data(qstr q, size_t *len) {
    qstr_attr_t attr;
    const char *qd = find_qstr(q, &attr);
    *len = attr.len;
    // keep the const qualifier when changing the pointee type
    return (const byte *)qd;
}

void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) {
Expand All @@ -280,13 +277,14 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si
for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
*n_pool += 1;
*n_qstr += pool->len;
for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
*n_str_data_bytes += Q_GET_ALLOC(*q);
for (const qstr_attr_t *q = pool->attrs, *q_top = pool->attrs + pool->len; q < q_top; q++) {
*n_str_data_bytes += sizeof(*q) + q->len + 1;
}
#if MICROPY_ENABLE_GC
*n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
// this counts actual bytes used in heap
*n_total_bytes += gc_nbytes(pool) - sizeof(qstr_attr_t) * pool->alloc;
#else
*n_total_bytes += sizeof(qstr_pool_t) + sizeof(qstr) * pool->alloc;
*n_total_bytes += sizeof(qstr_pool_t) + sizeof(const char *) * pool->alloc;
#endif
}
*n_total_bytes += *n_str_data_bytes;
Expand All @@ -297,8 +295,8 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si
void qstr_dump_data(void) {
QSTR_ENTER();
for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
mp_printf(&mp_plat_print, "Q(%s)\n", Q_GET_DATA(*q));
for (const char **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
mp_printf(&mp_plat_print, "Q(%s)\n", *q);
}
}
QSTR_EXIT();
Expand Down
20 changes: 19 additions & 1 deletion py/qstr.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,30 @@ enum {

typedef size_t qstr;

// Hash and length record for one qstr.
// Field widths are selected at compile time:
// - hash is MICROPY_QSTR_BYTES_IN_HASH bytes wide (1 or 2 supported)
// - len is MICROPY_QSTR_BYTES_IN_LEN bytes wide (1 or 2 supported)
// Any other configured width stops the build with #error.
typedef struct _qstr_attr_t {
#if MICROPY_QSTR_BYTES_IN_HASH == 1
uint8_t hash;
#elif MICROPY_QSTR_BYTES_IN_HASH == 2
uint16_t hash;
#else
#error unimplemented qstr hash decoding
#endif
#if MICROPY_QSTR_BYTES_IN_LEN == 1
uint8_t len;
#elif MICROPY_QSTR_BYTES_IN_LEN == 2
uint16_t len;
#else
#error unimplemented qstr length decoding
#endif
} qstr_attr_t;

// One pool of interned strings; pools are linked through prev.
typedef struct _qstr_pool_t {
    struct _qstr_pool_t *prev;  // previous pool in the chain, or NULL
    size_t total_prev_len;      // number of qstrs held by all previous pools
    size_t alloc;               // number of entries this pool has room for
    size_t len;                 // number of entries currently in use
    qstr_attr_t *attrs;         // hash/length record for each entry
    const char *qstrs[];        // string data pointer for each entry (must stay last)
} qstr_pool_t;

#define QSTR_FROM_STR_STATIC(s) (qstr_from_strn((s), strlen(s)))
Expand Down
2 changes: 1 addition & 1 deletion supervisor/shared/translate.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ __attribute__((always_inline))
#endif
const compressed_string_t *translate(const char *original) {
#ifndef NO_QSTR
#define QDEF(id, str)
#define QDEF(id, hash, len, str)
#define TRANSLATION(id, firstbyte, ...) if (strcmp(original, id) == 0) { static const compressed_string_t v = { .data = firstbyte, .tail = { __VA_ARGS__ } }; return &v; } else
#include "genhdr/qstrdefs.generated.h"
#undef TRANSLATION
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy