Skip to content

py/qstr: qstr performance improvements. #10758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions extmod/modssl_axtls.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ STATIC NORETURN void ssl_raise_error(int err) {
o_str->base.type = &mp_type_str;
o_str->data = (const byte *)errstr;
o_str->len = strlen((char *)o_str->data);
#if MICROPY_QSTR_BYTES_IN_HASH
o_str->hash = qstr_compute_hash(o_str->data, o_str->len);
#endif

// Raise OSError(err, str).
mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(err), MP_OBJ_FROM_PTR(o_str)};
Expand Down
2 changes: 2 additions & 0 deletions extmod/modssl_mbedtls.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,9 @@ STATIC NORETURN void mbedtls_raise_error(int err) {
o_str->base.type = &mp_type_str;
o_str->data = o_str_buf;
o_str->len = len;
#if MICROPY_QSTR_BYTES_IN_HASH
o_str->hash = qstr_compute_hash(o_str->data, o_str->len);
#endif
// raise
mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(err), MP_OBJ_FROM_PTR(o_str)};
nlr_raise(mp_obj_exception_make_new(&mp_type_OSError, 2, 0, args));
Expand Down
2 changes: 1 addition & 1 deletion extmod/vfs_lfsx.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ STATIC MP_DEFINE_CONST_DICT(MP_VFS_LFSx(locals_dict), MP_VFS_LFSx(locals_dict_ta
STATIC mp_import_stat_t MP_VFS_LFSx(import_stat)(void *self_in, const char *path) {
MP_OBJ_VFS_LFSx *self = self_in;
struct LFSx_API (info) info;
mp_obj_str_t path_obj = { { &mp_type_str }, 0, 0, (const byte *)path };
MP_DEFINE_STR_OBJ(path_obj, path);
path = MP_VFS_LFSx(make_path)(self, MP_OBJ_FROM_PTR(&path_obj));
int ret = LFSx_API(stat)(&self->lfs, path, &info);
if (ret == 0) {
Expand Down
1 change: 0 additions & 1 deletion ports/cc3200/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
#else
#define MICROPY_CPYTHON_COMPAT (0)
#endif
#define MICROPY_QSTR_BYTES_IN_HASH (1)

// fatfs configuration used in ffconf.h
#define MICROPY_FATFS_ENABLE_LFN (2)
Expand Down
2 changes: 2 additions & 0 deletions ports/esp32/mphalport.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ void check_esp_err_(esp_err_t code, const char *func, const int line, const char
o_str->data = (const byte *)esp_err_to_name(code); // esp_err_to_name ret's ptr to const str
#endif
o_str->len = strlen((char *)o_str->data);
#if MICROPY_QSTR_BYTES_IN_HASH
o_str->hash = qstr_compute_hash(o_str->data, o_str->len);
#endif
// raise
mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(pcode), MP_OBJ_FROM_PTR(o_str)};
nlr_raise(mp_obj_exception_make_new(&mp_type_OSError, 2, 0, args));
Expand Down
1 change: 0 additions & 1 deletion ports/nrf/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@
#define MICROPY_PY_SYS (1)
#define MICROPY_PY_SYS_PATH_ARGV_DEFAULTS (1)
#define MICROPY_PY___FILE__ (1)
#define MICROPY_QSTR_BYTES_IN_HASH (2)
#endif

#ifndef MICROPY_PY_UBLUEPY
Expand Down
1 change: 0 additions & 1 deletion ports/pic16bit/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
// options to control how MicroPython is built
#define MICROPY_OBJ_REPR (MICROPY_OBJ_REPR_B)
#define MICROPY_ALLOC_PATH_MAX (64)
#define MICROPY_QSTR_BYTES_IN_HASH (1)
#define MICROPY_EMIT_X64 (0)
#define MICROPY_EMIT_THUMB (0)
#define MICROPY_EMIT_INLINE_THUMB (0)
Expand Down
1 change: 0 additions & 1 deletion ports/powerpc/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

// #define MICROPY_DEBUG_VERBOSE (1)

#define MICROPY_QSTR_BYTES_IN_HASH (1)
#define MICROPY_QSTR_EXTRA_POOL mp_qstr_frozen_const_pool
#define MICROPY_ALLOC_PATH_MAX (256)
#define MICROPY_EMIT_X64 (0)
Expand Down
1 change: 0 additions & 1 deletion ports/rp2/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
// Memory allocation policies
#define MICROPY_GC_STACK_ENTRY_TYPE uint16_t
#define MICROPY_ALLOC_PATH_MAX (128)
#define MICROPY_QSTR_BYTES_IN_HASH (1)

// MicroPython emitters
#define MICROPY_PERSISTENT_CODE_LOAD (1)
Expand Down
1 change: 0 additions & 1 deletion ports/samd/mpconfigport.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
#define MICROPY_GC_STACK_ENTRY_TYPE uint16_t
#define MICROPY_GC_ALLOC_THRESHOLD (0)
#define MICROPY_ALLOC_PATH_MAX (256)
#define MICROPY_QSTR_BYTES_IN_HASH (1)

// MicroPython emitters
#define MICROPY_PERSISTENT_CODE_LOAD (1)
Expand Down
8 changes: 8 additions & 0 deletions ports/stm32/usb.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,11 @@ STATIC const uint8_t usbd_fifo_size_cdc3_msc_hid[] = {
// Predefined HID mouse data: a constant object tagged with mp_type_bytes that
// refers to the USBD_HID_MOUSE_ReportDesc report descriptor.
STATIC const mp_obj_str_t pyb_usb_hid_mouse_desc_obj = {
{&mp_type_bytes},
#if MICROPY_QSTR_BYTES_IN_HASH
0, // hash not valid
#else
// NOTE(review): this branch also emits a literal 0; which mp_obj_str_t member
// it initialises when qstr hashes are disabled is not visible here -- confirm
// against the struct definition.
0,
#endif
USBD_HID_MOUSE_REPORT_DESC_SIZE, // presumably the descriptor length in bytes
USBD_HID_MOUSE_ReportDesc, // presumably the pointer to the descriptor data
};
Expand All @@ -204,7 +208,11 @@ const mp_rom_obj_tuple_t pyb_usb_hid_mouse_obj = {
// Predefined HID keyboard data: a constant object tagged with mp_type_bytes
// that refers to the USBD_HID_KEYBOARD_ReportDesc report descriptor.
STATIC const mp_obj_str_t pyb_usb_hid_keyboard_desc_obj = {
{&mp_type_bytes},
#if MICROPY_QSTR_BYTES_IN_HASH
0, // hash not valid
#else
// NOTE(review): this branch also emits a literal 0; which mp_obj_str_t member
// it initialises when qstr hashes are disabled is not visible here -- confirm
// against the struct definition.
0,
#endif
USBD_HID_KEYBOARD_REPORT_DESC_SIZE, // presumably the descriptor length in bytes
USBD_HID_KEYBOARD_ReportDesc, // presumably the pointer to the descriptor data
};
Expand Down
3 changes: 0 additions & 3 deletions ports/unix/variants/minimal/mpconfigvariant.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,3 @@
// Enable just the sys and os built-in modules.
#define MICROPY_PY_SYS (1)
#define MICROPY_PY_OS (1)

// The minimum sets this to 1 to save flash.
#define MICROPY_QSTR_BYTES_IN_HASH (2)
107 changes: 77 additions & 30 deletions py/makeqstrdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
codepoint2name[ord("|")] = "pipe"
codepoint2name[ord("~")] = "tilde"

# static qstrs, should be sorted
# static qstrs, these must maintain a specific order for .mpy compatibility
# See QSTR_LAST_STATIC at the top of py/persistentcode.c

static_qstr_list = [
"",
Expand Down Expand Up @@ -220,6 +221,66 @@
"values",
"write",
"zip",
# Additional QSTRs that must have index <255 (these are not part of
# the .mpy compatibility list though).
"__bool__",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these added here? Don't they increase the size of ports that don't use these qstrs, e.g. bare-arm and minimal?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because these qstrs must have index <255 (e.g. the operator code assumes that the qstr for the operator name is a byte). Therefore they cannot go into a sorted pool, so they must be put in the unsorted pool at the start.

makeqstrdata.py only appends these here if they appear in the required qstr list from the firmware.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But in a comment you made above you said that:

> It adds a bunch of fixed qstrs (mostly dunder operators) that were not previously included in the bare-arm build but now need to be forced into the first pool so that their id < 256.

So... is it possible to not include these in the build if they aren't used? (Mainly I'm interested in getting the bare-arm size diff down.)

"__pos__",
"__neg__",
"__invert__",
"__abs__",
"__float__",
"__complex__",
"__sizeof__",
"__lt__",
"__gt__",
"__eq__",
"__le__",
"__ge__",
"__ne__",
"__contains__",
"__iadd__",
"__isub__",
"__imul__",
"__imatmul__",
"__ifloordiv__",
"__itruediv__",
"__imod__",
"__ipow__",
"__ior__",
"__ixor__",
"__iand__",
"__ilshift__",
"__irshift__",
"__add__",
"__sub__",
"__mul__",
"__matmul__",
"__floordiv__",
"__truediv__",
"__mod__",
"__divmod__",
"__pow__",
"__or__",
"__xor__",
"__and__",
"__lshift__",
"__rshift__",
"__radd__",
"__rsub__",
"__rmul__",
"__rmatmul__",
"__rfloordiv__",
"__rtruediv__",
"__rmod__",
"__rpow__",
"__ror__",
"__rxor__",
"__rand__",
"__rlshift__",
"__rrshift__",
"__get__",
"__set__",
"__delete__",
]


Expand All @@ -244,22 +305,13 @@ def esc_char(m):
return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)


static_qstr_list_ident = list(map(qstr_escape, static_qstr_list))


def parse_input_headers(infiles):
qcfgs = {}
qstrs = {}

# add static qstrs
for qstr in static_qstr_list:
# work out the corresponding qstr name
ident = qstr_escape(qstr)

# don't add duplicates
assert ident not in qstrs

# add the qstr to the list, with order number to retain original order in file
order = len(qstrs) - 300000
qstrs[ident] = (order, ident, qstr)

# read the qstrs in from the input files
for infile in infiles:
with open(infile, "rt") as f:
Expand Down Expand Up @@ -294,22 +346,12 @@ def parse_input_headers(infiles):
ident = qstr_escape(qstr)

# don't add duplicates
if ident in static_qstr_list_ident:
continue
if ident in qstrs:
continue

# add the qstr to the list, with order number to retain original order in file
order = len(qstrs)
# but put special method names like __add__ at the top of list, so
# that their id's fit into a byte
if ident == "":
# Sort empty qstr above all still
order = -200000
elif ident == "__dir__":
# Put __dir__ after empty qstr for builtin dir() to work
order = -190000
elif ident.startswith("__"):
order -= 100000
qstrs[ident] = (order, ident, qstr)
qstrs[ident] = (ident, qstr)

if not qcfgs:
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
Expand Down Expand Up @@ -348,12 +390,17 @@ def print_qstr_data(qcfgs, qstrs):
print("")

# add NULL qstr with no hash or data
print('QDEF(MP_QSTRnull, 0, 0, "")')
print('QDEF0(MP_QSTRnull, 0, 0, "")')

# add static qstrs to the first unsorted pool
for qstr in static_qstr_list:
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes))

# go through each qstr and print it out
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
# add remaining qstrs to the sorted (by value) pool
for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]):
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))


def do_work(infiles):
Expand Down
59 changes: 44 additions & 15 deletions py/map.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,46 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_
}
}

// When MICROPY_QSTR_BYTES_IN_HASH is zero, for qstr keys we use the qstr
// value (i.e. the index in the qstr pool) as the hash value as it is
// free to compute and also a very effective hash as it is unique across
// all qstrs.
//
// However, non-qstr strings (i.e. str) still use their "true" hash
// (i.e. computed using qstr_hash) which will be different. So this means
// that identical strings will hash differently depending on whether they
// are qstr or str. It should be rare for a program to be working with
// both but it is possible.
//
// So this means that in certain situations (see below), we cannot rely on
// the linear probing finding an unused slot as the signal that the key
// does not exist, as it's possible the key was inserted using
// its "other" hash, and must fall back to searching the entire table. We
// can still use the hash as a good hint for the starting location
// though. This flag must be set to false in these situations.
bool stop_at_empty = true;

// get hash of index, with fast path for common case of qstr
mp_uint_t hash;
if (mp_obj_is_qstr(index)) {
#if MICROPY_QSTR_BYTES_IN_HASH
hash = qstr_hash(MP_OBJ_QSTR_VALUE(index));
#else
// Optimisation -- use the qstr index directly as the hash.
hash = MP_OBJ_QSTR_VALUE(index);
// If there are non-qstr keys in this map, we must assume that there
// may possibly be str keys.
stop_at_empty = map->all_keys_are_qstrs;
#endif
} else {
hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));

#if MICROPY_QSTR_BYTES_IN_HASH == 0
if (mp_obj_is_exact_type(index, &mp_type_str)) {
// We must assume there might be qstr keys.
stop_at_empty = false;
}
#endif
}

size_t pos = hash % map->alloc;
Expand All @@ -255,20 +289,14 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_
for (;;) {
mp_map_elem_t *slot = &map->table[pos];
if (slot->key == MP_OBJ_NULL) {
// found NULL slot, so index is not in table
if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
map->used += 1;
if (avail_slot == NULL) {
avail_slot = slot;
}
avail_slot->key = index;
avail_slot->value = MP_OBJ_NULL;
if (!mp_obj_is_qstr(index)) {
map->all_keys_are_qstrs = 0;
}
return avail_slot;
} else {
return NULL;
// found an empty slot, remember for later
if (avail_slot == NULL) {
avail_slot = slot;
}
if (stop_at_empty) {
// safe to assume that the key doesn't exist, so pretend like
// we searched the entire table
goto search_over;
}
} else if (slot->key == MP_OBJ_SENTINEL) {
// found deleted slot, remember for later
Expand Down Expand Up @@ -297,7 +325,8 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_
pos = (pos + 1) % map->alloc;

if (pos == start_pos) {
// search got back to starting position, so index is not in table
search_over:
// search got back to starting position (or found empty slot), so index is not in table
if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) {
if (avail_slot != NULL) {
// there was an available slot, so use that
Expand Down
8 changes: 2 additions & 6 deletions py/mpconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,13 +282,9 @@
#define MICROPY_QSTR_BYTES_IN_LEN (1)
#endif

// Number of bytes used to store qstr hash
// Number of bytes used to store qstr hash, disabled by default
#ifndef MICROPY_QSTR_BYTES_IN_HASH
#if MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES
#define MICROPY_QSTR_BYTES_IN_HASH (2)
#else
#define MICROPY_QSTR_BYTES_IN_HASH (1)
#endif
#define MICROPY_QSTR_BYTES_IN_HASH (0)
#endif

// Avoid using C stack when making Python function calls. C stack still
Expand Down
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy