diff --git a/extmod/modssl_axtls.c b/extmod/modssl_axtls.c index 6cb999c13b69f..5ceffbb6a4c96 100644 --- a/extmod/modssl_axtls.c +++ b/extmod/modssl_axtls.c @@ -128,7 +128,9 @@ STATIC NORETURN void ssl_raise_error(int err) { o_str->base.type = &mp_type_str; o_str->data = (const byte *)errstr; o_str->len = strlen((char *)o_str->data); + #if MICROPY_QSTR_BYTES_IN_HASH o_str->hash = qstr_compute_hash(o_str->data, o_str->len); + #endif // Raise OSError(err, str). mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(err), MP_OBJ_FROM_PTR(o_str)}; diff --git a/extmod/modssl_mbedtls.c b/extmod/modssl_mbedtls.c index 449952594c2db..be56d1512b0fa 100644 --- a/extmod/modssl_mbedtls.c +++ b/extmod/modssl_mbedtls.c @@ -128,7 +128,9 @@ STATIC NORETURN void mbedtls_raise_error(int err) { o_str->base.type = &mp_type_str; o_str->data = o_str_buf; o_str->len = len; + #if MICROPY_QSTR_BYTES_IN_HASH o_str->hash = qstr_compute_hash(o_str->data, o_str->len); + #endif // raise mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(err), MP_OBJ_FROM_PTR(o_str)}; nlr_raise(mp_obj_exception_make_new(&mp_type_OSError, 2, 0, args)); diff --git a/extmod/vfs_lfsx.c b/extmod/vfs_lfsx.c index fe0731eced2a8..3442133340d03 100644 --- a/extmod/vfs_lfsx.c +++ b/extmod/vfs_lfsx.c @@ -488,7 +488,7 @@ STATIC MP_DEFINE_CONST_DICT(MP_VFS_LFSx(locals_dict), MP_VFS_LFSx(locals_dict_ta STATIC mp_import_stat_t MP_VFS_LFSx(import_stat)(void *self_in, const char *path) { MP_OBJ_VFS_LFSx *self = self_in; struct LFSx_API (info) info; - mp_obj_str_t path_obj = { { &mp_type_str }, 0, 0, (const byte *)path }; + MP_DEFINE_STR_OBJ(path_obj, path); path = MP_VFS_LFSx(make_path)(self, MP_OBJ_FROM_PTR(&path_obj)); int ret = LFSx_API(stat)(&self->lfs, path, &info); if (ret == 0) { diff --git a/ports/cc3200/mpconfigport.h b/ports/cc3200/mpconfigport.h index b6b412f178f3e..9e89e86df1b61 100644 --- a/ports/cc3200/mpconfigport.h +++ b/ports/cc3200/mpconfigport.h @@ -59,7 +59,6 @@ #else #define MICROPY_CPYTHON_COMPAT (0) #endif -#define 
MICROPY_QSTR_BYTES_IN_HASH (1) // fatfs configuration used in ffconf.h #define MICROPY_FATFS_ENABLE_LFN (2) diff --git a/ports/esp32/mphalport.c b/ports/esp32/mphalport.c index 538b3e405c905..8aff55f95ccbf 100644 --- a/ports/esp32/mphalport.c +++ b/ports/esp32/mphalport.c @@ -91,7 +91,9 @@ void check_esp_err_(esp_err_t code, const char *func, const int line, const char o_str->data = (const byte *)esp_err_to_name(code); // esp_err_to_name ret's ptr to const str #endif o_str->len = strlen((char *)o_str->data); + #if MICROPY_QSTR_BYTES_IN_HASH o_str->hash = qstr_compute_hash(o_str->data, o_str->len); + #endif // raise mp_obj_t args[2] = { MP_OBJ_NEW_SMALL_INT(pcode), MP_OBJ_FROM_PTR(o_str)}; nlr_raise(mp_obj_exception_make_new(&mp_type_OSError, 2, 0, args)); diff --git a/ports/nrf/mpconfigport.h b/ports/nrf/mpconfigport.h index 3a311fe5d9abc..b81bec9c62dcb 100644 --- a/ports/nrf/mpconfigport.h +++ b/ports/nrf/mpconfigport.h @@ -260,7 +260,6 @@ #define MICROPY_PY_SYS (1) #define MICROPY_PY_SYS_PATH_ARGV_DEFAULTS (1) #define MICROPY_PY___FILE__ (1) -#define MICROPY_QSTR_BYTES_IN_HASH (2) #endif #ifndef MICROPY_PY_UBLUEPY diff --git a/ports/pic16bit/mpconfigport.h b/ports/pic16bit/mpconfigport.h index f1bfa4cd6f16d..a2d607fb29325 100644 --- a/ports/pic16bit/mpconfigport.h +++ b/ports/pic16bit/mpconfigport.h @@ -29,7 +29,6 @@ // options to control how MicroPython is built #define MICROPY_OBJ_REPR (MICROPY_OBJ_REPR_B) #define MICROPY_ALLOC_PATH_MAX (64) -#define MICROPY_QSTR_BYTES_IN_HASH (1) #define MICROPY_EMIT_X64 (0) #define MICROPY_EMIT_THUMB (0) #define MICROPY_EMIT_INLINE_THUMB (0) diff --git a/ports/powerpc/mpconfigport.h b/ports/powerpc/mpconfigport.h index 06200a9969017..b74f374e7f9f5 100644 --- a/ports/powerpc/mpconfigport.h +++ b/ports/powerpc/mpconfigport.h @@ -35,7 +35,6 @@ // #define MICROPY_DEBUG_VERBOSE (1) -#define MICROPY_QSTR_BYTES_IN_HASH (1) #define MICROPY_QSTR_EXTRA_POOL mp_qstr_frozen_const_pool #define MICROPY_ALLOC_PATH_MAX (256) #define 
MICROPY_EMIT_X64 (0) diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 056e2df4fe203..7676d3cb79402 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -61,7 +61,6 @@ // Memory allocation policies #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t #define MICROPY_ALLOC_PATH_MAX (128) -#define MICROPY_QSTR_BYTES_IN_HASH (1) // MicroPython emitters #define MICROPY_PERSISTENT_CODE_LOAD (1) diff --git a/ports/samd/mpconfigport.h b/ports/samd/mpconfigport.h index 011119fd11719..5931e1b5595f0 100644 --- a/ports/samd/mpconfigport.h +++ b/ports/samd/mpconfigport.h @@ -35,7 +35,6 @@ #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t #define MICROPY_GC_ALLOC_THRESHOLD (0) #define MICROPY_ALLOC_PATH_MAX (256) -#define MICROPY_QSTR_BYTES_IN_HASH (1) // MicroPython emitters #define MICROPY_PERSISTENT_CODE_LOAD (1) diff --git a/ports/stm32/usb.c b/ports/stm32/usb.c index df6b2cf62a1ff..19e2fb18b744f 100644 --- a/ports/stm32/usb.c +++ b/ports/stm32/usb.c @@ -185,7 +185,11 @@ STATIC const uint8_t usbd_fifo_size_cdc3_msc_hid[] = { // predefined hid mouse data STATIC const mp_obj_str_t pyb_usb_hid_mouse_desc_obj = { {&mp_type_bytes}, + #if MICROPY_QSTR_BYTES_IN_HASH 0, // hash not valid + #else + 0, + #endif USBD_HID_MOUSE_REPORT_DESC_SIZE, USBD_HID_MOUSE_ReportDesc, }; @@ -204,7 +208,11 @@ const mp_rom_obj_tuple_t pyb_usb_hid_mouse_obj = { // predefined hid keyboard data STATIC const mp_obj_str_t pyb_usb_hid_keyboard_desc_obj = { {&mp_type_bytes}, + #if MICROPY_QSTR_BYTES_IN_HASH 0, // hash not valid + #else + 0, + #endif USBD_HID_KEYBOARD_REPORT_DESC_SIZE, USBD_HID_KEYBOARD_ReportDesc, }; diff --git a/ports/unix/variants/minimal/mpconfigvariant.h b/ports/unix/variants/minimal/mpconfigvariant.h index 0dbfbb3d1cd43..97ed786b8f409 100644 --- a/ports/unix/variants/minimal/mpconfigvariant.h +++ b/ports/unix/variants/minimal/mpconfigvariant.h @@ -64,6 +64,3 @@ // Enable just the sys and os built-in modules. 
#define MICROPY_PY_SYS (1) #define MICROPY_PY_OS (1) - -// The minimum sets this to 1 to save flash. -#define MICROPY_QSTR_BYTES_IN_HASH (2) diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index 71f529fb62900..88785c35fe708 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -52,7 +52,8 @@ codepoint2name[ord("|")] = "pipe" codepoint2name[ord("~")] = "tilde" -# static qstrs, should be sorted +# static qstrs, these must maintain a specific order for .mpy compatibility +# See QSTR_LAST_STATIC at the top of py/persistentcode.c static_qstr_list = [ "", @@ -220,6 +221,66 @@ "values", "write", "zip", + # Additional QSTRs that must have index <255 (these are not part of + # the .mpy compatibility list though). + "__bool__", + "__pos__", + "__neg__", + "__invert__", + "__abs__", + "__float__", + "__complex__", + "__sizeof__", + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "__contains__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__divmod__", + "__pow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rfloordiv__", + "__rtruediv__", + "__rmod__", + "__rpow__", + "__ror__", + "__rxor__", + "__rand__", + "__rlshift__", + "__rrshift__", + "__get__", + "__set__", + "__delete__", ] @@ -244,22 +305,13 @@ def esc_char(m): return re.sub(r"[^A-Za-z0-9_]", esc_char, qst) +static_qstr_list_ident = list(map(qstr_escape, static_qstr_list)) + + def parse_input_headers(infiles): qcfgs = {} qstrs = {} - # add static qstrs - for qstr in static_qstr_list: - # work out the corresponding qstr name - ident = qstr_escape(qstr) - - # don't add duplicates - assert ident not in qstrs - - # add the qstr to the list, 
with order number to retain original order in file - order = len(qstrs) - 300000 - qstrs[ident] = (order, ident, qstr) - # read the qstrs in from the input files for infile in infiles: with open(infile, "rt") as f: @@ -294,22 +346,12 @@ def parse_input_headers(infiles): ident = qstr_escape(qstr) # don't add duplicates + if ident in static_qstr_list_ident: + continue if ident in qstrs: continue - # add the qstr to the list, with order number to retain original order in file - order = len(qstrs) - # but put special method names like __add__ at the top of list, so - # that their id's fit into a byte - if ident == "": - # Sort empty qstr above all still - order = -200000 - elif ident == "__dir__": - # Put __dir__ after empty qstr for builtin dir() to work - order = -190000 - elif ident.startswith("__"): - order -= 100000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = (ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") @@ -348,12 +390,17 @@ def print_qstr_data(qcfgs, qstrs): print("") # add NULL qstr with no hash or data - print('QDEF(MP_QSTRnull, 0, 0, "")') + print('QDEF0(MP_QSTRnull, 0, 0, "")') + + # add static qstrs to the first unsorted pool + for qstr in static_qstr_list: + qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) + print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes)) - # go through each qstr and print it out - for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): + # add remaining qstrs to the sorted (by value) pool + for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]): qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) - print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes)) + print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes)) def do_work(infiles): diff --git a/py/map.c b/py/map.c index c18df5a9f333c..3660b7e544c0b 100644 --- a/py/map.c +++ b/py/map.c @@ -241,12 +241,46 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ 
} } + // When MICROPY_QSTR_BYTES_IN_HASH is zero, for qstr keys we use the qstr + // value (i.e. the index in the qstr pool) as the hash value as it is + // free to compute and also a very effective hash as it is unique across + // all qstrs. + // + // However, non-qstr strings (i.e. str) still use their "true" hash + // (i.e. computed using qstr_hash) which will be different. So this means + // that identical strings will hash differently depending on whether they + // are qstr or str. It should be rare for a program to be working with + // both but it is possible. + // + // So this means that in certain situations (see below), we cannot rely on + // the linear probing finding an unused slot as the signal that the key + // does not exist, as it's possible the key was inserted using + // its "other" hash, and must fall back to searching the entire table. We + // can still use the hash as a good hint for the starting location + // though. This flag must be set to false in these situations. + bool stop_at_empty = true; + // get hash of index, with fast path for common case of qstr mp_uint_t hash; if (mp_obj_is_qstr(index)) { + #if MICROPY_QSTR_BYTES_IN_HASH hash = qstr_hash(MP_OBJ_QSTR_VALUE(index)); + #else + // Optimisation -- use the qstr index directly as the hash. + hash = MP_OBJ_QSTR_VALUE(index); + // If there are non-qstr keys in this map, we must assume that there + // may possibly be str keys. + stop_at_empty = map->all_keys_are_qstrs; + #endif } else { hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index)); + + #if MICROPY_QSTR_BYTES_IN_HASH == 0 + if (mp_obj_is_exact_type(index, &mp_type_str)) { + // We must assume there might be qstr keys. 
+ stop_at_empty = false; + } + #endif } size_t pos = hash % map->alloc; @@ -255,20 +289,14 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ for (;;) { mp_map_elem_t *slot = &map->table[pos]; if (slot->key == MP_OBJ_NULL) { - // found NULL slot, so index is not in table - if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { - map->used += 1; - if (avail_slot == NULL) { - avail_slot = slot; - } - avail_slot->key = index; - avail_slot->value = MP_OBJ_NULL; - if (!mp_obj_is_qstr(index)) { - map->all_keys_are_qstrs = 0; - } - return avail_slot; - } else { - return NULL; + // found an empty slot, remember for later + if (avail_slot == NULL) { + avail_slot = slot; + } + if (stop_at_empty) { + // safe to assume that the key doesn't exist, so pretend like + // we searched the entire table + goto search_over; } } else if (slot->key == MP_OBJ_SENTINEL) { // found deleted slot, remember for later @@ -297,7 +325,8 @@ mp_map_elem_t *MICROPY_WRAP_MP_MAP_LOOKUP(mp_map_lookup)(mp_map_t * map, mp_obj_ pos = (pos + 1) % map->alloc; if (pos == start_pos) { - // search got back to starting position, so index is not in table + search_over: + // search got back to starting position (or found empty slot), so index is not in table if (lookup_kind == MP_MAP_LOOKUP_ADD_IF_NOT_FOUND) { if (avail_slot != NULL) { // there was an available slot, so use that diff --git a/py/mpconfig.h b/py/mpconfig.h index a36f9658fbddf..e9fa187af1a8d 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -282,13 +282,9 @@ #define MICROPY_QSTR_BYTES_IN_LEN (1) #endif -// Number of bytes used to store qstr hash +// Number of bytes used to store qstr hash, disabled by default #ifndef MICROPY_QSTR_BYTES_IN_HASH -#if MICROPY_CONFIG_ROM_LEVEL_AT_LEAST_CORE_FEATURES -#define MICROPY_QSTR_BYTES_IN_HASH (2) -#else -#define MICROPY_QSTR_BYTES_IN_HASH (1) -#endif +#define MICROPY_QSTR_BYTES_IN_HASH (0) #endif // Avoid using C stack when making Python function calls. 
C stack still diff --git a/py/objexcept.c b/py/objexcept.c index a90405c525484..e2647e78139fa 100644 --- a/py/objexcept.c +++ b/py/objexcept.c @@ -148,12 +148,16 @@ STATIC void decompress_error_text_maybe(mp_obj_exception_t *o) { mp_decompress_rom_string(buf, (mp_rom_error_text_t)o_str->data); o_str->data = buf; o_str->len = strlen((const char *)buf); + #if MICROPY_QSTR_BYTES_IN_HASH o_str->hash = 0; + #endif } + #if MICROPY_QSTR_BYTES_IN_HASH // Lazily compute the string hash. if (o_str->hash == 0) { o_str->hash = qstr_compute_hash(o_str->data, o_str->len); } + #endif } #endif } @@ -417,11 +421,13 @@ mp_obj_t mp_obj_new_exception_msg(const mp_obj_type_t *exc_type, mp_rom_error_te o_str->base.type = &mp_type_str; o_str->len = strlen((const char *)msg); o_str->data = (const byte *)msg; + #if MICROPY_QSTR_BYTES_IN_HASH #if MICROPY_ROM_TEXT_COMPRESSION o_str->hash = 0; // will be computed only if string object is accessed #else o_str->hash = qstr_compute_hash(o_str->data, o_str->len); #endif + #endif mp_obj_t arg = MP_OBJ_FROM_PTR(o_str); return mp_obj_exception_make_new(exc_type, 1, 0, &arg); } @@ -526,11 +532,13 @@ mp_obj_t mp_obj_new_exception_msg_vlist(const mp_obj_type_t *exc_type, mp_rom_er // Create the string object and call mp_obj_exception_make_new to create the exception o_str->base.type = &mp_type_str; + #if MICROPY_QSTR_BYTES_IN_HASH #if MICROPY_ROM_TEXT_COMPRESSION o_str->hash = 0; // will be computed only if string object is accessed #else o_str->hash = qstr_compute_hash(o_str->data, o_str->len); #endif + #endif mp_obj_t arg = MP_OBJ_FROM_PTR(o_str); return mp_obj_exception_make_new(exc_type, 1, 0, &arg); } diff --git a/py/objstr.c b/py/objstr.c index 5dfe94ac4fe1c..a86432e8ce82b 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -183,10 +183,12 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_ // TODO: validate 2nd/3rd args if (mp_obj_is_type(args[0], &mp_type_bytes)) { GET_STR_DATA_LEN(args[0], str_data, str_len); + #if 
MICROPY_QSTR_BYTES_IN_HASH GET_STR_HASH(args[0], str_hash); if (str_hash == 0) { str_hash = qstr_compute_hash(str_data, str_len); } + #endif #if MICROPY_PY_BUILTINS_STR_UNICODE_CHECK if (!utf8_check(str_data, str_len)) { mp_raise_msg(&mp_type_UnicodeError, NULL); @@ -201,7 +203,9 @@ mp_obj_t mp_obj_str_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_ mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_copy(type, NULL, str_len)); o->data = str_data; + #if MICROPY_QSTR_BYTES_IN_HASH o->hash = str_hash; + #endif return MP_OBJ_FROM_PTR(o); } else { mp_buffer_info_t bufinfo; @@ -240,13 +244,17 @@ STATIC mp_obj_t bytes_make_new(const mp_obj_type_t *type_in, size_t n_args, size #endif } GET_STR_DATA_LEN(args[0], str_data, str_len); + #if MICROPY_QSTR_BYTES_IN_HASH GET_STR_HASH(args[0], str_hash); if (str_hash == 0) { str_hash = qstr_compute_hash(str_data, str_len); } + #endif mp_obj_str_t *o = MP_OBJ_TO_PTR(mp_obj_new_str_copy(&mp_type_bytes, NULL, str_len)); o->data = str_data; + #if MICROPY_QSTR_BYTES_IN_HASH o->hash = str_hash; + #endif return MP_OBJ_FROM_PTR(o); } @@ -2063,7 +2071,9 @@ mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_u void mp_obj_str_set_data(mp_obj_str_t *str, const byte *data, size_t len) { str->data = data; str->len = len; + #if MICROPY_QSTR_BYTES_IN_HASH str->hash = qstr_compute_hash(data, len); + #endif } // This locals table is used for the following types: str, bytes, bytearray, array.array. @@ -2199,7 +2209,13 @@ MP_DEFINE_CONST_OBJ_TYPE( ); // The zero-length bytes object, with data that includes a null-terminating byte -const mp_obj_str_t mp_const_empty_bytes_obj = {{&mp_type_bytes}, 0, 0, (const byte *)""}; +const mp_obj_str_t mp_const_empty_bytes_obj = {{&mp_type_bytes}, + #if MICROPY_QSTR_BYTES_IN_HASH + 0, + #else + 0, + #endif + 0, (const byte *)""}; // Create a str/bytes object using the given data. New memory is allocated and // the data is copied across. 
This function should only be used if the type is bytes, @@ -2208,7 +2224,9 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type); o->len = len; if (data) { + #if MICROPY_QSTR_BYTES_IN_HASH o->hash = qstr_compute_hash(data, len); + #endif byte *p = m_new(byte, len + 1); o->data = p; memcpy(p, data, len * sizeof(byte)); @@ -2267,7 +2285,9 @@ STATIC mp_obj_t mp_obj_new_str_type_from_vstr(const mp_obj_type_t *type, vstr_t #endif mp_obj_str_t *o = mp_obj_malloc(mp_obj_str_t, type); o->len = vstr->len; + #if MICROPY_QSTR_BYTES_IN_HASH o->hash = qstr_compute_hash(data, vstr->len); + #endif o->data = data; return MP_OBJ_FROM_PTR(o); } @@ -2327,12 +2347,14 @@ bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { if (mp_obj_is_qstr(s1) && mp_obj_is_qstr(s2)) { return s1 == s2; } else { + #if MICROPY_QSTR_BYTES_IN_HASH GET_STR_HASH(s1, h1); GET_STR_HASH(s2, h2); // If any of hashes is 0, it means it's not valid if (h1 != 0 && h2 != 0 && h1 != h2) { return false; } + #endif GET_STR_DATA_LEN(s1, d1, l1); GET_STR_DATA_LEN(s2, d2, l2); if (l1 != l2) { diff --git a/py/objstr.h b/py/objstr.h index 72fe1cfef01a6..df739c9474ac7 100644 --- a/py/objstr.h +++ b/py/objstr.h @@ -31,7 +31,11 @@ typedef struct _mp_obj_str_t { mp_obj_base_t base; + #if MICROPY_QSTR_BYTES_IN_HASH size_t hash; + #else + size_t _placeholder; // TODO: e.g. 
byte inline_data[sizeof(size_t)]; + #endif // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte size_t len; const byte *data; @@ -46,6 +50,7 @@ typedef struct _mp_obj_str_t { #define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte *)str} +#if MICROPY_QSTR_BYTES_IN_HASH // use this macro to extract the string hash // warning: the hash can be 0, meaning invalid, and must then be explicitly computed from the data #define GET_STR_HASH(str_obj_in, str_hash) \ @@ -55,6 +60,7 @@ typedef struct _mp_obj_str_t { } else { \ str_hash = ((mp_obj_str_t *)MP_OBJ_TO_PTR(str_obj_in))->hash; \ } +#endif // use this macro to extract the string length #define GET_STR_LEN(str_obj_in, str_len) \ diff --git a/py/persistentcode.c b/py/persistentcode.c index e1218e0b8a577..6f11c96098e2e 100644 --- a/py/persistentcode.c +++ b/py/persistentcode.c @@ -40,6 +40,11 @@ #include "py/smallint.h" +// makeqstrdata.py has a fixed list of qstrs at the start that we can assume +// are available with known indices on all MicroPython implementations, and +// avoid needing to duplicate the string data in the .mpy file. This is the +// last one in that list (anything with a qstr less than or equal to this is +// assumed to be in the list). #define QSTR_LAST_STATIC MP_QSTR_zip #if MICROPY_DYNAMIC_COMPILER diff --git a/py/qstr.c b/py/qstr.c index ea700566f4f7d..cfcf127c03f28 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -45,7 +45,9 @@ // A qstr is an index into the qstr pool. 
// The data for a qstr is \0 terminated (so they can be printed using printf) +#if MICROPY_QSTR_BYTES_IN_HASH #define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1) +#endif #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1) @@ -66,42 +68,94 @@ size_t qstr_compute_hash(const byte *data, size_t len) { for (const byte *top = data + len; data < top; data++) { hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data } + #if MICROPY_QSTR_BYTES_IN_HASH hash &= Q_HASH_MASK; // Make sure that valid hash is never zero, zero means "hash not computed" if (hash == 0) { hash++; } + #endif return hash; } +#if MICROPY_QSTR_BYTES_IN_HASH +const qstr_hash_t mp_qstr_const_hashes_special[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) hash, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; const qstr_hash_t mp_qstr_const_hashes[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) hash, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) hash, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; +#endif +const qstr_len_t mp_qstr_const_lengths_special[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) len, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; const qstr_len_t mp_qstr_const_lengths[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) len, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) len, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; -const qstr_pool_t mp_qstr_const_pool = { +const qstr_pool_t mp_qstr_const_pool_special = { NULL, // no previous pool 0, // no previous pool MICROPY_ALLOC_QSTR_ENTRIES_INIT, - MP_QSTRnumber_of, // corresponds to number of strings in array just below + MP_QSTRnumber_of_special, // 
corresponds to number of strings in array just below + false, // not sorted + #if MICROPY_QSTR_BYTES_IN_HASH + (qstr_hash_t *)mp_qstr_const_hashes_special, + #endif + (qstr_len_t *)mp_qstr_const_lengths_special, + { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) str, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif + }, +}; + +const qstr_pool_t mp_qstr_const_pool = { + &mp_qstr_const_pool_special, + MP_QSTRnumber_of_special, + MICROPY_ALLOC_QSTR_ENTRIES_INIT, + MP_QSTRnumber_of - MP_QSTRnumber_of_special, // corresponds to number of strings in array just below + true, // sorted + #if MICROPY_QSTR_BYTES_IN_HASH (qstr_hash_t *)mp_qstr_const_hashes, + #endif (qstr_len_t *)mp_qstr_const_lengths, { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) str, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }, }; @@ -135,8 +189,16 @@ STATIC const qstr_pool_t *find_qstr(qstr *q) { } // qstr_mutex must be taken while in this function -STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { +STATIC qstr qstr_add( + #if MICROPY_QSTR_BYTES_IN_HASH + mp_uint_t hash, + #endif + mp_uint_t len, const char *q_ptr) { + #if MICROPY_QSTR_BYTES_IN_HASH DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr); + #else + DEBUG_printf("QSTR: add len=%d data=%.*s\n", len, len, q_ptr); + #endif // make sure we have room in the pool for a new qstr if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) { @@ -146,7 +208,11 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc); #endif mp_uint_t pool_size = sizeof(qstr_pool_t) - + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc; + + (sizeof(const char *) + #if MICROPY_QSTR_BYTES_IN_HASH + + 
sizeof(qstr_hash_t) + #endif + + sizeof(qstr_len_t)) * new_alloc; qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size); if (pool == NULL) { // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned @@ -158,8 +224,12 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { QSTR_EXIT(); m_malloc_fail(new_alloc); } + #if MICROPY_QSTR_BYTES_IN_HASH pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc); pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc); + #else + pool->lengths = (qstr_len_t *)(pool->qstrs + new_alloc); + #endif pool->prev = MP_STATE_VM(last_pool); pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_alloc; @@ -170,7 +240,9 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { // add the new qstr mp_uint_t at = MP_STATE_VM(last_pool)->len; + #if MICROPY_QSTR_BYTES_IN_HASH MP_STATE_VM(last_pool)->hashes[at] = hash; + #endif MP_STATE_VM(last_pool)->lengths[at] = len; MP_STATE_VM(last_pool)->qstrs[at] = q_ptr; MP_STATE_VM(last_pool)->len++; @@ -179,14 +251,58 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { return MP_STATE_VM(last_pool)->total_prev_len + at; } +STATIC inline int qstr_strncmp(const char *a, size_t a_len, const char *b) { + while (a_len && *b) { + if (*a != *b) { + return (int)*a - (int)*b; + } + a++; + b++; + a_len--; + } + if (a_len == 0 && *b) { + return -1; + } + if (a_len && !*b) { + return 1; + } + return 0; +} + qstr qstr_find_strn(const char *str, size_t str_len) { // work out hash of str + #if MICROPY_QSTR_BYTES_IN_HASH size_t str_hash = qstr_compute_hash((const byte *)str, str_len); + #endif // search pools for the data for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (mp_uint_t at = 0, top = pool->len; at < top; at++) { - if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len + size_t low = pool->prev ? 
0 : 1; // skip MP_QSTRnull at the start of the first pool + size_t high = pool->len - 1; + + // binary search inside the pool + if (pool->sorted) { + while (high - low > 1) { + size_t mid = (low + high) / 2; + int cmp = qstr_strncmp(str, str_len, pool->qstrs[mid]); + if (cmp < 0) { + high = mid; + } else { + low = mid; + if (MP_UNLIKELY(cmp == 0)) { + break; + } + } + } + } + + // sequential search for the remaining strings + for (mp_uint_t at = low; at < high + 1; at++) { + if ( + #if MICROPY_QSTR_BYTES_IN_HASH + pool->hashes[at] == str_hash && + #endif + pool->lengths[at] == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { return pool->total_prev_len + at; } @@ -194,7 +310,7 @@ qstr qstr_find_strn(const char *str, size_t str_len) { } // not found; return null qstr - return 0; + return MP_QSTRnull; } qstr qstr_from_str(const char *str) { @@ -254,19 +370,27 @@ qstr qstr_from_strn(const char *str, size_t len) { MP_STATE_VM(qstr_last_used) += n_bytes; // store the interned strings' data + #if MICROPY_QSTR_BYTES_IN_HASH size_t hash = qstr_compute_hash((const byte *)str, len); + #endif memcpy(q_ptr, str, len); q_ptr[len] = '\0'; - q = qstr_add(hash, len, q_ptr); + q = qstr_add( + #if MICROPY_QSTR_BYTES_IN_HASH + hash, + #endif + len, q_ptr); } QSTR_EXIT(); return q; } +#if MICROPY_QSTR_BYTES_IN_HASH mp_uint_t qstr_hash(qstr q) { const qstr_pool_t *pool = find_qstr(&q); return pool->hashes[q]; } +#endif size_t qstr_len(qstr q) { const qstr_pool_t *pool = find_qstr(&q); @@ -300,7 +424,11 @@ void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, si *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap #else *n_total_bytes += sizeof(qstr_pool_t) - + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc; + + (sizeof(const char *) + #if MICROPY_QSTR_BYTES_IN_HASH + + sizeof(qstr_hash_t) + #endif + + sizeof(qstr_len_t)) * pool->alloc; #endif } *n_total_bytes += *n_str_data_bytes; diff --git 
a/py/qstr.h b/py/qstr.h index 0ef861f33e8d5..6dcde390fb0fd 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,9 +38,21 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) id, +#define QDEF0(id, hash, len, str) id, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 + #endif + MP_QSTRnumber_of_special, + MP_QSTRstart_of_main = MP_QSTRnumber_of_special - 1, + + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) id, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 #endif MP_QSTRnumber_of, // no underscore so it can't clash with any of the above }; @@ -48,7 +60,8 @@ enum { typedef size_t qstr; typedef uint16_t qstr_short_t; -#if MICROPY_QSTR_BYTES_IN_HASH == 1 +#if MICROPY_QSTR_BYTES_IN_HASH == 0 +#elif MICROPY_QSTR_BYTES_IN_HASH == 1 typedef uint8_t qstr_hash_t; #elif MICROPY_QSTR_BYTES_IN_HASH == 2 typedef uint16_t qstr_hash_t; @@ -69,7 +82,10 @@ typedef struct _qstr_pool_t { size_t total_prev_len; size_t alloc; size_t len; + bool sorted; + #if MICROPY_QSTR_BYTES_IN_HASH qstr_hash_t *hashes; + #endif qstr_len_t *lengths; const char *qstrs[]; } qstr_pool_t; @@ -79,12 +95,15 @@ typedef struct _qstr_pool_t { void qstr_init(void); size_t qstr_compute_hash(const byte *data, size_t len); + qstr qstr_find_strn(const char *str, size_t str_len); // returns MP_QSTRnull if not found qstr qstr_from_str(const char *str); qstr qstr_from_strn(const char *str, size_t len); +#if MICROPY_QSTR_BYTES_IN_HASH mp_uint_t qstr_hash(qstr q); +#endif const char *qstr_str(qstr q); size_t qstr_len(qstr q); const byte *qstr_data(qstr q, size_t *len); diff --git a/py/runtime.c b/py/runtime.c index b4d5250e83a6f..3098a3f62270f 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -315,12 +315,17 @@ mp_obj_t mp_unary_op(mp_unary_op_t op, mp_obj_t arg) { } } else if (op == MP_UNARY_OP_HASH && 
mp_obj_is_str_or_bytes(arg)) { // fast path for hashing str/bytes + #if MICROPY_QSTR_BYTES_IN_HASH GET_STR_HASH(arg, h); if (h == 0) { GET_STR_DATA_LEN(arg, data, len); h = qstr_compute_hash(data, len); } return MP_OBJ_NEW_SMALL_INT(h); + #else + GET_STR_DATA_LEN(arg, data, len); + return MP_OBJ_NEW_SMALL_INT(qstr_compute_hash(data, len)); + #endif } else { const mp_obj_type_t *type = mp_obj_get_type(arg); if (MP_OBJ_TYPE_HAS_SLOT(type, unary_op)) { diff --git a/tests/extmod/asyncio_as_uasyncio.py b/tests/extmod/asyncio_as_uasyncio.py index 612292299c172..b021980590cdc 100644 --- a/tests/extmod/asyncio_as_uasyncio.py +++ b/tests/extmod/asyncio_as_uasyncio.py @@ -1,12 +1,33 @@ try: import uasyncio - import asyncio except ImportError: print("SKIP") raise SystemExit -x = set(dir(uasyncio)) -y = set(dir(asyncio)) - set(["event", "lock", "stream", "funcs"]) -print(x - y) -print(y - x) +# Sample of public symbols we expect to see from `asyncio`. Verify they're all +# available on `uasyncio`. +expected = [ + "CancelledError", + "create_task", + "current_task", + "Event", + "gather", + "get_event_loop", + "Lock", + "Loop", + "open_connection", + "run", + "run_until_complete", + "sleep", + "sleep_ms", + "start_server", + "StreamReader", + "StreamWriter", + "Task", + "ThreadSafeFlag", + "wait_for", +] + +for e in expected: + getattr(uasyncio, e) diff --git a/tests/extmod/asyncio_as_uasyncio.py.exp b/tests/extmod/asyncio_as_uasyncio.py.exp index 9405b80109126..e69de29bb2d1d 100644 --- a/tests/extmod/asyncio_as_uasyncio.py.exp +++ b/tests/extmod/asyncio_as_uasyncio.py.exp @@ -1,2 +0,0 @@ -set() -set() diff --git a/tests/unix/extra_coverage.py b/tests/unix/extra_coverage.py index 0ea8f7886bfff..8d8641ca9336d 100644 --- a/tests/unix/extra_coverage.py +++ b/tests/unix/extra_coverage.py @@ -10,11 +10,14 @@ data = extra_coverage() # test hashing of str/bytes that have an invalid hash +# if MICROPY_QSTR_BYTES_IN_HASH>0, then the hash will be mod 2**(n*8), whereas +# with 
MICROPY_QSTR_BYTES_IN_HASH==0 the hash will be the full range of +# size_t. so always % 65536 so this test works with both configurations. print(data[0], data[1]) -print(hash(data[0])) -print(hash(data[1])) -print(hash(bytes(data[0], "utf8"))) -print(hash(str(data[1], "utf8"))) +print(hash(data[0]) % 65536) +print(hash(data[1]) % 65536) +print(hash(bytes(data[0], "utf8")) % 65536) +print(hash(str(data[1], "utf8")) % 65536) # test streams stream = data[2] # has set_error and set_buf. Write always returns error diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 5ef69b267aec7..f100ceab96ab0 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -714,11 +714,13 @@ def freeze_constant_obj(self, obj_name, obj): else: obj_type = "mp_type_bytes" print( - 'static const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};' + 'static const mp_obj_str_t %s = {{&%s}, %s, %u, (const byte*)"%s"};' % ( obj_name, obj_type, - qstrutil.compute_hash(obj, config.MICROPY_QSTR_BYTES_IN_HASH), + str(qstrutil.compute_hash(obj, config.MICROPY_QSTR_BYTES_IN_HASH)) + if config.MICROPY_QSTR_BYTES_IN_HASH + else "0", len(obj), "".join(("\\x%02x" % b) for b in obj), ) @@ -1404,7 +1406,7 @@ def freeze_mpy(base_qstrs, compiled_modules): if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: continue new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) - new = sorted(new.values(), key=lambda x: x[0]) + new = sorted(new.values(), key=lambda x: x[2]) print('#include "py/mpconfig.h"') print('#include "py/objint.h"') @@ -1464,20 +1466,20 @@ def freeze_mpy(base_qstrs, compiled_modules): raw_code_count = 0 raw_code_content = 0 - print() - print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") qstr_size = {"metadata": 0, "data": 0} - for _, _, _, qbytes in new: - qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) - print(" %d," % qhash) - print("};") + if config.MICROPY_QSTR_BYTES_IN_HASH: + print() + print("const qstr_hash_t 
mp_qstr_frozen_const_hashes[] = {") + for _, _, _, qbytes in new: + qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) + print(" %d," % qhash) + qstr_size["metadata"] += config.MICROPY_QSTR_BYTES_IN_HASH + print("};") print() print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {") for _, _, _, qbytes in new: print(" %d," % len(qbytes)) - qstr_size["metadata"] += ( - config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH - ) + qstr_size["metadata"] += config.MICROPY_QSTR_BYTES_IN_LEN qstr_size["data"] += len(qbytes) print("};") print() @@ -1487,14 +1489,14 @@ def freeze_mpy(base_qstrs, compiled_modules): print(" MP_QSTRnumber_of, // previous pool size") print(" %u, // allocated entries" % qstr_pool_alloc) print(" %u, // used entries" % len(new)) - print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") + print(" true, // sorted") + if config.MICROPY_QSTR_BYTES_IN_HASH: + print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,") print(" {") for _, _, qstr, qbytes in new: print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) - qstr_content += ( - config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1 - ) + qstr_content = qstr_size["metadata"] + qstr_size["data"] + 1 print(" },") print("};") @@ -1780,7 +1782,9 @@ def main(): # set config values for qstrs, and get the existing base set of qstrs if args.qstr_header: - qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + special_qstrs = qstrutil.static_qstr_list_ident + qcfgs, extra_qstrs = qstrutil.parse_input_headers([args.qstr_header]) + base_qstrs = set(special_qstrs) | set(extra_qstrs.keys()) config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"]) config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"]) else:
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: