diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py index e332ab94ed565..f834dac38684e 100644 --- a/py/makeqstrdata.py +++ b/py/makeqstrdata.py @@ -52,7 +52,7 @@ codepoint2name[ord("|")] = "pipe" codepoint2name[ord("~")] = "tilde" -# static qstrs, should be sorted +# static qstrs, unsorted. static_qstr_list = [ "", @@ -61,7 +61,12 @@ " ", "*", "/", + "", + "", + "", + "", "", + "", "_", "__call__", "__class__", @@ -84,6 +89,64 @@ "__repr__", "__setitem__", "__str__", + "__bool__", + "__pos__", + "__neg__", + "__invert__", + "__abs__", + "__float__", + "__complex__", + "__sizeof__", + "__lt__", + "__gt__", + "__eq__", + "__le__", + "__ge__", + "__ne__", + "__contains__", + "__iadd__", + "__isub__", + "__imul__", + "__imatmul__", + "__ifloordiv__", + "__itruediv__", + "__imod__", + "__ipow__", + "__ior__", + "__ixor__", + "__iand__", + "__ilshift__", + "__irshift__", + "__add__", + "__sub__", + "__mul__", + "__matmul__", + "__floordiv__", + "__truediv__", + "__mod__", + "__divmod__", + "__pow__", + "__or__", + "__xor__", + "__and__", + "__lshift__", + "__rshift__", + "__radd__", + "__rsub__", + "__rmul__", + "__rmatmul__", + "__rfloordiv__", + "__rtruediv__", + "__rmod__", + "__rpow__", + "__ror__", + "__rxor__", + "__rand__", + "__rlshift__", + "__rrshift__", + "__get__", + "__set__", + "__delete__", "ArithmeticError", "AssertionError", "AttributeError", @@ -257,7 +320,7 @@ def parse_input_headers(infiles): # add the qstr to the list, with order number to retain original order in file order = len(qstrs) - 300000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = Qstr(order, ident, qstr) # read the qstrs in from the input files for infile in infiles: @@ -298,17 +361,7 @@ def parse_input_headers(infiles): # add the qstr to the list, with order number to retain original order in file order = len(qstrs) - # but put special method names like __add__ at the top of list, so - # that their id's fit into a byte - if ident == "": - # Sort empty qstr above all still - order = -200000 - elif ident == "__dir__": - # Put __dir__ after empty qstr for builtin dir() to work - order = -190000 - elif ident.startswith("__"): - order -= 100000 - qstrs[ident] = (order, ident, qstr) + qstrs[ident] = Qstr(order, ident, qstr) if not qcfgs: sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n") @@ -317,47 +370,72 @@ def parse_input_headers(infiles): return qcfgs, qstrs -def escape_bytes(qstr, qbytes): - if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in qstr): - # qstr is all printable ASCII so render it as-is (for easier debugging) - return qstr - else: - # qstr contains non-printable codes so render entire thing as hex pairs - return "".join(("\\x%02x" % b) for b in qbytes) +class Qstr: + cfg_bytes_len = 0 + cfg_bytes_hash = 0 + def __init__(self, order, ident, qstr): + self.order = order + self.ident = ident + self.qstr = qstr -def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): - qbytes = bytes_cons(qstr, "utf8") - qlen = len(qbytes) - qhash = compute_hash(qbytes, cfg_bytes_hash) - if qlen >= (1 << (8 * cfg_bytes_len)): - print("qstr is too long:", qstr) - assert False - qdata = escape_bytes(qstr, qbytes) - return '%d, %d, "%s"' % (qhash, qlen, qdata) + @property + def qbytes(self): + return bytes_cons(self.qstr, "utf8") + @property + def qlen(self): + if len(self.qbytes) >= (1 << (8 * Qstr.cfg_bytes_len)): + print("qstr is too long:", self.qstr) + assert False + return len(self.qbytes) -def print_qstr_data(qcfgs, qstrs): - # get config variables - cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"]) - cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"]) + @property + def qhash(self): + return compute_hash(self.qbytes, Qstr.cfg_bytes_hash) + + def _escape_bytes(self): + if all(32 <= ord(c) <= 126 and c != "\\" and c != '"' for c in self.qstr): + # qstr is all printable ASCII so render it as-is (for easier debugging) + return self.qstr + else: + # qstr contains non-printable codes so render entire thing as hex pairs + return "".join(("\\x%02x" % b) for b in self.qbytes) + + @property + def qdata(self): + return self._escape_bytes() + +def print_qstr_data(qstrs): # print out the starter of the generated C header file print("// This file was automatically generated by makeqstrdata.py") print("") # add NULL qstr with no hash or data - print('QDEF(MP_QSTRnull, 0, 0, "")') + print('QDEF0(MP_QSTRnull, 0, 0, "")') + + # split qstr values into two pools. static consts first. + q0_values = [q for q in qstrs.values() if q.order < 0] + q1_values = [q for q in qstrs.values() if q.order >= 0] - # go through each qstr and print it out - for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): - qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr) - print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes)) + # go through each qstr in pool 0 and print it out. pool0 has special sort. + for q in sorted(q0_values, key=lambda x: x.qhash): + print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) + + # go through each qstr in pool 1 and print it out. pool1 is regularly sorted. + for q in sorted(q1_values, key=lambda x: x.qhash): + print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata)) def do_work(infiles): qcfgs, qstrs = parse_input_headers(infiles) - print_qstr_data(qcfgs, qstrs) + + # get config variables + Qstr.cfg_bytes_len = int(qcfgs["BYTES_IN_LEN"]) + Qstr.cfg_bytes_hash = int(qcfgs["BYTES_IN_HASH"]) + + print_qstr_data(qstrs) if __name__ == "__main__": diff --git a/py/qstr.c b/py/qstr.c index ea700566f4f7d..5cbd6407b18a6 100644 --- a/py/qstr.c +++ b/py/qstr.c @@ -74,34 +74,82 @@ size_t qstr_compute_hash(const byte *data, size_t len) { return hash; } -const qstr_hash_t mp_qstr_const_hashes[] = { +const qstr_hash_t mp_qstr_const_hashes0[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) hash, +#define QDEF0(id, hash, len, str) hash, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; -const qstr_len_t mp_qstr_const_lengths[] = { +const qstr_hash_t mp_qstr_const_hashes1[] = { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) len, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) hash, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }; +const qstr_len_t mp_qstr_const_lengths0[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) len, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_len_t mp_qstr_const_lengths1[] = { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) len, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif +}; + +const qstr_pool_t mp_qstr_special_const_pool = { + NULL, // no previous pool + 0, // no previous pool + MICROPY_ALLOC_QSTR_ENTRIES_INIT, + MP_QSTRspecial_const_number_of + 1, // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes0, + (qstr_len_t *)mp_qstr_const_lengths0, + true, + { + #ifndef NO_QSTR +#define QDEF0(id, hash, len, str) str, +#define QDEF1(id, hash, len, str) + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif + (const char *)"", // spacer for MP_QSTRspecial_const_number_of + }, +}; + const qstr_pool_t mp_qstr_const_pool = { - NULL, // no previous pool - 0, // no previous pool + (qstr_pool_t *)&mp_qstr_special_const_pool, + MP_QSTRspecial_const_number_of + 1, MICROPY_ALLOC_QSTR_ENTRIES_INIT, - MP_QSTRnumber_of, // corresponds to number of strings in array just below - (qstr_hash_t *)mp_qstr_const_hashes, - (qstr_len_t *)mp_qstr_const_lengths, + MP_QSTRnumber_of - + (MP_QSTRspecial_const_number_of + 1), // corresponds to number of strings in array just below + (qstr_hash_t *)mp_qstr_const_hashes1, + (qstr_len_t *)mp_qstr_const_lengths1, + true, // constant qstrs are sorted { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) str, +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) str, #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 #endif }, }; @@ -164,6 +212,7 @@ STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) { pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len; pool->alloc = new_alloc; pool->len = 0; + pool->sorted = false; MP_STATE_VM(last_pool) = pool; DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc); } @@ -185,7 +234,30 @@ qstr qstr_find_strn(const char *str, size_t str_len) { // search pools for the data for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) { - for (mp_uint_t at = 0, top = pool->len; at < top; at++) { + size_t low = 0; + size_t high = pool->len - 1; + + // binary search inside the pool + if (pool->sorted) { + while (high - low > 1) { + size_t mid = (low + high) / 2; + int cmp = pool->hashes[mid] - str_hash; + if (cmp > 0) { + high = mid; + } else { + low = mid; + if (MP_UNLIKELY(cmp == 0)) { + while (low > 0 && pool->hashes[low - 1] == str_hash) { + low--; + } + break; + } + } + } + } + + // sequential search for the remaining strings + for (mp_uint_t at = low; at < high + 1; at++) { if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len && memcmp(pool->qstrs[at], str, str_len) == 0) { return pool->total_prev_len + at; diff --git a/py/qstr.h b/py/qstr.h index 0ef861f33e8d5..22c234e0c4932 100644 --- a/py/qstr.h +++ b/py/qstr.h @@ -38,9 +38,21 @@ // first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr enum { #ifndef NO_QSTR -#define QDEF(id, hash, len, str) id, + +#define QDEF0(id, hash, len, str) id, +#define QDEF1(id, hash, len, str) #include "genhdr/qstrdefs.generated.h" -#undef QDEF +#undef QDEF0 +#undef QDEF1 + + MP_QSTRspecial_const_number_of, // no underscore so it can't clash with any of the above + +#define QDEF0(id, hash, len, str) +#define QDEF1(id, hash, len, str) id, + #include "genhdr/qstrdefs.generated.h" +#undef QDEF0 +#undef QDEF1 + #endif MP_QSTRnumber_of, // no underscore so it can't clash with any of the above }; @@ -71,6 +83,7 @@ typedef struct _qstr_pool_t { size_t len; qstr_hash_t *hashes; qstr_len_t *lengths; + bool sorted; const char *qstrs[]; } qstr_pool_t; diff --git a/tools/makemanifest.py b/tools/makemanifest.py index e69698d3f2340..c019f9a5058d4 100644 --- a/tools/makemanifest.py +++ b/tools/makemanifest.py @@ -415,7 +415,7 @@ def main(): b'#include "py/emitglue.h"\n' b"extern const qstr_pool_t mp_qstr_const_pool;\n" b"const qstr_pool_t mp_qstr_frozen_const_pool = {\n" - b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0\n" + b" (qstr_pool_t*)&mp_qstr_const_pool, MP_QSTRnumber_of, 0, 0, NULL, NULL, false, NULL\n" b"};\n" b'const char mp_frozen_names[] = { MP_FROZEN_STR_NAMES "\\0"};\n' b"const mp_raw_code_t *const mp_frozen_mpy_content[] = {NULL};\n" diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py index 31212fd5bdda8..d1bfa12718b5c 100755 --- a/tools/mpy-tool.py +++ b/tools/mpy-tool.py @@ -1396,8 +1396,8 @@ def freeze_mpy(base_qstrs, compiled_modules): # don't add duplicates if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new: continue - new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")) - new = sorted(new.values(), key=lambda x: x[0]) + new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str) + new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen)) print('#include "py/mpconfig.h"') print('#include "py/objint.h"') @@ -1438,9 +1438,9 @@ def freeze_mpy(base_qstrs, compiled_modules): print("enum {") for i in range(len(new)): if i == 0: - print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i][1]) + print(" MP_QSTR_%s = MP_QSTRnumber_of," % new[i].ident) else: - print(" MP_QSTR_%s," % new[i][1]) + print(" MP_QSTR_%s," % new[i].ident) print("};") # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len @@ -1460,18 +1460,17 @@ def freeze_mpy(base_qstrs, compiled_modules): print() print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {") qstr_size = {"metadata": 0, "data": 0} - for _, _, _, qbytes in new: - qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH) - print(" %d," % qhash) + for q in new: + print(" %d," % q.qhash) print("};") print() print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {") - for _, _, _, qbytes in new: - print(" %d," % len(qbytes)) + for q in new: + print(" %d," % len(q.qbytes)) qstr_size["metadata"] += ( config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH ) - qstr_size["data"] += len(qbytes) + qstr_size["data"] += len(q.qbytes) print("};") print() print("extern const qstr_pool_t mp_qstr_const_pool;") @@ -1482,11 +1481,15 @@ def freeze_mpy(base_qstrs, compiled_modules): print(" %u, // used entries" % len(new)) print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,") print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,") + print(" true, // entries are sorted") print(" {") - for _, _, qstr, qbytes in new: - print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes)) + for q in new: + print(' "%s",' % q.qdata) qstr_content += ( - config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1 + config.MICROPY_QSTR_BYTES_IN_LEN + + config.MICROPY_QSTR_BYTES_IN_HASH + + len(q.qbytes) + + 1 ) print(" },") print("};") @@ -1781,6 +1784,8 @@ def main(): # Create initial list of global qstrs. global_qstrs = GlobalQStrList() + qstrutil.Qstr.cfg_bytes_len = config.MICROPY_QSTR_BYTES_IN_LEN + qstrutil.Qstr.cfg_bytes_hash = config.MICROPY_QSTR_BYTES_IN_HASH # Load all .mpy files. try: pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy