Skip to content

Commit 354d5ba

Browse files
committed
py/qstr: Add support for sorted qstr pools.
This provides a significant performance boost for qstr_find_strn, which is called frequently during parsing and loading of .mpy files, as well as during interning of string objects (which happens in most string methods that return new strings). This work was funded through GitHub Sponsors. Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
1 parent 51fab44 commit 354d5ba

File tree

4 files changed

+191
-42
lines changed

4 files changed

+191
-42
lines changed

py/makeqstrdata.py

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,66 @@
220220
"values",
221221
"write",
222222
"zip",
223+
# Additional QSTRs that must have index <255 (these are not part of
224+
# the .mpy compatibility list though).
225+
"__bool__",
226+
"__pos__",
227+
"__neg__",
228+
"__invert__",
229+
"__abs__",
230+
"__float__",
231+
"__complex__",
232+
"__sizeof__",
233+
"__lt__",
234+
"__gt__",
235+
"__eq__",
236+
"__le__",
237+
"__ge__",
238+
"__ne__",
239+
"__contains__",
240+
"__iadd__",
241+
"__isub__",
242+
"__imul__",
243+
"__imatmul__",
244+
"__ifloordiv__",
245+
"__itruediv__",
246+
"__imod__",
247+
"__ipow__",
248+
"__ior__",
249+
"__ixor__",
250+
"__iand__",
251+
"__ilshift__",
252+
"__irshift__",
253+
"__add__",
254+
"__sub__",
255+
"__mul__",
256+
"__matmul__",
257+
"__floordiv__",
258+
"__truediv__",
259+
"__mod__",
260+
"__divmod__",
261+
"__pow__",
262+
"__or__",
263+
"__xor__",
264+
"__and__",
265+
"__lshift__",
266+
"__rshift__",
267+
"__radd__",
268+
"__rsub__",
269+
"__rmul__",
270+
"__rmatmul__",
271+
"__rfloordiv__",
272+
"__rtruediv__",
273+
"__rmod__",
274+
"__rpow__",
275+
"__ror__",
276+
"__rxor__",
277+
"__rand__",
278+
"__rlshift__",
279+
"__rrshift__",
280+
"__get__",
281+
"__set__",
282+
"__delete__",
223283
]
224284

225285

@@ -244,22 +304,13 @@ def esc_char(m):
244304
return re.sub(r"[^A-Za-z0-9_]", esc_char, qst)
245305

246306

307+
static_qstr_list_ident = list(map(qstr_escape, static_qstr_list))
308+
309+
247310
def parse_input_headers(infiles):
248311
qcfgs = {}
249312
qstrs = {}
250313

251-
# add static qstrs
252-
for qstr in static_qstr_list:
253-
# work out the corresponding qstr name
254-
ident = qstr_escape(qstr)
255-
256-
# don't add duplicates
257-
assert ident not in qstrs
258-
259-
# add the qstr to the list, with order number to retain original order in file
260-
order = len(qstrs) - 300000
261-
qstrs[ident] = (order, ident, qstr)
262-
263314
# read the qstrs in from the input files
264315
for infile in infiles:
265316
with open(infile, "rt") as f:
@@ -294,22 +345,12 @@ def parse_input_headers(infiles):
294345
ident = qstr_escape(qstr)
295346

296347
# don't add duplicates
348+
if ident in static_qstr_list_ident:
349+
continue
297350
if ident in qstrs:
298351
continue
299352

300-
# add the qstr to the list, with order number to retain original order in file
301-
order = len(qstrs)
302-
# but put special method names like __add__ at the top of list, so
303-
# that their id's fit into a byte
304-
if ident == "":
305-
# Sort empty qstr above all still
306-
order = -200000
307-
elif ident == "__dir__":
308-
# Put __dir__ after empty qstr for builtin dir() to work
309-
order = -190000
310-
elif ident.startswith("__"):
311-
order -= 100000
312-
qstrs[ident] = (order, ident, qstr)
353+
qstrs[ident] = (ident, qstr)
313354

314355
if not qcfgs:
315356
sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
@@ -348,12 +389,17 @@ def print_qstr_data(qcfgs, qstrs):
348389
print("")
349390

350391
# add NULL qstr with no hash or data
351-
print('QDEF(MP_QSTRnull, 0, 0, "")')
392+
print('QDEF0(MP_QSTRnull, 0, 0, "")')
393+
394+
# add static qstrs to the first unsorted pool
395+
for qstr in static_qstr_list:
396+
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
397+
print("QDEF0(MP_QSTR_%s, %s)" % (qstr_escape(qstr), qbytes))
352398

353-
# go through each qstr and print it out
354-
for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
399+
# add remaining qstrs to the sorted (by value) pool
400+
for ident, qstr in sorted(qstrs.values(), key=lambda x: x[1]):
355401
qbytes = make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr)
356-
print("QDEF(MP_QSTR_%s, %s)" % (ident, qbytes))
402+
print("QDEF1(MP_QSTR_%s, %s)" % (ident, qbytes))
357403

358404

359405
def do_work(infiles):

py/qstr.c

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,37 +79,83 @@ size_t qstr_compute_hash(const byte *data, size_t len) {
7979
}
8080

8181
#if MICROPY_QSTR_BYTES_IN_HASH
82+
const qstr_hash_t mp_qstr_const_hashes_special[] = {
83+
#ifndef NO_QSTR
84+
#define QDEF0(id, hash, len, str) hash,
85+
#define QDEF1(id, hash, len, str)
86+
#include "genhdr/qstrdefs.generated.h"
87+
#undef QDEF0
88+
#undef QDEF1
89+
#endif
90+
};
8291
const qstr_hash_t mp_qstr_const_hashes[] = {
8392
#ifndef NO_QSTR
84-
#define QDEF(id, hash, len, str) hash,
93+
#define QDEF0(id, hash, len, str)
94+
#define QDEF1(id, hash, len, str) hash,
8595
#include "genhdr/qstrdefs.generated.h"
86-
#undef QDEF
96+
#undef QDEF0
97+
#undef QDEF1
8798
#endif
8899
};
89100
#endif
90101

102+
const qstr_len_t mp_qstr_const_lengths_special[] = {
103+
#ifndef NO_QSTR
104+
#define QDEF0(id, hash, len, str) len,
105+
#define QDEF1(id, hash, len, str)
106+
#include "genhdr/qstrdefs.generated.h"
107+
#undef QDEF0
108+
#undef QDEF1
109+
#endif
110+
};
91111
const qstr_len_t mp_qstr_const_lengths[] = {
92112
#ifndef NO_QSTR
93-
#define QDEF(id, hash, len, str) len,
113+
#define QDEF0(id, hash, len, str)
114+
#define QDEF1(id, hash, len, str) len,
94115
#include "genhdr/qstrdefs.generated.h"
95-
#undef QDEF
116+
#undef QDEF0
117+
#undef QDEF1
96118
#endif
97119
};
98120

99-
const qstr_pool_t mp_qstr_const_pool = {
121+
const qstr_pool_t mp_qstr_const_pool_special = {
100122
NULL, // no previous pool
101123
0, // no previous pool
102124
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
103-
MP_QSTRnumber_of, // corresponds to number of strings in array just below
125+
MP_QSTRnumber_of_special, // corresponds to number of strings in array just below
126+
false, // not sorted
127+
#if MICROPY_QSTR_BYTES_IN_HASH
128+
(qstr_hash_t *)mp_qstr_const_hashes_special,
129+
#endif
130+
(qstr_len_t *)mp_qstr_const_lengths_special,
131+
{
132+
#ifndef NO_QSTR
133+
#define QDEF0(id, hash, len, str) str,
134+
#define QDEF1(id, hash, len, str)
135+
#include "genhdr/qstrdefs.generated.h"
136+
#undef QDEF0
137+
#undef QDEF1
138+
#endif
139+
},
140+
};
141+
142+
const qstr_pool_t mp_qstr_const_pool = {
143+
&mp_qstr_const_pool_special,
144+
MP_QSTRnumber_of_special,
145+
MICROPY_ALLOC_QSTR_ENTRIES_INIT,
146+
MP_QSTRnumber_of - MP_QSTRnumber_of_special, // corresponds to number of strings in array just below
147+
true, // sorted
104148
#if MICROPY_QSTR_BYTES_IN_HASH
105149
(qstr_hash_t *)mp_qstr_const_hashes,
106150
#endif
107151
(qstr_len_t *)mp_qstr_const_lengths,
108152
{
109153
#ifndef NO_QSTR
110-
#define QDEF(id, hash, len, str) str,
154+
#define QDEF0(id, hash, len, str)
155+
#define QDEF1(id, hash, len, str) str,
111156
#include "genhdr/qstrdefs.generated.h"
112-
#undef QDEF
157+
#undef QDEF0
158+
#undef QDEF1
113159
#endif
114160
},
115161
};
@@ -205,6 +251,24 @@ STATIC qstr qstr_add(
205251
return MP_STATE_VM(last_pool)->total_prev_len + at;
206252
}
207253

254+
STATIC inline int qstr_strncmp(const char *a, size_t a_len, const char *b) {
255+
while (a_len && *b) {
256+
if (*a != *b) {
257+
return (int)*a - (int)*b;
258+
}
259+
a++;
260+
b++;
261+
a_len--;
262+
}
263+
if (a_len == 0 && *b) {
264+
return -1;
265+
}
266+
if (a_len && !*b) {
267+
return 1;
268+
}
269+
return 0;
270+
}
271+
208272
qstr qstr_find_strn(const char *str, size_t str_len) {
209273
// work out hash of str
210274
#if MICROPY_QSTR_BYTES_IN_HASH
@@ -213,7 +277,30 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
213277

214278
// search pools for the data
215279
for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
216-
for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
280+
size_t low = 0;
281+
size_t high = pool->len - 1;
282+
283+
// binary search inside the pool
284+
if (pool->sorted) {
285+
while (high - low > 1) {
286+
size_t mid = (low + high) / 2;
287+
int cmp = qstr_strncmp(str, str_len, pool->qstrs[mid]);// strncmp();//pool->hashes[mid] - str_hash;
288+
if (cmp < 0) {
289+
high = mid;
290+
} else {
291+
low = mid;
292+
if (MP_UNLIKELY(cmp == 0)) {
293+
// while (low > 0 && pool->hashes[low - 1] == str_hash) {
294+
// low--;
295+
// }
296+
break;
297+
}
298+
}
299+
}
300+
}
301+
302+
// sequential search for the remaining strings
303+
for (mp_uint_t at = low; at < high + 1; at++) {
217304
if (
218305
#if MICROPY_QSTR_BYTES_IN_HASH
219306
pool->hashes[at] == str_hash &&

py/qstr.h

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,21 @@
3838
// first entry in enum will be MP_QSTRnull=0, which indicates invalid/no qstr
3939
enum {
4040
#ifndef NO_QSTR
41-
#define QDEF(id, hash, len, str) id,
41+
#define QDEF0(id, hash, len, str) id,
42+
#define QDEF1(id, hash, len, str)
4243
#include "genhdr/qstrdefs.generated.h"
43-
#undef QDEF
44+
#undef QDEF0
45+
#undef QDEF1
46+
#endif
47+
MP_QSTRnumber_of_special,
48+
MP_QSTRstart_of_main = MP_QSTRnumber_of_special - 1,
49+
50+
#ifndef NO_QSTR
51+
#define QDEF0(id, hash, len, str)
52+
#define QDEF1(id, hash, len, str) id,
53+
#include "genhdr/qstrdefs.generated.h"
54+
#undef QDEF0
55+
#undef QDEF1
4456
#endif
4557
MP_QSTRnumber_of, // no underscore so it can't clash with any of the above
4658
};
@@ -70,6 +82,7 @@ typedef struct _qstr_pool_t {
7082
size_t total_prev_len;
7183
size_t alloc;
7284
size_t len;
85+
bool sorted;
7386
#if MICROPY_QSTR_BYTES_IN_HASH
7487
qstr_hash_t *hashes;
7588
#endif

tools/mpy-tool.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1406,7 +1406,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
14061406
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
14071407
continue
14081408
new[q.qstr_esc] = (len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8"))
1409-
new = sorted(new.values(), key=lambda x: x[0])
1409+
new = sorted(new.values(), key=lambda x: x[2])
14101410

14111411
print('#include "py/mpconfig.h"')
14121412
print('#include "py/objint.h"')
@@ -1489,6 +1489,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
14891489
print(" MP_QSTRnumber_of, // previous pool size")
14901490
print(" %u, // allocated entries" % qstr_pool_alloc)
14911491
print(" %u, // used entries" % len(new))
1492+
print(" true, // sorted")
14921493
if config.MICROPY_QSTR_BYTES_IN_HASH:
14931494
print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
14941495
print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
@@ -1781,7 +1782,9 @@ def main():
17811782

17821783
# set config values for qstrs, and get the existing base set of qstrs
17831784
if args.qstr_header:
1784-
qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header])
1785+
special_qstrs = qstrutil.static_qstr_list_ident
1786+
qcfgs, extra_qstrs = qstrutil.parse_input_headers([args.qstr_header])
1787+
base_qstrs = set(special_qstrs) | set(extra_qstrs.keys())
17851788
config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"])
17861789
config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"])
17871790
else:

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy