Skip to content

Commit 660c810

Browse files
committed
py/qstr: Sort qstrs by hash.
Instead of sorting by string, sort qstrs by (hash, len). This allows faster binary serach on qstr_find_strn, since it's faster to compare hashes than strings. A few strings needed to be moved to special string pool (QDEF0) because their qstr is assumed to be small (8 bit) on py/scope.c
1 parent d254396 commit 660c810

File tree

3 files changed

+21
-14
lines changed

3 files changed

+21
-14
lines changed

py/makeqstrdata.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@
6161
" ",
6262
"*",
6363
"/",
64+
"<dictcomp>",
65+
"<genexpr>",
66+
"<lambda>",
67+
"<listcomp>",
6468
"<module>",
69+
"<setcomp>",
6570
"_",
6671
"__call__",
6772
"__class__",
@@ -373,7 +378,7 @@ def print_qstr_data(qstrs):
373378
print('QDEF0(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
374379

375380
# go through each qstr in pool 1 and print it out. pool1 is regularly sorted.
376-
for q in sorted(q1_values, key=lambda x: x.qstr):
381+
for q in sorted(q1_values, key=lambda x: (x.qhash, x.qlen)):
377382
print('QDEF1(MP_QSTR_%s, %d, %d, "%s")' % (q.ident, q.qhash, q.qlen, q.qdata))
378383

379384

py/qstr.c

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -242,23 +242,25 @@ qstr qstr_find_strn(const char *str, size_t str_len) {
242242
if (pool->sorted) {
243243
while (high - low > MP_QSTR_SEARCH_THRESHOLD) {
244244
size_t mid = (low + high + 1) / 2;
245-
size_t len = pool->lengths[mid];
246-
if (len > str_len) {
247-
len = str_len;
248-
}
249-
int cmp = memcmp(pool->qstrs[mid], str, str_len);
245+
int cmp = pool->hashes[mid] - str_hash;
246+
if (cmp == 0) cmp = pool->lengths[mid] - str_len;
250247
if (cmp < 0) {
251248
low = mid;
252249
} else if (cmp > 0) {
253250
high = mid;
254251
} else {
255-
if (pool->lengths[mid] < str_len) {
256-
low = mid;
257-
} else if (pool->lengths[mid] > str_len) {
258-
high = mid;
259-
} else {
260-
return pool->total_prev_len + mid;
261-
}
252+
// avoid a rare (hash,len) collisions
253+
while (MP_UNLIKELY(
254+
pool->lengths[mid] != str_len ||
255+
memcmp(pool->qstrs[mid], str, str_len) != 0)) {
256+
mid++;
257+
if (mid > high ||
258+
pool->hashes[mid] != str_hash ||
259+
pool->lengths[mid] != str_len) {
260+
return 0;
261+
}
262+
}
263+
return pool->total_prev_len + mid;
262264
}
263265
}
264266
}

tools/mpy-tool.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1273,7 +1273,7 @@ def freeze_mpy(base_qstrs, compiled_modules):
12731273
if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in new:
12741274
continue
12751275
new[q.qstr_esc] = qstrutil.Qstr(len(new), q.qstr_esc, q.str)
1276-
new = sorted(new.values(), key=lambda x: x.qstr)
1276+
new = sorted(new.values(), key=lambda x: (x.qhash, x.qlen))
12771277

12781278
print('#include "py/mpconfig.h"')
12791279
print('#include "py/objint.h"')

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy