Skip to content

Commit 71f7c2c

Browse files
andrewleech, pi-anl
authored and committed
extmod/re: Use buffer protocol for data to search through.
Signed-off-by: Andrew Leech <andrew@alelec.net>
1 parent 43be1ae commit 71f7c2c

File tree

4 files changed

+68
-18
lines changed

4 files changed

+68
-18
lines changed

extmod/modure.c

Lines changed: 37 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,13 @@ STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
8080
// no match for this group
8181
return mp_const_none;
8282
}
83-
return mp_obj_new_str_of_type(mp_obj_get_type(self->str),
83+
const mp_obj_type_t *str_type = mp_obj_get_type(self->str);
84+
if (str_type != &mp_type_str) {
85+
// bytes, bytearray etc. args should return bytes
86+
str_type = &mp_type_bytes;
87+
}
88+
89+
return mp_obj_new_str_of_type(str_type,
8490
(const byte *)start, self->caps[no * 2 + 1] - start);
8591
}
8692
MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
@@ -120,7 +126,9 @@ STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span
120126
const char *start = self->caps[no * 2];
121127
if (start != NULL) {
122128
// have a match for this group
123-
const char *begin = mp_obj_str_get_str(self->str);
129+
mp_buffer_info_t bufinfo;
130+
mp_get_buffer_raise(self->str, &bufinfo, MP_BUFFER_READ);
131+
const char *begin = bufinfo.buf;
124132
s = start - begin;
125133
e = self->caps[no * 2 + 1] - begin;
126134
}
@@ -203,9 +211,10 @@ STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
203211
self = MP_OBJ_TO_PTR(mod_re_compile(1, args));
204212
}
205213
Subject subj;
206-
size_t len;
207-
subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len);
208-
subj.end = subj.begin + len;
214+
mp_buffer_info_t bufinfo;
215+
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
216+
subj.begin_line = subj.begin = bufinfo.buf;
217+
subj.end = subj.begin + bufinfo.len;
209218
int caps_num = (self->re.sub + 1) * 2;
210219
mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char *, caps_num);
211220
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
@@ -235,10 +244,15 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_search_obj, 2, 4, re_search);
235244
STATIC mp_obj_t re_split(size_t n_args, const mp_obj_t *args) {
236245
mp_obj_re_t *self = MP_OBJ_TO_PTR(args[0]);
237246
Subject subj;
238-
size_t len;
247+
mp_buffer_info_t bufinfo;
239248
const mp_obj_type_t *str_type = mp_obj_get_type(args[1]);
240-
subj.begin_line = subj.begin = mp_obj_str_get_data(args[1], &len);
241-
subj.end = subj.begin + len;
249+
if (str_type != &mp_type_str) {
250+
// bytes, bytearray etc. args should return bytes
251+
str_type = &mp_type_bytes;
252+
}
253+
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
254+
subj.begin_line = subj.begin = bufinfo.buf;
255+
subj.end = subj.begin + bufinfo.len;
242256
int caps_num = (self->re.sub + 1) * 2;
243257

244258
int maxsplit = 0;
@@ -294,11 +308,11 @@ STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
294308
// Note: flags are currently ignored
295309
}
296310

297-
size_t where_len;
298-
const char *where_str = mp_obj_str_get_data(where, &where_len);
299311
Subject subj;
300-
subj.begin_line = subj.begin = where_str;
301-
subj.end = subj.begin + where_len;
312+
mp_buffer_info_t bufinfo;
313+
mp_get_buffer_raise(where, &bufinfo, MP_BUFFER_READ);
314+
subj.begin_line = subj.begin = bufinfo.buf;
315+
subj.end = subj.begin + bufinfo.len;
302316
int caps_num = (self->re.sub + 1) * 2;
303317

304318
vstr_t vstr_return;
@@ -327,10 +341,13 @@ STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
327341
vstr_add_strn(&vstr_return, subj.begin, match->caps[0] - subj.begin);
328342

329343
// Get replacement string
330-
const char *repl = mp_obj_str_get_str((mp_obj_is_callable(replace) ? mp_call_function_1(replace, MP_OBJ_FROM_PTR(match)) : replace));
344+
mp_obj_t repl_obj = (mp_obj_is_callable(replace) ? mp_call_function_1(replace, MP_OBJ_FROM_PTR(match)) : replace);
345+
mp_get_buffer_raise(repl_obj, &bufinfo, MP_BUFFER_READ);
346+
const char *repl = bufinfo.buf;
347+
const char *repl_top = repl + bufinfo.len;
331348

332349
// Append replacement string to result, substituting any regex groups
333-
while (*repl != '\0') {
350+
while (repl < repl_top) {
334351
if (*repl == '\\') {
335352
++repl;
336353
bool is_g_format = false;
@@ -423,8 +440,11 @@ STATIC MP_DEFINE_CONST_OBJ_TYPE(
423440

424441
STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args) {
425442
(void)n_args;
426-
const char *re_str = mp_obj_str_get_str(args[0]);
427-
int size = re1_5_sizecode(re_str);
443+
444+
mp_buffer_info_t bufinfo;
445+
mp_get_buffer_raise(args[0], &bufinfo, MP_BUFFER_READ);
446+
const char *re_str = bufinfo.buf;
447+
int size = re1_5_sizecode(re_str, bufinfo.len);
428448
if (size == -1) {
429449
goto error;
430450
}
@@ -435,7 +455,7 @@ STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args) {
435455
flags = mp_obj_get_int(args[1]);
436456
}
437457
#endif
438-
int error = re1_5_compilecode(&o->re, re_str);
458+
int error = re1_5_compilecode(&o->re, re_str, bufinfo.len);
439459
if (error != 0) {
440460
error:
441461
mp_raise_ValueError(MP_ERROR_TEXT("error in regex"));

tests/extmod/ure1.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,6 +96,23 @@
9696
print(m.group(0))
9797
print("===")
9898

99+
# bytearray / memoryview objects
100+
m = re.match(rb"a.", bytearray(b"ab"))
101+
print(m.group(0))
102+
m = re.match(rb"a.", memoryview(b"ab"))
103+
print(m.group(0))
104+
# While micropython supports bytearray pattern, cpython does not.
105+
# m = re.match(bytearray(b"a."), b"ab")
106+
# print(m.group(0))
107+
print("===")
108+
109+
# null chars
110+
m = re.match("ab.d", "ab\x00d")
111+
print(list(m.group(0)))
112+
m = re.match("ab\x00d", "ab\x00d")
113+
print(list(m.group(0)))
114+
print("===")
115+
99116
# escaping
100117
m = re.match(r"a\.c", "a.c")
101118
print(m.group(0) if m else "")

tests/extmod/ure_split.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,3 +41,8 @@
4141
r = re.compile("^ab|cab")
4242
s = r.split("abababcabab")
4343
print(s)
44+
45+
# bytearray objects
46+
r = re.compile(b"x")
47+
s = r.split(bytearray(b"fooxbar"))
48+
print(s)

tests/extmod/ure_sub.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,13 @@ def A():
2929

3030
print(re.sub("a", A(), "aBCBABCDabcda."))
3131

32+
33+
def B():
34+
return bytearray(b"B")
35+
36+
37+
print(re.sub(b"a", B(), b"aBCBABCDabcda."))
38+
3239
print(
3340
re.sub(
3441
r"def\s+([a-zA-Z_][a-zA-Z_0-9]*)\s*\(\s*\):",
@@ -64,10 +71,11 @@ def A():
6471
except:
6572
print("invalid group")
6673

67-
# Module function takes str/bytes/re.
74+
# Module function takes str/bytes/re/bytearray.
6875
print(re.sub("a", "a", "a"))
6976
print(re.sub(b".", b"a", b"a"))
7077
print(re.sub(re.compile("a"), "a", "a"))
78+
print(re.sub(b"a", bytearray(b"b"), bytearray(b"a")))
7179
try:
7280
re.sub(123, "a", "a")
7381
except TypeError:

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy