@@ -80,7 +80,13 @@ STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
80
80
// no match for this group
81
81
return mp_const_none ;
82
82
}
83
- return mp_obj_new_str_of_type (mp_obj_get_type (self -> str ),
83
+ const mp_obj_type_t * str_type = mp_obj_get_type (self -> str );
84
+ if (str_type != & mp_type_str ) {
85
+ // bytes, bytearray etc. args should return bytes
86
+ str_type = & mp_type_bytes ;
87
+ }
88
+
89
+ return mp_obj_new_str_of_type (str_type ,
84
90
(const byte * )start , self -> caps [no * 2 + 1 ] - start );
85
91
}
86
92
MP_DEFINE_CONST_FUN_OBJ_2 (match_group_obj , match_group );
@@ -120,7 +126,9 @@ STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span
120
126
const char * start = self -> caps [no * 2 ];
121
127
if (start != NULL ) {
122
128
// have a match for this group
123
- const char * begin = mp_obj_str_get_str (self -> str );
129
+ mp_buffer_info_t bufinfo ;
130
+ mp_get_buffer_raise (self -> str , & bufinfo , MP_BUFFER_READ );
131
+ const char * begin = bufinfo .buf ;
124
132
s = start - begin ;
125
133
e = self -> caps [no * 2 + 1 ] - begin ;
126
134
}
@@ -203,9 +211,10 @@ STATIC mp_obj_t ure_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
203
211
self = MP_OBJ_TO_PTR (mod_re_compile (1 , args ));
204
212
}
205
213
Subject subj ;
206
- size_t len ;
207
- subj .begin_line = subj .begin = mp_obj_str_get_data (args [1 ], & len );
208
- subj .end = subj .begin + len ;
214
+ mp_buffer_info_t bufinfo ;
215
+ mp_get_buffer_raise (args [1 ], & bufinfo , MP_BUFFER_READ );
216
+ subj .begin_line = subj .begin = bufinfo .buf ;
217
+ subj .end = subj .begin + bufinfo .len ;
209
218
int caps_num = (self -> re .sub + 1 ) * 2 ;
210
219
mp_obj_match_t * match = m_new_obj_var (mp_obj_match_t , char * , caps_num );
211
220
// cast is a workaround for a bug in msvc: it treats const char** as a const pointer instead of a pointer to pointer to const char
@@ -235,10 +244,15 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_search_obj, 2, 4, re_search);
235
244
STATIC mp_obj_t re_split (size_t n_args , const mp_obj_t * args ) {
236
245
mp_obj_re_t * self = MP_OBJ_TO_PTR (args [0 ]);
237
246
Subject subj ;
238
- size_t len ;
247
+ mp_buffer_info_t bufinfo ;
239
248
const mp_obj_type_t * str_type = mp_obj_get_type (args [1 ]);
240
- subj .begin_line = subj .begin = mp_obj_str_get_data (args [1 ], & len );
241
- subj .end = subj .begin + len ;
249
+ if (str_type != & mp_type_str ) {
250
+ // bytes, bytearray etc. args should return bytes
251
+ str_type = & mp_type_bytes ;
252
+ }
253
+ mp_get_buffer_raise (args [1 ], & bufinfo , MP_BUFFER_READ );
254
+ subj .begin_line = subj .begin = bufinfo .buf ;
255
+ subj .end = subj .begin + bufinfo .len ;
242
256
int caps_num = (self -> re .sub + 1 ) * 2 ;
243
257
244
258
int maxsplit = 0 ;
@@ -294,11 +308,11 @@ STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
294
308
// Note: flags are currently ignored
295
309
}
296
310
297
- size_t where_len ;
298
- const char * where_str = mp_obj_str_get_data (where , & where_len );
299
311
Subject subj ;
300
- subj .begin_line = subj .begin = where_str ;
301
- subj .end = subj .begin + where_len ;
312
+ mp_buffer_info_t bufinfo ;
313
+ mp_get_buffer_raise (where , & bufinfo , MP_BUFFER_READ );
314
+ subj .begin_line = subj .begin = bufinfo .buf ;
315
+ subj .end = subj .begin + bufinfo .len ;
302
316
int caps_num = (self -> re .sub + 1 ) * 2 ;
303
317
304
318
vstr_t vstr_return ;
@@ -327,10 +341,13 @@ STATIC mp_obj_t re_sub_helper(size_t n_args, const mp_obj_t *args) {
327
341
vstr_add_strn (& vstr_return , subj .begin , match -> caps [0 ] - subj .begin );
328
342
329
343
// Get replacement string
330
- const char * repl = mp_obj_str_get_str ((mp_obj_is_callable (replace ) ? mp_call_function_1 (replace , MP_OBJ_FROM_PTR (match )) : replace ));
344
+ mp_obj_t repl_obj = (mp_obj_is_callable (replace ) ? mp_call_function_1 (replace , MP_OBJ_FROM_PTR (match )) : replace );
345
+ mp_get_buffer_raise (repl_obj , & bufinfo , MP_BUFFER_READ );
346
+ const char * repl = bufinfo .buf ;
347
+ const char * repl_top = repl + bufinfo .len ;
331
348
332
349
// Append replacement string to result, substituting any regex groups
333
- while (* repl != '\0' ) {
350
+ while (repl < repl_top ) {
334
351
if (* repl == '\\' ) {
335
352
++ repl ;
336
353
bool is_g_format = false;
@@ -423,8 +440,11 @@ STATIC MP_DEFINE_CONST_OBJ_TYPE(
423
440
424
441
STATIC mp_obj_t mod_re_compile (size_t n_args , const mp_obj_t * args ) {
425
442
(void )n_args ;
426
- const char * re_str = mp_obj_str_get_str (args [0 ]);
427
- int size = re1_5_sizecode (re_str );
443
+
444
+ mp_buffer_info_t bufinfo ;
445
+ mp_get_buffer_raise (args [0 ], & bufinfo , MP_BUFFER_READ );
446
+ const char * re_str = bufinfo .buf ;
447
+ int size = re1_5_sizecode (re_str , bufinfo .len );
428
448
if (size == -1 ) {
429
449
goto error ;
430
450
}
@@ -435,7 +455,7 @@ STATIC mp_obj_t mod_re_compile(size_t n_args, const mp_obj_t *args) {
435
455
flags = mp_obj_get_int (args [1 ]);
436
456
}
437
457
#endif
438
- int error = re1_5_compilecode (& o -> re , re_str );
458
+ int error = re1_5_compilecode (& o -> re , re_str , bufinfo . len );
439
459
if (error != 0 ) {
440
460
error :
441
461
mp_raise_ValueError (MP_ERROR_TEXT ("error in regex" ));
0 commit comments