Skip to content

Commit 06f22cb

Browse files
committed
When reading data from a file into a str, check that it is valid UTF-8
Otherwise, the invalid string can cause unexpected behavior later when it is print()ed; in particular, it can break CircuitPython's WebREPL.
1 parent 76f03a2 commit 06f22cb

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

py/stream.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "py/objstr.h"
3232
#include "py/stream.h"
3333
#include "py/runtime.h"
34+
#include "py/unicode.h"
3435
#include "supervisor/shared/translate/translate.h"
3536

3637
// This file defines generic Python stream read/write methods which
@@ -43,6 +44,13 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in);
4344

4445
#define STREAM_CONTENT_TYPE(stream) (((stream)->is_text) ? &mp_type_str : &mp_type_bytes)
4546

47+
static mp_obj_t mp_obj_new_str_from_vstr_check(const mp_obj_type_t *type, vstr_t *vstr) {
48+
if (type == &mp_type_str && !utf8_check((void *)vstr->buf, vstr->len)) {
49+
mp_raise_msg(&mp_type_UnicodeError, NULL);
50+
}
51+
return mp_obj_new_str_from_vstr(type, vstr);
52+
}
53+
4654
// Returns error condition in *errcode, if non-zero, return value is number of bytes written
4755
// before error condition occurred. If *errcode == 0, returns total bytes written (which will
4856
// be equal to input size).
@@ -201,8 +209,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl
201209
}
202210
}
203211
}
204-
205-
return mp_obj_new_str_from_vstr(&mp_type_str, &vstr);
212+
return mp_obj_new_str_from_vstr_check(&mp_type_str, &vstr);
206213
}
207214
#endif
208215

@@ -223,7 +230,7 @@ STATIC mp_obj_t stream_read_generic(size_t n_args, const mp_obj_t *args, byte fl
223230
mp_raise_OSError(error);
224231
} else {
225232
vstr.len = out_sz;
226-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
233+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
227234
}
228235
}
229236

@@ -364,7 +371,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
364371
}
365372

366373
vstr.len = total_size;
367-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
374+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
368375
}
369376

370377
// Unbuffered, inefficient implementation of readline() for raw I/O files.
@@ -417,7 +424,7 @@ STATIC mp_obj_t stream_unbuffered_readline(size_t n_args, const mp_obj_t *args)
417424
}
418425
}
419426

420-
return mp_obj_new_str_from_vstr(STREAM_CONTENT_TYPE(stream_p), &vstr);
427+
return mp_obj_new_str_from_vstr_check(STREAM_CONTENT_TYPE(stream_p), &vstr);
421428
}
422429
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_stream_unbuffered_readline_obj, 1, 2, stream_unbuffered_readline);
423430

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy