From 5972b4c05ffe6973820d24161f604ae8db0d299b Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Thu, 20 Mar 2014 16:47:44 +0200 Subject: [PATCH 1/3] objstr: Switch from in-object string data to ptr to separate memory area. This is a prerequisite for having an efficient implementation of str<->bytes conversion, and having that efficient is required with the unfortunate str vs bytes dichotomy in Python3. --- py/objstr.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/py/objstr.c b/py/objstr.c index 77cefa82bcd93..3c5cabe05fcd9 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -14,7 +14,7 @@ typedef struct _mp_obj_str_t { mp_obj_base_t base; machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c) machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte - byte data[]; + const byte *data; } mp_obj_str_t; // use this macro to extract the string hash @@ -636,10 +636,12 @@ const mp_obj_type_t bytes_type = { }; mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { - mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; o->len = len; - *data = o->data; + byte *p = m_new(byte, len + 1); + o->data = p; + *data = p; return o; } @@ -647,17 +649,22 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { assert(MP_OBJ_IS_STR(o_in)); mp_obj_str_t *o = o_in; o->hash = qstr_compute_hash(o->data, o->len); - o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + byte *p = (byte*)o->data; + p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings return o; } STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) { - mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; - 
o->hash = qstr_compute_hash(data, len); o->len = len; - memcpy(o->data, data, len * sizeof(byte)); - o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + if (data) { + o->hash = qstr_compute_hash(data, len); + byte *p = m_new(byte, len + 1); + o->data = p; + memcpy(p, data, len * sizeof(byte)); + p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + } return o; } From be020c27a870feff9773c348fa04be8c54873f70 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 21 Mar 2014 11:39:01 +0200 Subject: [PATCH 2/3] py: Make 'str' be a proper type, support standard constructor args. --- py/builtin.c | 10 ---------- py/objstr.c | 36 ++++++++++++++++++++++++++++++++++++ py/runtime.c | 2 +- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/py/builtin.c b/py/builtin.c index 2e0627fa5fc7a..11b86111ec2d4 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -375,16 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); -STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) { - vstr_t *vstr = vstr_new(); - mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR); - mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); - vstr_free(vstr); - return s; -} - -MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str); - // TODO: This should be type, this is just quick CPython compat hack STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) { if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) { diff --git a/py/objstr.c b/py/objstr.c index 3c5cabe05fcd9..44e84d7090bcb 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -28,6 +28,7 @@ typedef struct _mp_obj_str_t { STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); +STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, 
uint len); /******************************************************************************/ /* str */ @@ -78,6 +79,40 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, } } +STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { + switch (n_args) { + case 0: + return MP_OBJ_NEW_QSTR(MP_QSTR_); + + case 1: + { + vstr_t *vstr = vstr_new(); + mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR); + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; + } + + case 2: + case 3: + { + // TODO: validate 2nd/3rd args + if (!MP_OBJ_IS_TYPE(args[0], &bytes_type)) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected")); + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = str_new(&str_type, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + default: + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments")); + } +} + // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -619,6 +654,7 @@ const mp_obj_type_t str_type = { { &mp_type_type }, .name = MP_QSTR_str, .print = str_print, + .make_new = str_make_new, .binary_op = str_binary_op, .getiter = mp_obj_new_str_iterator, .methods = str_type_methods, diff --git a/py/runtime.c b/py/runtime.c index c268fd5464772..2ab97ed18218c 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -102,6 +102,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = { { MP_QSTR_list, (mp_obj_t)&list_type }, { MP_QSTR_map, (mp_obj_t)&map_type }, { MP_QSTR_set, (mp_obj_t)&set_type }, + { MP_QSTR_str, (mp_obj_t)&str_type }, { MP_QSTR_super, (mp_obj_t)&super_type }, { MP_QSTR_tuple, (mp_obj_t)&tuple_type }, { 
MP_QSTR_type, (mp_obj_t)&mp_type_type }, @@ -137,7 +138,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = { { MP_QSTR_repr, (mp_obj_t)&mp_builtin_repr_obj }, { MP_QSTR_sorted, (mp_obj_t)&mp_builtin_sorted_obj }, { MP_QSTR_sum, (mp_obj_t)&mp_builtin_sum_obj }, - { MP_QSTR_str, (mp_obj_t)&mp_builtin_str_obj }, { MP_QSTR_bytearray, (mp_obj_t)&mp_builtin_bytearray_obj }, // built-in exceptions From 1ecea7c7539e73f105fef25da8a3bde7783da755 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 21 Mar 2014 23:46:59 +0200 Subject: [PATCH 3/3] py: Make 'bytes' be a proper type, support standard constructor args. --- py/builtin.c | 12 ------- py/objstr.c | 77 ++++++++++++++++++++++++++++++++++++++++++- py/runtime.c | 2 +- tests/basics/bytes.py | 28 ++++++++++++++++ 4 files changed, 105 insertions(+), 14 deletions(-) diff --git a/py/builtin.c b/py/builtin.c index 11b86111ec2d4..93e91072c4aa0 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -375,18 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); -// TODO: This should be type, this is just quick CPython compat hack -STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) { - if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) { - assert(0); - } - // Currently, MicroPython strings are mix between CPython byte and unicode - // strings. So, conversion is null so far. 
- return args[0]; -} - -MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes); - STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) { return mp_obj_new_int((machine_int_t)o_in); } diff --git a/py/objstr.c b/py/objstr.c index 44e84d7090bcb..35a948700c856 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -17,6 +17,8 @@ typedef struct _mp_obj_str_t { const byte *data; } mp_obj_str_t; +const mp_obj_t mp_const_empty_bytes; + // use this macro to extract the string hash #define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } @@ -113,6 +115,75 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ } } +STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { + if (n_args == 0) { + return mp_const_empty_bytes; + } + + if (MP_OBJ_IS_STR(args[0])) { + if (n_args < 2 || n_args > 3) { + goto wrong_args; + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + if (n_args > 1) { + goto wrong_args; + } + + if (MP_OBJ_IS_SMALL_INT(args[0])) { + uint len = MP_OBJ_SMALL_INT_VALUE(args[0]); + byte *data; + + mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data); + memset(data, 0, len); + return mp_obj_str_builder_end(o); + } + + int len; + byte *data; + vstr_t *vstr = NULL; + mp_obj_t o = NULL; + // Try to create array of exact len if initializer len is known + mp_obj_t len_in = mp_obj_len_maybe(args[0]); + if (len_in == MP_OBJ_NULL) { + len = -1; + vstr = vstr_new(); + } else { + len = MP_OBJ_SMALL_INT_VALUE(len_in); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + } + + mp_obj_t iterable = rt_getiter(args[0]); + mp_obj_t item; + while ((item = rt_iternext(iterable)) != 
mp_const_stop_iteration) { + if (len == -1) { + vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item)); + } else { + *data++ = MP_OBJ_SMALL_INT_VALUE(item); + } + } + + if (len == -1) { + vstr_shrink(vstr); + // TODO: Optimize, borrow buffer from vstr + len = vstr_len(vstr); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + memcpy(data, vstr_str(vstr), len); + vstr_free(vstr); + } + + return mp_obj_str_builder_end(o); + +wrong_args: + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments")); +} + // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -666,11 +737,16 @@ const mp_obj_type_t bytes_type = { { &mp_type_type }, .name = MP_QSTR_bytes, .print = str_print, + .make_new = bytes_make_new, .binary_op = str_binary_op, .getiter = mp_obj_new_bytes_iterator, .methods = str_type_methods, }; +// the zero-length bytes +STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL}; +const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj; + mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; @@ -682,7 +758,6 @@ mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **da } mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { - assert(MP_OBJ_IS_STR(o_in)); mp_obj_str_t *o = o_in; o->hash = qstr_compute_hash(o->data, o->len); byte *p = (byte*)o->data; diff --git a/py/runtime.c b/py/runtime.c index 2ab97ed18218c..4bcb91c5470f1 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = { // built-in types { MP_QSTR_bool, (mp_obj_t)&bool_type }, + { MP_QSTR_bytes, (mp_obj_t)&bytes_type }, #if MICROPY_ENABLE_FLOAT { MP_QSTR_complex, (mp_obj_t)&mp_type_complex }, #endif @@ -115,7 +116,6 @@ STATIC const 
mp_builtin_elem_t builtin_table[] = { { MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj }, { MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj }, { MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj }, - { MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj }, { MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj }, { MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj }, { MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj }, diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py index 7d0cf22d44e13..a084bc39949c4 100644 --- a/tests/basics/bytes.py +++ b/tests/basics/bytes.py @@ -4,8 +4,36 @@ print(repr(a)) print(a[0], a[2]) print(a[-1]) +print(str(a, "utf-8")) +print(str(a, "utf-8", "ignore")) +try: + str(a, "utf-8", "ignore", "toomuch") +except TypeError: + print("TypeError") s = 0 for i in a: s += i print(s) + + +print(bytes("abc", "utf-8")) +print(bytes("abc", "utf-8", "replace")) +try: + bytes("abc") +except TypeError: + print("TypeError") +try: + bytes("abc", "utf-8", "replace", "toomuch") +except TypeError: + print("TypeError") + +print(bytes(3)) + +print(bytes([3, 2, 1])) +print(bytes(range(5))) + +def gen(): + for i in range(4): + yield i +print(bytes(gen())) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy