From f765d93981d1db0fb311cba5a2cd0b9c208a8a9f Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 8 May 2019 23:50:35 -0400 Subject: [PATCH 01/15] py/binary,objint: Add overflow checks for int to bytes conversions. For both small and long integers, raise an exception if calling struct.pack, adding an element to an array.array, or formatting an int with int.to_bytes would overflow the requested size. (Cherry-picked from CircuitPython commit 095c8440.) Signed-off-by: Angus Gratton --- py/binary.c | 20 ++++++++++++++++---- py/objint.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ py/objint.h | 1 + 3 files changed, 61 insertions(+), 4 deletions(-) diff --git a/py/binary.c b/py/binary.c index 48d3421bca963..b8e7e2471a890 100644 --- a/py/binary.c +++ b/py/binary.c @@ -441,15 +441,18 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p break; } #endif - default: + default: { + bool signed_type = is_signed(val_type); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { + mp_obj_int_buffer_overflow_check(val_in, size, signed_type); mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); return; } #endif val = mp_obj_get_int(val_in); + mp_obj_int_buffer_overflow_check(val_in, size, signed_type); // zero/sign extend if needed if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { int c = (mp_int_t)val < 0 ? 
0xff : 0x00; @@ -459,6 +462,7 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p } } break; + } } mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); @@ -478,16 +482,24 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ case 'O': ((mp_obj_t *)p)[index] = val_in; break; - default: + default: { + size_t size = mp_binary_get_size('@', typecode, NULL); + bool signed_type = is_signed(typecode); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - size_t size = mp_binary_get_size('@', typecode, NULL); + mp_obj_int_buffer_overflow_check(val_in, size, signed_type); mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, size, (uint8_t *)p + index * size); return; } #endif - mp_binary_set_val_array_from_int(typecode, p, index, mp_obj_get_int(val_in)); + mp_int_t val = mp_obj_get_int(val_in); + if (val < 0 && typecode == BYTEARRAY_TYPECODE) { + val = val & 0xFF; + } + mp_obj_int_buffer_overflow_check(mp_obj_new_int(val), size, signed_type); + mp_binary_set_val_array_from_int(typecode, p, index, val); + } } } diff --git a/py/objint.c b/py/objint.c index 87d8a27852d34..7f1c618564cd5 100644 --- a/py/objint.c +++ b/py/objint.c @@ -301,6 +301,48 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co return b; } +void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { + if (is_signed) { + // edge = 1 << (nbytes * 8 - 1) + mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT, + mp_obj_new_int(1), + mp_obj_new_int(nbytes * 8 - 1)); + + // if self >= edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_MORE_EQUAL, self_in, edge) == mp_const_true) { + goto raise; + } + + // edge = -edge + edge = mp_unary_op(MP_UNARY_OP_NEGATIVE, edge); + + // if self < edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_LESS, self_in, edge) == mp_const_true) { + goto raise; + } + } else { + if 
(mp_obj_int_sign(self_in) < 0) { + // Negative numbers never fit in an unsigned value + goto raise; + } + + // edge = 1 << (nbytes * 8) + mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT, + mp_obj_new_int(1), + mp_obj_new_int(nbytes * 8)); + + // if self >= edge, we don't fit + if (mp_binary_op(MP_BINARY_OP_MORE_EQUAL, self_in, edge) == mp_const_true) { + goto raise; + } + } + + return; + +raise: + mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); +} + #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE int mp_obj_int_sign(mp_obj_t self_in) { @@ -435,6 +477,8 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; + mp_obj_int_buffer_overflow_check(args[0], dlen, false); + #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (!mp_obj_is_small_int(args[0])) { overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); diff --git a/py/objint.h b/py/objint.h index 28930e35adb49..663a15ab107e2 100644 --- a/py/objint.h +++ b/py/objint.h @@ -53,6 +53,7 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co int base, const char *prefix, char base_char, char comma); char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in, int base, const char *prefix, char base_char, char comma); +void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed); mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); // Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. From 685d2b8e00b8216fcc1e4eeb951866e030d1d9c1 Mon Sep 17 00:00:00 2001 From: Dan Halbert Date: Sun, 12 May 2019 00:10:53 -0400 Subject: [PATCH 02/15] py/objint: Handle truth values; speed up smallint checks. 
Cherry-picked from CircuitPython commit d103ac1d. Signed-off-by: Angus Gratton --- py/binary.c | 25 ++++++++------ py/objint.c | 93 +++++++++++++++++++++++++++++++++++++---------------- py/objint.h | 5 +++ 3 files changed, 86 insertions(+), 37 deletions(-) diff --git a/py/binary.c b/py/binary.c index b8e7e2471a890..fcb76d0b65131 100644 --- a/py/binary.c +++ b/py/binary.c @@ -445,20 +445,23 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p bool signed_type = is_signed(val_type); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { + // It's a longint. mp_obj_int_buffer_overflow_check(val_in, size, signed_type); mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); return; } #endif - - val = mp_obj_get_int(val_in); - mp_obj_int_buffer_overflow_check(val_in, size, signed_type); - // zero/sign extend if needed - if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { - int c = (mp_int_t)val < 0 ? 0xff : 0x00; - memset(p, c, size); - if (struct_type == '>') { - p += size - sizeof(val); + { + val = mp_obj_get_int(val_in); + // Small int checking is separate, to be fast. + mp_small_int_buffer_overflow_check(val, size, signed_type); + // zero/sign extend if needed + if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { + int c = (is_signed(val_type) && (mp_int_t)val < 0) ? 0xff : 0x00; + memset(p, c, size); + if (struct_type == '>') { + p += size - sizeof(val); + } } } break; @@ -487,6 +490,7 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ bool signed_type = is_signed(typecode); #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { + // It's a long int. 
mp_obj_int_buffer_overflow_check(val_in, size, signed_type); mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, size, (uint8_t *)p + index * size); @@ -497,7 +501,8 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ if (val < 0 && typecode == BYTEARRAY_TYPECODE) { val = val & 0xFF; } - mp_obj_int_buffer_overflow_check(mp_obj_new_int(val), size, signed_type); + // Small int checking is separate, to be fast. + mp_small_int_buffer_overflow_check(val, size, signed_type); mp_binary_set_val_array_from_int(typecode, p, index, val); } } diff --git a/py/objint.c b/py/objint.c index 7f1c618564cd5..51b9482a17f66 100644 --- a/py/objint.c +++ b/py/objint.c @@ -301,6 +301,8 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co return b; } +#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE + void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { if (is_signed) { // edge = 1 << (nbytes * 8 - 1) @@ -343,6 +345,64 @@ void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_s mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); } +#endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE + +void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) { + // Fast path for zero. + if (val == 0) { + return; + } + if (!is_signed) { + if (val >= 0) { + // Using signed constants here, not UINT8_MAX, etc. to avoid any unintended conversions. + if (val <= 0xff) { + return; // Small values fit in any number of nbytes. + } + if (nbytes == 2 && val <= 0xffff) { + return; + } + #if !defined(__LP64__) + // 32-bit ints and pointers + if (nbytes >= 4) { + return; // Any mp_int_t will fit. + } + #else + // 64-bit ints and pointers + if (nbytes == 4 && val <= 0xffffffff) { + return; + } + if (nbytes >= 8) { + return; // Any mp_int_t will fit. + } + #endif + } // Negative, fall through to failure. 
+ } else { + // signed + if (val >= INT8_MIN && val <= INT8_MAX) { + return; // Small values fit in any number of nbytes. + } + if (nbytes == 2 && val >= INT16_MIN && val <= INT16_MAX) { + return; + } + #if !defined(__LP64__) + // 32-bit ints and pointers + if (nbytes >= 4) { + return; // Any mp_int_t will fit. + } + #else + // 64-bit ints and pointers + if (nbytes == 4 && val >= INT32_MIN && val <= INT32_MAX) { + return; + } + if (nbytes >= 8) { + return; // Any mp_int_t will fit. + } + #endif + } // Fall through to failure. + + mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); +} + #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE int mp_obj_int_sign(mp_obj_t self_in) { @@ -465,7 +525,6 @@ static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { // TODO: Support signed (currently behaves as if signed=(val < 0)) - bool overflow; mp_int_t dlen = n_args < 2 ? 
1 : mp_obj_get_int(args[1]); if (dlen < 0) { @@ -477,38 +536,18 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - mp_obj_int_buffer_overflow_check(args[0], dlen, false); - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (!mp_obj_is_small_int(args[0])) { - overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); + mp_obj_int_buffer_overflow_check(args[0], dlen, false); + mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); } else #endif { mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); - int slen = 0; // Number of bytes to represent val - - // This logic has a twin in objint_longlong.c - if (val > 0) { - slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8; - } else if (val < -1) { - slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8; - } else { - // clz of 0 is defined, so 0 and -1 map to 0 and 1 - slen = -val; - } - - if (slen <= dlen) { - memset(data, val < 0 ? 0xFF : 0x00, dlen); - mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val); - overflow = false; - } else { - overflow = true; - } - } - - if (overflow) { - mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small")); + // Small int checking is separate, to be fast. + mp_small_int_buffer_overflow_check(val, dlen, false); + size_t l = MIN((size_t)dlen, sizeof(val)); + mp_binary_set_int(l, big_endian, data + (big_endian ? 
(dlen - l) : 0), val); } return mp_obj_new_bytes_from_vstr(&vstr); diff --git a/py/objint.h b/py/objint.h index 663a15ab107e2..c75e6fa834f04 100644 --- a/py/objint.h +++ b/py/objint.h @@ -53,7 +53,12 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co int base, const char *prefix, char base_char, char comma); char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in, int base, const char *prefix, char base_char, char comma); + +#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed); +#endif +void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed); + mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); // Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. From c73ef5b9d6721b17e1d79db089faaa05d7d15d12 Mon Sep 17 00:00:00 2001 From: Dan Halbert Date: Fri, 14 Feb 2020 15:12:20 -0500 Subject: [PATCH 03/15] py/objint: Implement to_bytes(..., signed=True). Cherry-picked from CircuitPython commit c592bd61. 
Signed-off-by: Angus Gratton --- py/objint.c | 36 +++++++++++++++++-------- tests/basics/int_bytes.py | 45 ++++++++++++++++++-------------- tests/basics/int_bytes_intbig.py | 11 +++++++- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/py/objint.c b/py/objint.c index 51b9482a17f66..3ffeda07fba66 100644 --- a/py/objint.c +++ b/py/objint.c @@ -523,36 +523,50 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) { static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes); static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj)); -static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) { - // TODO: Support signed (currently behaves as if signed=(val < 0)) - - mp_int_t dlen = n_args < 2 ? 1 : mp_obj_get_int(args[1]); +static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { + enum { ARG_length, ARG_byteorder, ARG_signed }; + static const mp_arg_t allowed_args[] = { + { MP_QSTR_length, MP_ARG_REQUIRED | MP_ARG_INT, {} }, + { MP_QSTR_byteorder, MP_ARG_REQUIRED | MP_ARG_OBJ, {} }, + { MP_QSTR_signed, MP_ARG_KW_ONLY | MP_ARG_BOOL, {.u_bool = false} }, + }; + mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; + mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + + mp_int_t dlen = args[ARG_length].u_int; if (dlen < 0) { mp_raise_ValueError(NULL); } - bool big_endian = n_args < 3 || args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little); + + mp_obj_t self = pos_args[0]; + bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little); + bool signed_ = args[ARG_signed].u_bool; vstr_t vstr; vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (!mp_obj_is_small_int(args[0])) { - mp_obj_int_buffer_overflow_check(args[0], dlen, false); - mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data); + if 
(!mp_obj_is_small_int(self)) { + mp_obj_int_buffer_overflow_check(self, dlen, signed_); + mp_obj_int_to_bytes_impl(self, big_endian, dlen, data); } else #endif { - mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]); + mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self); // Small int checking is separate, to be fast. - mp_small_int_buffer_overflow_check(val, dlen, false); + mp_small_int_buffer_overflow_check(val, dlen, signed_); size_t l = MIN((size_t)dlen, sizeof(val)); + if (val < 0) { + // Sign extend negative numbers. + memset(data, -1, dlen); + } mp_binary_set_int(l, big_endian, data + (big_endian ? (dlen - l) : 0), val); } return mp_obj_new_bytes_from_vstr(&vstr); } -static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_to_bytes_obj, 1, 4, int_to_bytes); +static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes); static const mp_rom_map_elem_t int_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) }, diff --git a/tests/basics/int_bytes.py b/tests/basics/int_bytes.py index 15c12640e951b..7a1edc2ccecd6 100644 --- a/tests/basics/int_bytes.py +++ b/tests/basics/int_bytes.py @@ -1,11 +1,16 @@ import sys print((10).to_bytes(1, "little")) +print((-10).to_bytes(1, "little", signed=True)) +# Test fitting in length that's not a power of two. 
+print((0x10000).to_bytes(3, "little")) print((111111).to_bytes(4, "little")) +print((-111111).to_bytes(4, "little", signed=True)) print((100).to_bytes(10, "little")) print(int.from_bytes(b"\x00\x01\0\0\0\0\0\0", "little")) print(int.from_bytes(b"\x01\0\0\0\0\0\0\0", "little")) print(int.from_bytes(b"\x00\x01\0\0\0\0\0\0", "little")) +print((-100).to_bytes(10, "little", signed=True)) # check that extra zero bytes don't change the internal int value print(int.from_bytes(bytes(20), "little") == 0) @@ -13,7 +18,9 @@ # big-endian conversion print((10).to_bytes(1, "big")) +print((-10).to_bytes(1, "big", signed=True)) print((100).to_bytes(10, "big")) +print((-100).to_bytes(10, "big", signed=True)) print(int.from_bytes(b"\0\0\0\0\0\0\0\0\0\x01", "big")) print(int.from_bytes(b"\x01\0", "big")) @@ -60,36 +67,36 @@ # negative representations -# MicroPython int.to_bytes() behaves as if signed=True for negative numbers -if "micropython" in repr(sys.implementation): - - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e) -else: - # Implement MicroPython compatible behaviour for CPython - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e, signed=i < 0) +print((-1).to_bytes(1, "little", signed=True)) +print((-1).to_bytes(3, "little", signed=True)) +print((-1).to_bytes(1, "big", signed=True)) +print((-1).to_bytes(3, "big", signed=True)) +print((-128).to_bytes(1, "big", signed=True)) +print((-32768).to_bytes(2, "big", signed=True)) +print((-(1 << 23)).to_bytes(3, "big", signed=True)) +# negative numbers should raise an error if signed=False, regardless of fitting or not +try: + (-256).to_bytes(2, "little", signed=False) +except OverflowError: + print("OverflowError") -print(to_bytes_compat(-1, 1, "little")) -print(to_bytes_compat(-1, 3, "little")) -print(to_bytes_compat(-1, 1, "big")) -print(to_bytes_compat(-1, 3, "big")) -print(to_bytes_compat(-128, 1, "big")) -print(to_bytes_compat(-32768, 2, "big")) -print(to_bytes_compat(-(1 << 23), 3, "big")) +try: + (-1).to_bytes(1, 
"little") +except OverflowError: + print("OverflowError") try: - print(to_bytes_compat(-129, 1, "big")) + print((-129).to_bytes(1, "big")) except OverflowError: print("OverflowError") try: - print(to_bytes_compat(-32769, 2, "big")) + print((-32769).to_bytes(2, "big")) except OverflowError: print("OverflowError") try: - print(to_bytes_compat(-(1 << 23) - 1, 2, "big")) + print((-(1 << 23) - 1).to_bytes(2, "big")) except OverflowError: print("OverflowError") diff --git a/tests/basics/int_bytes_intbig.py b/tests/basics/int_bytes_intbig.py index 13cf5d0085890..a0825c03882ef 100644 --- a/tests/basics/int_bytes_intbig.py +++ b/tests/basics/int_bytes_intbig.py @@ -1,7 +1,9 @@ import sys print((2**64).to_bytes(9, "little")) +print((-(2**64)).to_bytes(9, "little", signed=True)) print((2**64).to_bytes(9, "big")) +print((-(2**64)).to_bytes(9, "big", signed=True)) b = bytes(range(20)) @@ -25,7 +27,7 @@ except OverflowError: print("OverflowError") -# or one that it too short +# or one that is too short try: ib.to_bytes(18, "big") except OverflowError: @@ -33,6 +35,13 @@ # negative representations +# negative numbers should raise an error if signed=False +try: + (-(2**64)).to_bytes(9, "little", signed=False) +except OverflowError: + print("OverflowError") + + # MicroPython int.to_bytes() behaves as if signed=True for negative numbers if "micropython" in repr(sys.implementation): From 25f4fff249c9db23bfde856895a18ab7a8cd6535 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Thu, 21 Nov 2024 16:07:12 +1100 Subject: [PATCH 04/15] tests: Update for new int.to_bytes() behaviour. 
Signed-off-by: Angus Gratton --- tests/basics/int_bytes_int64.py | 17 ++++------------- tests/basics/int_bytes_intbig.py | 27 +++++++++++---------------- tests/cpydiff/types_int_to_bytes.py | 16 ---------------- 3 files changed, 15 insertions(+), 45 deletions(-) delete mode 100644 tests/cpydiff/types_int_to_bytes.py diff --git a/tests/basics/int_bytes_int64.py b/tests/basics/int_bytes_int64.py index 032dbccc5b14e..c6e062ecb44ab 100644 --- a/tests/basics/int_bytes_int64.py +++ b/tests/basics/int_bytes_int64.py @@ -36,17 +36,8 @@ # negative representations -# MicroPython int.to_bytes() behaves as if signed=True for negative numbers -if "micropython" in repr(sys.implementation): +x = -x - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e) -else: - # Implement MicroPython compatible behaviour for CPython - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e, signed=i < 0) - - -print(to_bytes_compat(-x, 8, "little")) -print(to_bytes_compat(-x, 20, "big")) -print(to_bytes_compat(-x, 20, "little")) +print(x.to_bytes(8, "little", signed=True)) +print(x.to_bytes(20, "big", signed=True)) +print(x.to_bytes(20, "little", signed=True)) diff --git a/tests/basics/int_bytes_intbig.py b/tests/basics/int_bytes_intbig.py index a0825c03882ef..3613c8fc716e4 100644 --- a/tests/basics/int_bytes_intbig.py +++ b/tests/basics/int_bytes_intbig.py @@ -33,6 +33,12 @@ except OverflowError: print("OverflowError") +# including when signed +try: + ib.to_bytes(18, "big", signed=True) +except OverflowError: + print("OverflowError") + # negative representations # negative numbers should raise an error if signed=False @@ -42,19 +48,8 @@ print("OverflowError") -# MicroPython int.to_bytes() behaves as if signed=True for negative numbers -if "micropython" in repr(sys.implementation): - - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e) -else: - # Implement MicroPython compatible behaviour for CPython - def to_bytes_compat(i, l, e): - return i.to_bytes(l, e, signed=i < 0) - - 
-print(to_bytes_compat(-ib, 20, "big")) -print(to_bytes_compat(ib * -ib, 40, "big")) +print((-ib).to_bytes(20, "big", signed=True)) +print((ib * -ib).to_bytes(40, "big", signed=True)) # case where an additional byte is needed for sign bit ib = (2**64) - 1 @@ -63,9 +58,9 @@ def to_bytes_compat(i, l, e): ib *= -1 try: - print(to_bytes_compat(ib, 8, "little")) + print(ib.to_bytes(8, "little", signed=True)) except OverflowError: print("OverflowError") -print(to_bytes_compat(ib, 9, "little")) -print(to_bytes_compat(ib, 9, "big")) +print(ib.to_bytes(9, "little", signed=True)) +print(ib.to_bytes(9, "big", signed=True)) diff --git a/tests/cpydiff/types_int_to_bytes.py b/tests/cpydiff/types_int_to_bytes.py deleted file mode 100644 index 6530a2a32ecb7..0000000000000 --- a/tests/cpydiff/types_int_to_bytes.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -categories: Types,int -description: ``to_bytes`` method doesn't implement signed parameter. -cause: The ``signed`` keyword-only parameter is not implemented for ``int.to_bytes()``. - -When the integer is negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=True)`` - -When the integer is non-negative, MicroPython behaves the same as CPython ``int.to_bytes(..., signed=False)``. - -(The difference is subtle, but in CPython a positive integer converted with ``signed=True`` may require one byte more in the output length, in order to fit the 0 sign bit.) - -workaround: Take care when calling ``to_bytes()`` on an integer value which may be negative. -""" - -x = -1 -print(x.to_bytes(1, "big")) From c0129ec84221b0f11275c14cac8403f13a79a085 Mon Sep 17 00:00:00 2001 From: Dan Halbert Date: Sun, 12 May 2019 11:17:29 -0400 Subject: [PATCH 05/15] py/objint: Use approx of original @godlygeek code for smallints. Fixes case where destination nbytes is non-power-of-2 size. Also adds tests. Cherry-picked from CircuitPython commit 8664a65. 
Signed-off-by: Angus Gratton --- py/objint.c | 61 ++++++++----------------- tests/basics/bigint_array_overflow.py | 31 +++++++++++++ tests/basics/smallint_array_overflow.py | 54 ++++++++++++++++++++++ 3 files changed, 104 insertions(+), 42 deletions(-) create mode 100644 tests/basics/bigint_array_overflow.py create mode 100644 tests/basics/smallint_array_overflow.py diff --git a/py/objint.c b/py/objint.c index 3ffeda07fba66..5386cbee01bd1 100644 --- a/py/objint.c +++ b/py/objint.c @@ -352,53 +352,30 @@ void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_sig if (val == 0) { return; } - if (!is_signed) { - if (val >= 0) { - // Using signed constants here, not UINT8_MAX, etc. to avoid any unintended conversions. - if (val <= 0xff) { - return; // Small values fit in any number of nbytes. - } - if (nbytes == 2 && val <= 0xffff) { + // Trying to store negative values in unsigned bytes falls through to failure. + if (is_signed || val >= 0) { + + if (nbytes >= sizeof(val)) { + // All non-negative N bit signed integers fit in an unsigned N bit integer. + // This case prevents shifting too far below. + return; + } + + if (is_signed) { + mp_int_t edge = ((mp_int_t)1 << (nbytes * 8 - 1)); + if (-edge <= val && val < edge) { return; } - #if !defined(__LP64__) - // 32-bit ints and pointers - if (nbytes >= 4) { - return; // Any mp_int_t will fit. - } - #else - // 64-bit ints and pointers - if (nbytes == 4 && val <= 0xffffffff) { + // Out of range, fall through to failure. + } else { + // Unsigned. We already know val >= 0. + mp_int_t edge = ((mp_int_t)1 << (nbytes * 8)); + if (val < edge) { return; } - if (nbytes >= 8) { - return; // Any mp_int_t will fit. - } - #endif - } // Negative, fall through to failure. - } else { - // signed - if (val >= INT8_MIN && val <= INT8_MAX) { - return; // Small values fit in any number of nbytes. 
} - if (nbytes == 2 && val >= INT16_MIN && val <= INT16_MAX) { - return; - } - #if !defined(__LP64__) - // 32-bit ints and pointers - if (nbytes >= 4) { - return; // Any mp_int_t will fit. - } - #else - // 64-bit ints and pointers - if (nbytes == 4 && val >= INT32_MIN && val <= INT32_MAX) { - return; - } - if (nbytes >= 8) { - return; // Any mp_int_t will fit. - } - #endif - } // Fall through to failure. + // Fall through to failure. + } mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); } diff --git a/tests/basics/bigint_array_overflow.py b/tests/basics/bigint_array_overflow.py new file mode 100644 index 0000000000000..6ee6907bb8e33 --- /dev/null +++ b/tests/basics/bigint_array_overflow.py @@ -0,0 +1,31 @@ +import skip_if + +skip_if.no_bigint() + +try: + from array import array +except ImportError: + print("SKIP") + raise SystemExit + + +def test_array_overflow(typecode, val): + try: + print(array(typecode, [val])) + except OverflowError: + print("OverflowError") + + +# big int -1 +test_array_overflow("Q", -(2**64) // 2**64) +test_array_overflow("L", -(2**64) // 2**64) +test_array_overflow("I", -(2**64) // 2**64) +test_array_overflow("H", -(2**64) // 2**64) +test_array_overflow("B", -(2**64) // 2**64) + +# big int 2**63 +test_array_overflow("q", 2**63) +test_array_overflow("l", 2**63) +test_array_overflow("i", 2**63) +test_array_overflow("h", 2**63) +test_array_overflow("b", 2**63) diff --git a/tests/basics/smallint_array_overflow.py b/tests/basics/smallint_array_overflow.py new file mode 100644 index 0000000000000..ea573c9cfc124 --- /dev/null +++ b/tests/basics/smallint_array_overflow.py @@ -0,0 +1,54 @@ +try: + from array import array +except ImportError: + print("SKIP") + raise SystemExit + + +def test_array_overflow(typecode, val): + try: + print(array(typecode, [val])) + except OverflowError: + print("OverflowError") + + +def test_bytearray_overflow(val): + try: + print(bytearray([val])) + except 
(OverflowError, ValueError): + # CircuitPython always does OverflowError + print("(OverflowError, ValueError)") + + +# small int -1 +test_array_overflow("Q", -1) +test_array_overflow("L", -1) +test_array_overflow("I", -1) +test_array_overflow("H", -1) +test_array_overflow("B", -1) + +# 0 ok +test_array_overflow("Q", 0) +test_array_overflow("L", 0) +test_array_overflow("I", 0) +test_array_overflow("H", 0) +test_array_overflow("B", 0) + +# 1 ok +test_array_overflow("Q", 1) +test_array_overflow("L", 1) +test_array_overflow("I", 1) +test_array_overflow("H", 1) +test_array_overflow("B", 1) + +# truth value conversions +test_array_overflow("b", True) +test_array_overflow("b", False) + +# similar tests for bytearrays +test_bytearray_overflow(0) +test_bytearray_overflow(1) +test_bytearray_overflow(-1) +test_bytearray_overflow(256) +test_bytearray_overflow(True) +test_bytearray_overflow(False) From ef0fb6c52d506911893f7ea549f307ef348ced8f Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 27 Nov 2024 14:24:05 +1100 Subject: [PATCH 06/15] py/objint: Restore CPython >= 3.11 defaults for int.to_bytes(). Originally added in 80c5e76 and 0b432b3, then replaced by cherry-pick in 7e4ee62, now restored. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- py/objint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/py/objint.c b/py/objint.c index 5386cbee01bd1..3920cc79a4027 100644 --- a/py/objint.c +++ b/py/objint.c @@ -503,8 +503,8 @@ static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { enum { ARG_length, ARG_byteorder, ARG_signed }; static const mp_arg_t allowed_args[] = { - { MP_QSTR_length, MP_ARG_REQUIRED | MP_ARG_INT, {} }, - { MP_QSTR_byteorder, MP_ARG_REQUIRED | MP_ARG_OBJ, {} }, + { MP_QSTR_length, MP_ARG_INT, { .u_int = 1 } }, + { MP_QSTR_byteorder, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_QSTR(MP_QSTR_big) } }, { MP_QSTR_signed, MP_ARG_KW_ONLY | MP_ARG_BOOL, {.u_bool = false} }, }; mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; @@ -543,7 +543,7 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t * return mp_obj_new_bytes_from_vstr(&vstr); } -static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 3, int_to_bytes); +static MP_DEFINE_CONST_FUN_OBJ_KW(int_to_bytes_obj, 1, int_to_bytes); static const mp_rom_map_elem_t int_locals_dict_table[] = { { MP_ROM_QSTR(MP_QSTR_from_bytes), MP_ROM_PTR(&int_from_bytes_obj) }, From 8e1b0da862b0359da01855abab1d9d1951511ddc Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 27 Nov 2024 15:06:05 +1100 Subject: [PATCH 07/15] py/binary: Restore MP 1.x array constructor behaviour. - CPython and CircuitPython both raise OverflowError if an array constructor passes an out of bounds value. - MicroPython V1.x truncates the integer to suit. - The plan is for MicroPython V2 to change this to be the same as CircuitPython. The bounds checking cherry-picked from CircuitPython in 50bd33b and d352a73 adds these checks to array constructors. 
Move them behind a macro guard, and also rewrite the tests from CircuitPython to pass on MicroPython V1.x (but should be easy to convert over for MP2 in the future). This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- py/binary.c | 19 +++++- tests/basics/array_limits_intbig.py | 78 +++++++++++++++++++++++++ tests/basics/bigint_array_overflow.py | 31 ---------- tests/basics/smallint_array_overflow.py | 54 ----------------- 4 files changed, 96 insertions(+), 86 deletions(-) create mode 100644 tests/basics/array_limits_intbig.py delete mode 100644 tests/basics/bigint_array_overflow.py delete mode 100644 tests/basics/smallint_array_overflow.py diff --git a/py/binary.c b/py/binary.c index fcb76d0b65131..8ebda284f04f8 100644 --- a/py/binary.c +++ b/py/binary.c @@ -42,6 +42,10 @@ #define alignof(type) offsetof(struct { char c; type t; }, t) #endif +// MicroPython V1.x truncates integers when writing into arrays, +// MicroPython V2 will raise OverflowError in these cases, same as CPython +#define OVERFLOW_CHECKS MICROPY_PREVIEW_VERSION_2 + size_t mp_binary_get_size(char struct_type, char val_type, size_t *palign) { size_t size = 0; int align = 1; @@ -442,22 +446,29 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p } #endif default: { + #if OVERFLOW_CHECKS bool signed_type = is_signed(val_type); + #endif #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { // It's a longint. + #if OVERFLOW_CHECKS mp_obj_int_buffer_overflow_check(val_in, size, signed_type); + #endif mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); return; } #endif { val = mp_obj_get_int(val_in); + + #if OVERFLOW_CHECKS // Small int checking is separate, to be fast. mp_small_int_buffer_overflow_check(val, size, signed_type); + #endif // zero/sign extend if needed if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { - int c = (is_signed(val_type) && (mp_int_t)val < 0) ? 
0xff : 0x00; + int c = (mp_int_t)val < 0 ? 0xff : 0x00; memset(p, c, size); if (struct_type == '>') { p += size - sizeof(val); @@ -487,11 +498,15 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ break; default: { size_t size = mp_binary_get_size('@', typecode, NULL); + #if OVERFLOW_CHECKS bool signed_type = is_signed(typecode); + #endif #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE if (mp_obj_is_exact_type(val_in, &mp_type_int)) { // It's a long int. + #if OVERFLOW_CHECKS mp_obj_int_buffer_overflow_check(val_in, size, signed_type); + #endif mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, size, (uint8_t *)p + index * size); return; @@ -501,8 +516,10 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ if (val < 0 && typecode == BYTEARRAY_TYPECODE) { val = val & 0xFF; } + #if OVERFLOW_CHECKS // Small int checking is separate, to be fast. mp_small_int_buffer_overflow_check(val, size, signed_type); + #endif mp_binary_set_val_array_from_int(typecode, p, index, val); } } diff --git a/tests/basics/array_limits_intbig.py b/tests/basics/array_limits_intbig.py new file mode 100644 index 0000000000000..50eb00a0ff613 --- /dev/null +++ b/tests/basics/array_limits_intbig.py @@ -0,0 +1,78 @@ +# Test behaviour when array module is provided out of bounds values +# +# This test is intended to also pass on CPython. + +try: + from array import array +except ImportError: + print("SKIP") + raise SystemExit + +import unittest + +# MicroPython V2.0 will enforce bounds on items (same as CPython), V1.x truncates +# +# Note: once Date: Wed, 27 Nov 2024 16:18:00 +1100 Subject: [PATCH 08/15] py/objint,py/binary: Reduce code size of int to byte conversions. Refactors similar code paths to a common mp_obj_int_to_bytes() function to reduce code size. This commit should have no functional changes. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton Signed-off-by: Angus Gratton --- extmod/moductypes.c | 4 +- ports/unix/modffi.c | 2 +- py/binary.c | 83 +++++++---------------- py/binary.h | 2 +- py/mpz.c | 12 +--- py/mpz.h | 3 +- py/obj.c | 2 +- py/objint.c | 152 ++++++++++++++++++++++++++++++++++--------- py/objint.h | 10 +-- py/objint_longlong.c | 36 ---------- py/objint_mpz.c | 6 -- 11 files changed, 157 insertions(+), 155 deletions(-) diff --git a/extmod/moductypes.c b/extmod/moductypes.c index eb72f441bbbce..36aed22905093 100644 --- a/extmod/moductypes.c +++ b/extmod/moductypes.c @@ -451,8 +451,8 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set if (self->flags == LAYOUT_NATIVE) { set_aligned_basic(val_type & 6, self->addr + offset, val); } else { - mp_binary_set_int(GET_SCALAR_SIZE(val_type & 7), self->flags == LAYOUT_BIG_ENDIAN, - self->addr + offset, val); + size_t item_size = GET_SCALAR_SIZE(val_type & 7); + mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN); } return set_val; // just !MP_OBJ_NULL } diff --git a/ports/unix/modffi.c b/ports/unix/modffi.c index b469e932e0d5a..c16d40ad3b485 100644 --- a/ports/unix/modffi.c +++ b/ports/unix/modffi.c @@ -446,7 +446,7 @@ static unsigned long long ffi_get_int_value(mp_obj_t o) { return MP_OBJ_SMALL_INT_VALUE(o); } else { unsigned long long res; - mp_obj_int_to_bytes_impl(o, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res); + mp_obj_int_to_bytes(o, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false); return res; } } diff --git a/py/binary.c b/py/binary.c index 8ebda284f04f8..3642a80355eb6 100644 --- a/py/binary.c +++ b/py/binary.c @@ -374,7 +374,21 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte * } } -void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val) { +void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian) { + if (dest_sz > val_sz) { + 
// zero/sign extension if needed + int c = ((mp_int_t)val < 0) ? 0xff : 0x00; + memset(dest, c, dest_sz); + + // big endian: write val_sz bytes at end of 'dest' + if (big_endian) { + dest += dest_sz - val_sz; + } + } else if (dest_sz < val_sz) { + // truncate 'val' into 'dest' + val_sz = dest_sz; + } + if (MP_ENDIANNESS_LITTLE && !big_endian) { memcpy(dest, &val, val_sz); } else if (MP_ENDIANNESS_BIG && big_endian) { @@ -438,48 +452,21 @@ void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p val = fp_dp.i64; } else { int be = struct_type == '>'; - mp_binary_set_int(sizeof(uint32_t), be, p, fp_dp.i32[MP_ENDIANNESS_BIG ^ be]); + mp_binary_set_int(sizeof(uint32_t), p, sizeof(uint32_t), fp_dp.i32[MP_ENDIANNESS_BIG ^ be], be); + // Now fall through and copy the second word, below p += sizeof(uint32_t); + size = sizeof(uint32_t); val = fp_dp.i32[MP_ENDIANNESS_LITTLE ^ be]; } break; } #endif - default: { - #if OVERFLOW_CHECKS - bool signed_type = is_signed(val_type); - #endif - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - // It's a longint. - #if OVERFLOW_CHECKS - mp_obj_int_buffer_overflow_check(val_in, size, signed_type); - #endif - mp_obj_int_to_bytes_impl(val_in, struct_type == '>', size, p); - return; - } - #endif - { - val = mp_obj_get_int(val_in); - - #if OVERFLOW_CHECKS - // Small int checking is separate, to be fast. - mp_small_int_buffer_overflow_check(val, size, signed_type); - #endif - // zero/sign extend if needed - if (MP_BYTES_PER_OBJ_WORD < 8 && size > sizeof(val)) { - int c = (mp_int_t)val < 0 ? 
0xff : 0x00; - memset(p, c, size); - if (struct_type == '>') { - p += size - sizeof(val); - } - } - } - break; - } + default: + mp_obj_int_to_bytes(val_in, size, p, struct_type == '>', is_signed(val_type), OVERFLOW_CHECKS); + return; } - mp_binary_set_int(MIN((size_t)size, sizeof(val)), struct_type == '>', p, val); + mp_binary_set_int(size, p, sizeof(val), val, struct_type == '>'); } void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in) { @@ -498,29 +485,9 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ break; default: { size_t size = mp_binary_get_size('@', typecode, NULL); - #if OVERFLOW_CHECKS - bool signed_type = is_signed(typecode); - #endif - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (mp_obj_is_exact_type(val_in, &mp_type_int)) { - // It's a long int. - #if OVERFLOW_CHECKS - mp_obj_int_buffer_overflow_check(val_in, size, signed_type); - #endif - mp_obj_int_to_bytes_impl(val_in, MP_ENDIANNESS_BIG, - size, (uint8_t *)p + index * size); - return; - } - #endif - mp_int_t val = mp_obj_get_int(val_in); - if (val < 0 && typecode == BYTEARRAY_TYPECODE) { - val = val & 0xFF; - } - #if OVERFLOW_CHECKS - // Small int checking is separate, to be fast. 
- mp_small_int_buffer_overflow_check(val, size, signed_type); - #endif - mp_binary_set_val_array_from_int(typecode, p, index, val); + p = (uint8_t *)p + index * size; + mp_obj_int_to_bytes(val_in, size, p, MP_ENDIANNESS_BIG, is_signed(typecode), OVERFLOW_CHECKS); + return; } } } diff --git a/py/binary.h b/py/binary.h index 5c645bcaaa9c2..80a37daeaa627 100644 --- a/py/binary.h +++ b/py/binary.h @@ -41,6 +41,6 @@ void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_i mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte **ptr); void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p_base, byte **ptr); long long mp_binary_get_int(size_t size, bool is_signed, bool big_endian, const byte *src); -void mp_binary_set_int(size_t val_sz, bool big_endian, byte *dest, mp_uint_t val); +void mp_binary_set_int(size_t dest_sz, byte *dest, size_t val_sz, mp_uint_t val, bool big_endian); #endif // MICROPY_INCLUDED_PY_BINARY_H diff --git a/py/mpz.c b/py/mpz.c index 5a4d7d27d94d5..6b4bee943694b 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -1592,7 +1592,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { +void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1602,7 +1602,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b mpz_dbl_dig_t d = 0; mpz_dbl_dig_t carry = 1; size_t olen = len; // bytes in output buffer - bool ok = true; for (size_t zlen = z->len; zlen > 0; --zlen) { bits += DIG_SIZE; d = (d << DIG_SIZE) | *zdig++; @@ -1615,7 +1614,6 @@ bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b if (!olen) { // Buffer is full, only OK if all remaining bytes are zeroes - ok = ok && ((byte)val == 0); continue; } @@ -1628,16 +1626,10 @@ bool 
mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, b } } - if (as_signed && olen == 0 && len > 0) { - // If output exhausted then ensure there was enough space for the sign bit - byte most_sig = big_endian ? buf[0] : buf[len - 1]; - ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg; - } else { + if (!(as_signed && olen == 0 && len > 0)) { // fill remainder of buf with zero/sign extension of the integer memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen); } - - return ok; } #if MICROPY_PY_BUILTINS_FLOAT diff --git a/py/mpz.h b/py/mpz.h index 6f1ac930b0214..d38c4b1c0c09d 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -145,8 +145,7 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) { mp_int_t mpz_hash(const mpz_t *z); bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value); bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value); -// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. -bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); +void mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf); #if MICROPY_PY_BUILTINS_FLOAT mp_float_t mpz_as_float(const mpz_t *z); #endif diff --git a/py/obj.c b/py/obj.c index 586759460762b..080795d511526 100644 --- a/py/obj.c +++ b/py/obj.c @@ -338,7 +338,7 @@ long long mp_obj_get_ll(mp_const_obj_t arg) { return MP_OBJ_SMALL_INT_VALUE(arg); } else { long long res; - mp_obj_int_to_bytes_impl((mp_obj_t)arg, MP_ENDIANNESS_BIG, sizeof(res), (byte *)&res); + mp_obj_int_to_bytes((mp_obj_t)arg, sizeof(res), (byte *)&res, MP_ENDIANNESS_BIG, false, false); return res; } } diff --git a/py/objint.c b/py/objint.c index 3920cc79a4027..d7329316a7258 100644 --- a/py/objint.c +++ b/py/objint.c @@ -39,6 +39,11 @@ #include #endif +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ +// Generally this implementation lives in objint_mpz.c, but some small functions inlined here... 
+#include "py/mpz.h" +#endif + // This dispatcher function is expected to be independent of the implementation of long int static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) { (void)type_in; @@ -99,8 +104,8 @@ static mp_fp_as_int_class_t mp_classify_fp_as_int(mp_float_t val) { #elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE e = u.i[MP_ENDIANNESS_LITTLE]; #endif -#define MP_FLOAT_SIGN_SHIFT_I32 ((MP_FLOAT_FRAC_BITS + MP_FLOAT_EXP_BITS) % 32) -#define MP_FLOAT_EXP_SHIFT_I32 (MP_FLOAT_FRAC_BITS % 32) + #define MP_FLOAT_SIGN_SHIFT_I32 ((MP_FLOAT_FRAC_BITS + MP_FLOAT_EXP_BITS) % 32) + #define MP_FLOAT_EXP_SHIFT_I32 (MP_FLOAT_FRAC_BITS % 32) if (e & (1U << MP_FLOAT_SIGN_SHIFT_I32)) { #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE @@ -199,10 +204,10 @@ static const uint8_t log_base2_floor[] = { 3, 3, 3, 3, 3, 3, 3, 4, /* if needed, these are the values for higher bases - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 4, - 4, 4, 4, 5 + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 4, + 4, 4, 4, 5 */ }; @@ -301,9 +306,9 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co return b; } -#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ -void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { +static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { if (is_signed) { // edge = 1 << (nbytes * 8 - 1) mp_obj_t edge = mp_binary_op(MP_BINARY_OP_INPLACE_LSHIFT, @@ -344,10 +349,82 @@ void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_s raise: mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); } +#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ + +#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG + +// Same as the general mp_small_int_buffer_overflow_check() below, but 
using 64-bit integers +static void longint_buffer_overflow_check(mp_longint_impl_t val, size_t nbytes, bool is_signed) { + // Fast path for zero. + if (val == 0) { + return; + } + // Trying to store negative values in unsigned bytes falls through to failure. + if (is_signed || val >= 0) { + + if (nbytes >= sizeof(val)) { + // All non-negative N bit signed integers fit in an unsigned N bit integer. + // This case prevents shifting too far below. + return; + } + + if (is_signed) { + mp_longint_impl_t edge = 1LL << (nbytes * 8 - 1); + if (-edge <= val && val < edge) { + return; + } + // Out of range, fall through to failure. + } else { + // Unsigned. We already know val >= 0. + mp_longint_impl_t edge = 1LL << (nbytes * 8); + if (val < edge) { + return; + } + } + // Fall through to failure. + } + + mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); +} + +static void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed) { + const mp_obj_int_t *self = self_in; + mp_longint_impl_t val = self->val; + longint_buffer_overflow_check(val, nbytes, is_signed); +} -#endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE +// save some code size by calling into the longint version for both sizes of int +static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) { + longint_buffer_overflow_check((mp_longint_impl_t)val, nbytes, is_signed); +} + +// Placed here rather than objint_longlong.c for code size reasons +static void longint_to_bytes(mp_obj_int_t *self, bool big_endian, size_t len, byte *buf) { + MP_STATIC_ASSERT(sizeof(mp_uint_t) == 4); + long long val = self->val; + mp_uint_t lower = val; + mp_uint_t upper = (val >> 32); + + if (big_endian) { + if (len > 4) { + // write the least significant 4 bytes at the end + mp_binary_set_int(4, buf + len - 4, sizeof(lower), lower, true); + } + // write most significant bytes at the start, extending if necessary + 
mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(upper), upper, true); + } else { + // write the least significant 4 bytes at the start + mp_binary_set_int(len > 4 ? len - 4 : len, buf, sizeof(lower), lower, false); + if (len > 4) { + // write the most significant bytes at the end, extending if necessary + mp_binary_set_int(len - 4, buf + 4, sizeof(upper), upper, false); + } + } +} + +#else -void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) { +static void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed) { // Fast path for zero. if (val == 0) { return; @@ -380,6 +457,34 @@ void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_sig mp_raise_msg_varg(&mp_type_OverflowError, MP_ERROR_TEXT("value would overflow a %d byte buffer"), nbytes); } +#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG + +void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check) { + #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE + if (mp_obj_is_exact_type(self_in, &mp_type_int)) { + if (overflow_check) { + mp_obj_int_buffer_overflow_check(self_in, buf_len, is_signed); + } + #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ + mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); + mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, buf_len, buf); + #else // MICROPY_LONGINT_IMPL_LONGLONG + longint_to_bytes(self_in, big_endian, buf_len, buf); + #endif + return; + } + #endif // MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE + + // self_in is either a smallint, or another type convertible to mp_int_t (i.e. 
bool) + + mp_int_t val = mp_obj_get_int(self_in); + if (overflow_check) { + mp_small_int_buffer_overflow_check(val, buf_len, is_signed); + } + mp_binary_set_int(buf_len, buf, sizeof(val), val, big_endian); +} + + #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE int mp_obj_int_sign(mp_obj_t self_in) { @@ -510,36 +615,21 @@ static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t * mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)]; mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args); + mp_obj_t self = pos_args[0]; + mp_int_t dlen = args[ARG_length].u_int; if (dlen < 0) { mp_raise_ValueError(NULL); } - mp_obj_t self = pos_args[0]; - bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little); - bool signed_ = args[ARG_signed].u_bool; - vstr_t vstr; vstr_init_len(&vstr, dlen); byte *data = (byte *)vstr.buf; - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - if (!mp_obj_is_small_int(self)) { - mp_obj_int_buffer_overflow_check(self, dlen, signed_); - mp_obj_int_to_bytes_impl(self, big_endian, dlen, data); - } else - #endif - { - mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self); - // Small int checking is separate, to be fast. - mp_small_int_buffer_overflow_check(val, dlen, signed_); - size_t l = MIN((size_t)dlen, sizeof(val)); - if (val < 0) { - // Sign extend negative numbers. - memset(data, -1, dlen); - } - mp_binary_set_int(l, big_endian, data + (big_endian ? 
(dlen - l) : 0), val); - } + bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little); + bool signed_ = args[ARG_signed].u_bool; + + mp_obj_int_to_bytes(self, dlen, data, big_endian, signed_, true); return mp_obj_new_bytes_from_vstr(&vstr); } diff --git a/py/objint.h b/py/objint.h index c75e6fa834f04..f6a551ea96143 100644 --- a/py/objint.h +++ b/py/objint.h @@ -54,15 +54,11 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in, int base, const char *prefix, char base_char, char comma); -#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE -void mp_obj_int_buffer_overflow_check(mp_obj_t self_in, size_t nbytes, bool is_signed); -#endif -void mp_small_int_buffer_overflow_check(mp_int_t val, size_t nbytes, bool is_signed); - mp_int_t mp_obj_int_hash(mp_obj_t self_in); mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf); -// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise. -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf); +// Write an integer to a byte sequence. +// If overflow_check is true, raises OverflowError if 'self_in' doesn't fit. If false, truncate to fit. 
+void mp_obj_int_to_bytes(mp_obj_t self_in, size_t buf_len, byte *buf, bool big_endian, bool is_signed, bool overflow_check); int mp_obj_int_sign(mp_obj_t self_in); mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in); mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in); diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 22ac0ba12efa3..b0e3f5ac740ff 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -62,42 +62,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return mp_obj_new_int_from_ll(value); } -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_exact_type(self_in, &mp_type_int)); - mp_obj_int_t *self = self_in; - long long val = self->val; - size_t slen; // Number of bytes to represent val - - // This logic has a twin in objint.c - if (val > 0) { - slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8; - } else if (val < -1) { - slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8; - } else { - // clz of 0 is defined, so 0 and -1 map to 0 and 1 - slen = -val; - } - - if (slen > len) { - return false; // Would overflow - // TODO: Determine whether to copy and truncate, as some callers probably expect this...? 
- } - - if (big_endian) { - byte *b = buf + len; - while (b > buf) { - *--b = val; - val >>= 8; - } - } else { - for (; len > 0; --len) { - *buf++ = val; - val >>= 8; - } - } - return true; -} - int mp_obj_int_sign(mp_obj_t self_in) { mp_longint_impl_t val; if (mp_obj_is_small_int(self_in)) { diff --git a/py/objint_mpz.c b/py/objint_mpz.c index 6f2ea616c779c..895ed17e5dc5b 100644 --- a/py/objint_mpz.c +++ b/py/objint_mpz.c @@ -112,12 +112,6 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf return MP_OBJ_FROM_PTR(o); } -bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) { - assert(mp_obj_is_exact_type(self_in, &mp_type_int)); - mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in); - return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf); -} - int mp_obj_int_sign(mp_obj_t self_in) { if (mp_obj_is_small_int(self_in)) { mp_int_t val = MP_OBJ_SMALL_INT_VALUE(self_in); From df47cbba2f8e3152f4c666b11c61221fe5b8c234 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 17 Dec 2024 10:20:11 +1100 Subject: [PATCH 09/15] moductypes: Add OverflowError on truncate for MP V2. Expands tests to match, although I think there may still be some corner cases where this doesn't work as expected on V2. Should be no change on V1. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- extmod/moductypes.c | 94 +++++++++++++++++--- tests/extmod/uctypes_array_load_store.py | 76 ++++++++++++++-- tests/extmod/uctypes_array_load_store.py.exp | 24 ----- 3 files changed, 151 insertions(+), 43 deletions(-) delete mode 100644 tests/extmod/uctypes_array_load_store.py.exp diff --git a/extmod/moductypes.c b/extmod/moductypes.c index 36aed22905093..776df16a96d56 100644 --- a/extmod/moductypes.c +++ b/extmod/moductypes.c @@ -72,9 +72,12 @@ enum { #define TYPE2SMALLINT(x, nbits) ((((int)x) << (32 - nbits)) >> 1) #define GET_TYPE(x, nbits) (((x) >> (31 - nbits)) & ((1 << nbits) - 1)) // Bit 0 is "is_signed" -#define GET_SCALAR_SIZE(val_type) (1 << ((val_type) >> 1)) +#define GET_SCALAR_SIZE(val_type) (1 << (((val_type) & 7) >> 1)) #define VALUE_MASK(type_nbits) ~((int)0x80000000 >> type_nbits) +#define INT_TYPE_IS_SIGNED(TYPE) ((TYPE) & 1) +#define INT_TYPE_TO_UNSIGNED(TYPE) ((TYPE) & 6) + #define IS_SCALAR_ARRAY(tuple_desc) ((tuple_desc)->len == 2) // We cannot apply the below to INT8, as their range [-128, 127] #define IS_SCALAR_ARRAY_OF_BYTES(tuple_desc) (GET_TYPE(MP_OBJ_SMALL_INT_VALUE((tuple_desc)->items[1]), VAL_TYPE_BITS) == UINT8) @@ -137,7 +140,7 @@ static inline mp_uint_t uctypes_struct_scalar_size(int val_type) { if (val_type == FLOAT32) { return 4; } else { - return GET_SCALAR_SIZE(val_type & 7); + return GET_SCALAR_SIZE(val_type); } } @@ -305,6 +308,12 @@ static inline mp_uint_t get_aligned_basic(uint val_type, void *p) { return 0; } +#if MICROPY_PREVIEW_VERSION_2 +static void raise_overflow_exception(void) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("value would truncate")); +} +#endif + static inline void set_aligned_basic(uint val_type, void *p, mp_uint_t v) { switch (val_type) { case UINT8: @@ -361,7 +370,58 @@ static void set_aligned(uint val_type, void *p, mp_int_t index, mp_obj_t val) { return; } #endif + + // Special case where mp_int_t can't hold the target type, fall through + if 
(sizeof(mp_int_t) < 8 && (val_type == INT64 || val_type == UINT64)) { + // Doesn't offer atomic store semantics, but should at least try + set_unaligned(val_type, (void *)&((uint64_t *)p)[index], MP_ENDIANNESS_BIG, val); + return; + } + + #if MICROPY_PREVIEW_VERSION_2 + // V2 raises exception if setting int will truncate + mp_int_t v; + bool ok = mp_obj_get_int_maybe(val, &v); + if (ok) { + switch (val_type) { + case UINT8: + ok = (v == (uint8_t)v); + break; + case INT8: + ok = (v == (int8_t)v); + break; + case UINT16: + ok = (v == (uint16_t)v); + break; + case INT16: + ok = (v == (int16_t)v); + break; + case UINT32: + ok = (v == (uint32_t)v); + break; + case INT32: + ok = (v == (int32_t)v); + break; + case UINT64: + assert(sizeof(mp_int_t) == 8); + ok = v >= 0; + break; + case INT64: + assert(sizeof(mp_int_t) == 8); + break; + default: + assert(0); + ok = false; + } + if (!ok) { + raise_overflow_exception(); + } + } + + #else mp_int_t v = mp_obj_get_int_truncated(val); + #endif + switch (val_type) { case UINT8: ((uint8_t *)p)[index] = (uint8_t)v; @@ -383,12 +443,8 @@ static void set_aligned(uint val_type, void *p, mp_int_t index, mp_obj_t val) { return; case INT64: case UINT64: - if (sizeof(mp_int_t) == 8) { - ((uint64_t *)p)[index] = (uint64_t)v; - } else { - // TODO: Doesn't offer atomic store semantics, but should at least try - set_unaligned(val_type, (void *)&((uint64_t *)p)[index], MP_ENDIANNESS_BIG, val); - } + assert(sizeof(mp_int_t) == 8); + ((uint64_t *)p)[index] = (uint64_t)v; return; default: assert(0); @@ -430,28 +486,38 @@ static mp_obj_t uctypes_struct_attr_op(mp_obj_t self_in, qstr attr, mp_obj_t set offset &= (1 << OFFSET_BITS) - 1; mp_uint_t val; if (self->flags == LAYOUT_NATIVE) { - val = get_aligned_basic(val_type & 6, self->addr + offset); + val = get_aligned_basic(INT_TYPE_TO_UNSIGNED(val_type), self->addr + offset); } else { - val = mp_binary_get_int(GET_SCALAR_SIZE(val_type & 7), val_type & 1, self->flags, self->addr + offset); + val = 
mp_binary_get_int(GET_SCALAR_SIZE(val_type), INT_TYPE_IS_SIGNED(val_type), + self->flags, self->addr + offset); } if (set_val == MP_OBJ_NULL) { val >>= bit_offset; val &= (1 << bit_len) - 1; // TODO: signed - assert((val_type & 1) == 0); + assert(!INT_TYPE_IS_SIGNED(val_type)); return mp_obj_new_int(val); } else { - mp_uint_t set_val_int = (mp_uint_t)mp_obj_get_int(set_val); mp_uint_t mask = (1 << bit_len) - 1; + mp_uint_t set_val_int; + + #if MICROPY_PREVIEW_VERSION_2 + if (!mp_obj_get_int_maybe(set_val, (mp_int_t *)&set_val_int) || (set_val_int & mask) != set_val_int) { + raise_overflow_exception(); + } + #else + set_val_int = (mp_uint_t)mp_obj_get_int(set_val); + #endif + set_val_int &= mask; set_val_int <<= bit_offset; mask <<= bit_offset; val = (val & ~mask) | set_val_int; if (self->flags == LAYOUT_NATIVE) { - set_aligned_basic(val_type & 6, self->addr + offset, val); + set_aligned_basic(INT_TYPE_TO_UNSIGNED(val_type), self->addr + offset, val); } else { - size_t item_size = GET_SCALAR_SIZE(val_type & 7); + size_t item_size = GET_SCALAR_SIZE(val_type); mp_binary_set_int(item_size, self->addr + offset, item_size, val, self->flags == LAYOUT_BIG_ENDIAN); } return set_val; // just !MP_OBJ_NULL diff --git a/tests/extmod/uctypes_array_load_store.py b/tests/extmod/uctypes_array_load_store.py index df7deb6837a17..063dbba0bde19 100644 --- a/tests/extmod/uctypes_array_load_store.py +++ b/tests/extmod/uctypes_array_load_store.py @@ -1,4 +1,5 @@ # Test uctypes array, load and store, with array size > 1 +import unittest try: import uctypes @@ -13,14 +14,79 @@ print("SKIP") raise SystemExit +# MicroPython V2.0 will enforce bounds on uctypes items, V1.x truncates +import sys + +is_v2 = hasattr(sys.implementation, "_v2") + N = 5 +PLACEHOLDER = 99 + + +class Test(unittest.TestCase): + def test_native_endian(self): + self._test_endian("NATIVE") -for endian in ("NATIVE", "LITTLE_ENDIAN", "BIG_ENDIAN"): - for type_ in ("INT8", "UINT8", "INT16", "UINT16", "INT32", "UINT32", 
"INT64", "UINT64"): - desc = {"arr": (uctypes.ARRAY | 0, getattr(uctypes, type_) | N)} + def test_little_endian(self): + self._test_endian("LITTLE_ENDIAN") + + def test_big_endian(self): + self._test_endian("BIG_ENDIAN") + + def _test_endian(self, endian): + for item_type in ( + "INT8", + "UINT8", + "INT16", + "UINT16", + "INT32", + "UINT32", + "INT64", + "UINT64", + ): + print(endian, item_type) + self._test_endian_type(endian, item_type) + + def _test_endian_type(self, endian, item_type): + print("Testing array of", item_type, "with", endian, "endianness") + desc = {"arr": (uctypes.ARRAY | 0, getattr(uctypes, item_type) | N)} + print(repr(desc)) sz = uctypes.sizeof(desc) data = bytearray(sz) + print(sz, repr((uctypes.addressof(data), desc, getattr(uctypes, endian)))) s = uctypes.struct(uctypes.addressof(data), desc, getattr(uctypes, endian)) + is_unsigned = item_type.startswith("U") + item_sz = uctypes.sizeof({"": getattr(uctypes, item_type)}) + + # V2 enforces range limits when setting fields + item_min = 0 if is_unsigned else -(2 ** (item_sz * 8 - 1)) + 1 + item_max = 2 ** (item_sz * 8 if is_unsigned else (item_sz * 8 - 1)) - 1 + print("representable range", item_min, item_max) + + for i in range(N): + n = i - 2 + print(i, n) + if is_v2 and (n < item_min or n > item_max): + with self.assertRaises(OverflowError): + s.arr[i] = n + s.arr[i] = PLACEHOLDER + n = PLACEHOLDER + else: + s.arr[i] = n + + print(endian, item_type, sz, *(s.arr[i] for i in range(N))) + for i in range(N): - s.arr[i] = i - 2 - print(endian, type_, sz, *(s.arr[i] for i in range(N))) + n = i - 2 + if is_v2 and (n < item_min or n > item_max): + # V2 will raise OverflowError, test swaps in placeholder + n = PLACEHOLDER + elif is_unsigned and n < 0: + # V1 will mask and then load as unsigned int + n = n & ((1 << (item_sz * 8)) - 1) + + self.assertEqual(s.arr[i], n) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/extmod/uctypes_array_load_store.py.exp 
b/tests/extmod/uctypes_array_load_store.py.exp deleted file mode 100644 index 10de8046454ba..0000000000000 --- a/tests/extmod/uctypes_array_load_store.py.exp +++ /dev/null @@ -1,24 +0,0 @@ -NATIVE INT8 5 -2 -1 0 1 2 -NATIVE UINT8 5 254 255 0 1 2 -NATIVE INT16 10 -2 -1 0 1 2 -NATIVE UINT16 10 65534 65535 0 1 2 -NATIVE INT32 20 -2 -1 0 1 2 -NATIVE UINT32 20 4294967294 4294967295 0 1 2 -NATIVE INT64 40 -2 -1 0 1 2 -NATIVE UINT64 40 18446744073709551614 18446744073709551615 0 1 2 -LITTLE_ENDIAN INT8 5 -2 -1 0 1 2 -LITTLE_ENDIAN UINT8 5 254 255 0 1 2 -LITTLE_ENDIAN INT16 10 -2 -1 0 1 2 -LITTLE_ENDIAN UINT16 10 65534 65535 0 1 2 -LITTLE_ENDIAN INT32 20 -2 -1 0 1 2 -LITTLE_ENDIAN UINT32 20 4294967294 4294967295 0 1 2 -LITTLE_ENDIAN INT64 40 -2 -1 0 1 2 -LITTLE_ENDIAN UINT64 40 18446744073709551614 18446744073709551615 0 1 2 -BIG_ENDIAN INT8 5 -2 -1 0 1 2 -BIG_ENDIAN UINT8 5 254 255 0 1 2 -BIG_ENDIAN INT16 10 -2 -1 0 1 2 -BIG_ENDIAN UINT16 10 65534 65535 0 1 2 -BIG_ENDIAN INT32 20 -2 -1 0 1 2 -BIG_ENDIAN UINT32 20 4294967294 4294967295 0 1 2 -BIG_ENDIAN INT64 40 -2 -1 0 1 2 -BIG_ENDIAN UINT64 40 18446744073709551614 18446744073709551615 0 1 2 From fcd76e3bd7e29289b7b5743cca85883f5f29db62 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 25 Feb 2025 17:55:26 +1100 Subject: [PATCH 10/15] ci: Allow unittest to be imported when testing mpy merge. Signed-off-by: Angus Gratton --- tools/ci.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/ci.sh b/tools/ci.sh index 510bb3a4d3c8c..6130e945a2ead 100755 --- a/tools/ci.sh +++ b/tools/ci.sh @@ -630,6 +630,8 @@ function ci_unix_coverage_run_mpy_merge_tests { outdir=$(mktemp -d) allmpy=() + export MICROPYPATH="${mptop}/lib/micropython-lib/python-stdlib/unittest" + # Compile a selection of tests to .mpy and execute them, collecting the output. # None of the tests should SKIP. 
for inpy in $mptop/tests/basics/[acdel]*.py; do From b98064566d877bdfe60f7982d516639280d9f45d Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 26 Feb 2025 17:34:50 +1100 Subject: [PATCH 11/15] windows: Double the stack size allowance on windows port. This seems like it's only really a problem on Debug builds, but I think it can't hurt to increase it on all Windows builds. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- ports/unix/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ports/unix/main.c b/ports/unix/main.c index 530e20a3863b4..a835b6f661946 100644 --- a/ports/unix/main.c +++ b/ports/unix/main.c @@ -480,8 +480,8 @@ int main(int argc, char **argv) { // Define a reasonable stack limit to detect stack overflow. mp_uint_t stack_size = 40000 * (sizeof(void *) / 4); - #if defined(__arm__) && !defined(__thumb2__) - // ARM (non-Thumb) architectures require more stack. + #if (defined(__arm__) && !defined(__thumb2__)) || defined(_MSC_VER) + // ARM (non-Thumb) architectures require more stack, as does Windows stack_size *= 2; #endif From 241b10c6bd307113f922b51fe3414ed0bbeb00a2 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 4 Mar 2025 18:17:38 +1100 Subject: [PATCH 12/15] tests: Add test coverage for converting array to bytes. Validates that the memory representation for arrays matches int.to_bytes() results using native endianness and any padding that the array type has. 
Signed-off-by: Angus Gratton --- tests/basics/array_int_repr.py | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 tests/basics/array_int_repr.py diff --git a/tests/basics/array_int_repr.py b/tests/basics/array_int_repr.py new file mode 100644 index 0000000000000..95d5487a69c37 --- /dev/null +++ b/tests/basics/array_int_repr.py @@ -0,0 +1,80 @@ +# Test array integer representations in memory +# +# This has to be a unit test because correct internal representation depends on +# native endianness +# +# These test cases should pass on both CPython and MicroPython. + +try: + from array import array + from sys import byteorder +except ImportError: + print("SKIP") + raise SystemExit + +import unittest + +# Ports without bigint support don't support typecode 'q' +try: + array('q', []) + array_has_typecode_q = True +except: + array_has_typecode_q = False + +class TestIntReprs(unittest.TestCase): + def _test_repr(self, typecode, values): + # create an array with the specified typecode and list of values + a = array(typecode, values) + a_hex = memoryview(a).hex() + print(a, a_hex) + + self.assertEqual(len(a_hex) % len(values), 0) + # no array.itemsize in MicroPython, so calculate item size + sz = len(a_hex) // 2 // len(values) + if hasattr(a, "itemsize"): + self.assertEqual(a.itemsize, sz) + + # build alternative hex representation of the array using int.to_bytes() + # on each value + values_hex = "" + for v in values: + v_bytes = v.to_bytes(sz, byteorder=byteorder, signed=typecode.islower()) + values_hex += v_bytes.hex() + + # compare with the raw array contents + self.assertEqual(a_hex, values_hex) + + def test_smaller_ints(self): + for (typecode, initialiser) in ( + ('b', [1, -1, 120, -120]), + ('B', [1, 5, 220]), + ('h', [5, -1, 32_000, -32_000]), + ('H', [5, 1, 32_000, 65_535]), + ('i', [5, -1, 32_000, -32_000]), # CPython only guarantees min 2 bytes, C style! 
+ ('I', [5, 1, 32_000, 65_535]), + ('l', [5, -1, 2_000_000, -2_000_000, 0x7FFF_FFFF]), + ('L', [5, 1, 65_536, 2_000_000, 0x7FFF_FFFF, 0xFFFF_FFFF]), + ): + self._test_repr(typecode, initialiser) + + @unittest.skipIf(not array_has_typecode_q, "port has no bigint support") + def test_bigints(self): + # Note: need to be careful not to write any literal expressions that can't be compiled on non-bigint MP + a = 0x1FFF_FFF + b = 62 + + try: + # this calculation will trigger OverflowError if bigint is set to long long + max_uint64 = [2 ** (b + 1)] + except OverflowError: + max_uint64 = [] + + for (typecode, initialiser) in ( + ('q', [a * 5, -a * 10, 2 ** b, (2 ** b) * -1]), + ('Q', [a * 5, a * 10, 2 ** b, (2 ** b) - 1, (2 ** b) + 1] + max_uint64) + ): + self._test_repr(typecode, initialiser) + + +if __name__ == "__main__": + unittest.main() From cdb30b8388bafc1fbba8d0562b59327a1e837496 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 23 Jul 2025 11:16:57 +1000 Subject: [PATCH 13/15] stm32/adc: Set values in array from smallint. This removes the last usage of mp_binary_set_val_array_from_int(). It will be a little slower, but shouldn't be measurably so compared to the ADC sampling. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- ports/stm32/adc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ports/stm32/adc.c b/ports/stm32/adc.c index f47e9eaad7b35..d68b6427cdb53 100644 --- a/ports/stm32/adc.c +++ b/ports/stm32/adc.c @@ -695,7 +695,7 @@ static mp_obj_t adc_read_timed(mp_obj_t self_in, mp_obj_t buf_in, mp_obj_t freq_ if (typesize == 1) { value >>= 4; } - mp_binary_set_val_array_from_int(bufinfo.typecode, bufinfo.buf, index, value); + mp_binary_set_val_array(bufinfo.typecode, bufinfo.buf, index, MP_OBJ_NEW_SMALL_INT(value)); } // turn the ADC off @@ -803,7 +803,7 @@ static mp_obj_t adc_read_timed_multi(mp_obj_t adc_array_in, mp_obj_t buf_array_i if (typesize == 1) { value >>= 4; } - mp_binary_set_val_array_from_int(bufinfo.typecode, bufptrs[array_index], elem_index, value); + mp_binary_set_val_array(bufinfo.typecode, bufptrs[array_index], elem_index, MP_OBJ_NEW_SMALL_INT(value)); } } From 2a329257c17517141d12852b44737487f368b7ca Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 23 Jul 2025 11:18:33 +1000 Subject: [PATCH 14/15] py/binary: Remove mp_binary_set_val_array_from_int(). This is a long-standing C function in binary.h, but it's no longer called from inside MicroPython. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- ports/unix/coverage.c | 13 ------- py/binary.c | 50 -------------------------- py/binary.h | 1 - tests/ports/unix/extra_coverage.py.exp | 3 -- 4 files changed, 67 deletions(-) diff --git a/ports/unix/coverage.c b/ports/unix/coverage.c index 68340d7f239a8..6e7c18f4e2c28 100644 --- a/ports/unix/coverage.c +++ b/ports/unix/coverage.c @@ -627,19 +627,6 @@ static mp_obj_t extra_coverage(void) { mp_printf(&mp_plat_print, "%s\n", buf2); } - // binary - { - mp_printf(&mp_plat_print, "# binary\n"); - - // call function with float and double typecodes - float far[1]; - double dar[1]; - mp_binary_set_val_array_from_int('f', far, 0, 123); - mp_printf(&mp_plat_print, "%.0f\n", (double)far[0]); - mp_binary_set_val_array_from_int('d', dar, 0, 456); - mp_printf(&mp_plat_print, "%.0lf\n", dar[0]); - } - // VM { mp_printf(&mp_plat_print, "# VM\n"); diff --git a/py/binary.c b/py/binary.c index 3642a80355eb6..62a5c8a8ff5c2 100644 --- a/py/binary.c +++ b/py/binary.c @@ -491,53 +491,3 @@ void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_ } } } - -void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_int_t val) { - switch (typecode) { - case 'b': - ((signed char *)p)[index] = val; - break; - case BYTEARRAY_TYPECODE: - case 'B': - ((unsigned char *)p)[index] = val; - break; - case 'h': - ((short *)p)[index] = val; - break; - case 'H': - ((unsigned short *)p)[index] = val; - break; - case 'i': - ((int *)p)[index] = val; - break; - case 'I': - ((unsigned int *)p)[index] = val; - break; - case 'l': - ((long *)p)[index] = val; - break; - case 'L': - ((unsigned long *)p)[index] = val; - break; - #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE - case 'q': - ((long long *)p)[index] = val; - break; - case 'Q': - ((unsigned long long *)p)[index] = val; - break; - #endif - #if MICROPY_PY_BUILTINS_FLOAT - case 'f': - ((float *)p)[index] = (float)val; - break; - case 'd': - ((double *)p)[index] = 
(double)val; - break; - #endif - // Extension to CPython: array of pointers - case 'P': - ((void **)p)[index] = (void *)(uintptr_t)val; - break; - } -} diff --git a/py/binary.h b/py/binary.h index 80a37daeaa627..851dc50110eae 100644 --- a/py/binary.h +++ b/py/binary.h @@ -37,7 +37,6 @@ size_t mp_binary_get_size(char struct_type, char val_type, size_t *palign); mp_obj_t mp_binary_get_val_array(char typecode, void *p, size_t index); void mp_binary_set_val_array(char typecode, void *p, size_t index, mp_obj_t val_in); -void mp_binary_set_val_array_from_int(char typecode, void *p, size_t index, mp_int_t val); mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte *p_base, byte **ptr); void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte *p_base, byte **ptr); long long mp_binary_get_int(size_t size, bool is_signed, bool big_endian, const byte *src); diff --git a/tests/ports/unix/extra_coverage.py.exp b/tests/ports/unix/extra_coverage.py.exp index e20871273d709..f4da30fe9e53c 100644 --- a/tests/ports/unix/extra_coverage.py.exp +++ b/tests/ports/unix/extra_coverage.py.exp @@ -131,9 +131,6 @@ Warning: test ? +1e+00 +1e+00 -# binary -123 -456 # VM 2 1 # scheduler From aef2a4916b28c0fe0b9a9e773176b448fc827648 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 23 Jul 2025 15:17:08 +1000 Subject: [PATCH 15/15] tools/ci: Don't include unittest tests in the merged .mpy. If >1 unittest-enabled module is included, the results of the merged module won't match (as it runs some previously registered tests again). Signed-off-by: Angus Gratton --- tools/ci.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/ci.sh b/tools/ci.sh index 6130e945a2ead..b7e4dec2114af 100755 --- a/tools/ci.sh +++ b/tools/ci.sh @@ -635,6 +635,12 @@ function ci_unix_coverage_run_mpy_merge_tests { # Compile a selection of tests to .mpy and execute them, collecting the output. # None of the tests should SKIP. 
for inpy in $mptop/tests/basics/[acdel]*.py; do + if grep -q "import unittest" $inpy; then + # Merging >1 unittest-enabled module leads to unexpected + # results, as each file runs all previously registered unittest cases + echo "SKIPPING $inpy" + continue + fi test=$(basename $inpy .py) echo $test outmpy=$outdir/$test.mpy pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy