From 2d8d64059fbc7cd72e40503e5af19eded73c3d47 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 18 Mar 2025 12:07:00 +1100 Subject: [PATCH 1/8] tests: Add specific tests for "long long" 64-bit bigints. These will run on all ports which support them, but importantly they'll also run on ports that don't support arbitrary precision but do support 64-bit long ints. Includes some test workarounds to account for things which will overflow once "long long" big integers overflow (added in follow-up commit): - uctypes_array_load_store test was failing already, now won't parse. - all the ffi_int tests contain 64-bit unsigned values, that won't parse as long long. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- tests/basics/int_64_basics.py | 126 +++++++++++++++++++++++ tests/extmod/uctypes_array_load_store.py | 7 ++ tests/feature_check/int_64.py | 2 + tests/feature_check/int_64.py.exp | 1 + tests/run-tests.py | 14 ++- 5 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tests/basics/int_64_basics.py create mode 100644 tests/feature_check/int_64.py create mode 100644 tests/feature_check/int_64.py.exp diff --git a/tests/basics/int_64_basics.py b/tests/basics/int_64_basics.py new file mode 100644 index 0000000000000..73a06b64b13a0 --- /dev/null +++ b/tests/basics/int_64_basics.py @@ -0,0 +1,126 @@ +# test support for 64-bit long integers +# (some ports don't support arbitrary precision but do support these) + +# this test is adapted from int_big1.py with numbers kept within 64-bit signed range + +# to test arbitrary precision integers + +x = 1000000000000000000 +xn = -1000000000000000000 +y = 2000000000000000000 + +# printing +print(x) +print(y) +print('%#X' % (x - x)) # print prefix +print('{:#,}'.format(x)) # print with commas + +# construction +print(int(x)) + +# addition +print(x + 1) +print(x + y) +print(x + xn == 0) +print(bool(x + xn)) + +# subtraction +print(x - 1) +print(x - y) +print(y - x) +print(x - x == 0) 
+print(bool(x - x)) + +# multiplication +print(x * 2) +print(1090511627776 * 1048500) + +# integer division +print(x // 2) +print(y // x) + +# bit inversion +print(~x) +print(~(-x)) + +# left shift +print("left shift positive") +x = 0x40000000 +for i in range(32): + x = x << 1 + print(x) + +# right shift +print("right shift positive") +x = 0x2000000000000000 # TODO: why can't second-tip bit be set? +for i in range(64): + x = x >> 1 + print(x) + +# left shift of a negative number +print("left shift negative") +for i in range(8): + print(-10000000000000000 << i) + print(-10000000000000001 << i) + print(-10000000000000002 << i) + print(-10000000000000003 << i) + print(-10000000000000004 << i) + + +# right shift of a negative number +print("right shift negative") +for i in range(8): + print(-1000000000000000000 >> i) + print(-1000000000000000001 >> i) + print(-1000000000000000002 >> i) + print(-1000000000000000003 >> i) + print(-1000000000000000004 >> i) + +# conversion from string +print(int("1234567890123456789")) +print(int("-1234567890123456789")) +print(int("1234567890abcdef", 16)) +print(int("1234567890ABCDEF", 16)) +print(int("-1234567890ABCDEF", 16)) +print(int("ijklmnopqrsz", 36)) + +# numbers close to 64-bit limits +print(int("-9111222333444555666")) +print(int("9111222333444555666")) + +# numbers with preceding 0s +print(int("-00000000000000000000009111222333444555666")) +print(int("0000000000000000000000009111222333444555666")) + +# invalid characters in string +try: + print(int("1234567890abcdef")) +except ValueError: + print('ValueError'); +try: + print(int("123456789\x01")) +except ValueError: + print('ValueError'); + +# test parsing ints just on threshold of small to big +# for 32 bit archs +x = 1073741823 # small +x = -1073741823 # small +x = 1073741824 # big +x = -1073741824 # big +# for 64 bit archs +x = 4611686018427387903 # small +x = -4611686018427387903 # small +x = 4611686018427387904 # big +x = -4611686018427387904 # big + +# sys.maxsize is a 
constant bigint, so test it's compatible with dynamic ones +import sys +if hasattr(sys, "maxsize"): + print(sys.maxsize + 1 - 1 == sys.maxsize) +else: + print(True) # No maxsize property in this config + +# test extraction of big int value via mp_obj_get_int_maybe +x = 1 << 62 +print('a' * (x + 4 - x)) diff --git a/tests/extmod/uctypes_array_load_store.py b/tests/extmod/uctypes_array_load_store.py index 3b9bb6d7308ca..df7deb6837a17 100644 --- a/tests/extmod/uctypes_array_load_store.py +++ b/tests/extmod/uctypes_array_load_store.py @@ -6,6 +6,13 @@ print("SKIP") raise SystemExit +# 'int' needs to be able to represent UINT64 for this test +try: + int("FF" * 8, 16) +except OverflowError: + print("SKIP") + raise SystemExit + N = 5 for endian in ("NATIVE", "LITTLE_ENDIAN", "BIG_ENDIAN"): diff --git a/tests/feature_check/int_64.py b/tests/feature_check/int_64.py new file mode 100644 index 0000000000000..4d053782ca82b --- /dev/null +++ b/tests/feature_check/int_64.py @@ -0,0 +1,2 @@ +# Check whether 64-bit long integers are supported +print(1 << 62) diff --git a/tests/feature_check/int_64.py.exp b/tests/feature_check/int_64.py.exp new file mode 100644 index 0000000000000..aef5454e66263 --- /dev/null +++ b/tests/feature_check/int_64.py.exp @@ -0,0 +1 @@ +4611686018427387904 diff --git a/tests/run-tests.py b/tests/run-tests.py index fe338d7ffbaef..c218afae7194a 100755 --- a/tests/run-tests.py +++ b/tests/run-tests.py @@ -628,6 +628,7 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1): skip_tests = set() skip_native = False skip_int_big = False + skip_int_64 = False skip_bytearray = False skip_set_type = False skip_slice = False @@ -658,6 +659,11 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1): if output != b"1000000000000000000000000000000000000000000000\n": skip_int_big = True + # Check if 'long long' precision integers are supported, even if arbitrary precision is not + output = run_feature_check(pyb, args, "int_64.py") + if output != 
b"4611686018427387904\n": + skip_int_64 = True + # Check if bytearray is supported, and skip such tests if it's not output = run_feature_check(pyb, args, "bytearray.py") if output != b"bytearray\n": @@ -885,7 +891,12 @@ def run_one_test(test_file): test_name = os.path.splitext(os.path.basename(test_file))[0] is_native = test_name.startswith("native_") or test_name.startswith("viper_") is_endian = test_name.endswith("_endian") - is_int_big = test_name.startswith("int_big") or test_name.endswith("_intbig") + is_int_big = ( + test_name.startswith("int_big") + or test_name.endswith("_intbig") + or test_name.startswith("ffi_int") # these tests contain large integer literals + ) + is_int_64 = test_name.startswith("int_64") or test_name.endswith("_int64") is_bytearray = test_name.startswith("bytearray") or test_name.endswith("_bytearray") is_set_type = test_name.startswith(("set_", "frozenset")) or test_name.endswith("_set") is_slice = test_name.find("slice") != -1 or test_name in misc_slice_tests @@ -899,6 +910,7 @@ def run_one_test(test_file): skip_it |= skip_native and is_native skip_it |= skip_endian and is_endian skip_it |= skip_int_big and is_int_big + skip_it |= skip_int_64 and is_int_64 skip_it |= skip_bytearray and is_bytearray skip_it |= skip_set_type and is_set_type skip_it |= skip_slice and is_slice From 6d93b150b894c73656c33c56d049e45f70f8e3db Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 18 Mar 2025 13:27:02 +1100 Subject: [PATCH 2/8] tests/extmod/json_loads_int_64.py: Add test cases for LONGINT parse. These tests cover the use of mp_obj_new_int_from_str_len when mp_parse_num_integer overflows the SMALLINT limit, and also the case where the value may not be null terminated. Placed in a separate test file so that extmod/json test doesn't rely on bigint support. 
Signed-off-by: Yoctopuce dev Signed-off-by: Angus Gratton --- tests/extmod/json_loads_int_64.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/extmod/json_loads_int_64.py diff --git a/tests/extmod/json_loads_int_64.py b/tests/extmod/json_loads_int_64.py new file mode 100644 index 0000000000000..193a3c28d8282 --- /dev/null +++ b/tests/extmod/json_loads_int_64.py @@ -0,0 +1,16 @@ +# Parse 64-bit integers from JSON payloads. +# +# This also exercises parsing integers from strings +# where the value may not be null terminated (last line) +try: + import json +except ImportError: + print("SKIP") + raise SystemExit + + +print(json.loads("9111222333444555666")) +print(json.loads("-9111222333444555666")) +print(json.loads("9111222333444555666")) +print(json.loads("-9111222333444555666")) +print(json.loads("[\"9111222333444555666777\",9111222333444555666]")) From a54b5d9aed871b20d76df3c45cd78bf51d28b249 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 18 Mar 2025 17:09:17 +1100 Subject: [PATCH 3/8] unix/variants: Add a 'longlong' variant to test 64-bit bigints in CI. 
Signed-off-by: Angus Gratton --- .github/workflows/ports_unix.yml | 14 +++++++ .../unix/variants/longlong/mpconfigvariant.h | 37 +++++++++++++++++++ .../unix/variants/longlong/mpconfigvariant.mk | 8 ++++ tools/ci.sh | 8 ++++ 4 files changed, 67 insertions(+) create mode 100644 ports/unix/variants/longlong/mpconfigvariant.h create mode 100644 ports/unix/variants/longlong/mpconfigvariant.mk diff --git a/.github/workflows/ports_unix.yml b/.github/workflows/ports_unix.yml index 4b22926eaf8e5..60c0244a8f9e2 100644 --- a/.github/workflows/ports_unix.yml +++ b/.github/workflows/ports_unix.yml @@ -134,6 +134,20 @@ jobs: if: failure() run: tests/run-tests.py --print-failures + longlong: + runs-on: ubuntu-22.04 # use 22.04 to get python2, and libffi-dev:i386 + steps: + - uses: actions/checkout@v4 + - name: Install packages + run: source tools/ci.sh && ci_unix_32bit_setup + - name: Build + run: source tools/ci.sh && ci_unix_longlong_build + - name: Run main test suite + run: source tools/ci.sh && ci_unix_longlong_run_tests + - name: Print failures + if: failure() + run: tests/run-tests.py --print-failures + float: runs-on: ubuntu-latest steps: diff --git a/ports/unix/variants/longlong/mpconfigvariant.h b/ports/unix/variants/longlong/mpconfigvariant.h new file mode 100644 index 0000000000000..20c52e98f9dda --- /dev/null +++ b/ports/unix/variants/longlong/mpconfigvariant.h @@ -0,0 +1,37 @@ +/* + * This file is part of the MicroPython project, http://micropython.org/ + * + * The MIT License (MIT) + * + * Copyright (c) 2016 Damien P. 
George + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +// This config exists to test that the MICROPY_LONGINT_IMPL_LONGLONG variant +// (i.e. minimal form of "big integer" that's backed by 64-bit int only) builds +// and passes tests. + +#define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_LONGLONG) + +// Set base feature level. +#define MICROPY_CONFIG_ROM_LEVEL (MICROPY_CONFIG_ROM_LEVEL_EXTRA_FEATURES) + +// Enable extra Unix features. 
+#include "../mpconfigvariant_common.h" diff --git a/ports/unix/variants/longlong/mpconfigvariant.mk b/ports/unix/variants/longlong/mpconfigvariant.mk new file mode 100644 index 0000000000000..2d2c3706469fb --- /dev/null +++ b/ports/unix/variants/longlong/mpconfigvariant.mk @@ -0,0 +1,8 @@ +# build interpreter with "bigints" implemented as "longlong" + +# otherwise, small int is essentially 64-bit +MICROPY_FORCE_32BIT := 1 + +MICROPY_PY_FFI := 0 + +MPY_TOOL_FLAGS = -mlongint-impl longlong diff --git a/tools/ci.sh b/tools/ci.sh index 518eb7449713b..564b7810f57b3 100755 --- a/tools/ci.sh +++ b/tools/ci.sh @@ -695,6 +695,14 @@ function ci_unix_nanbox_run_tests { ci_unix_run_tests_full_no_native_helper nanbox PYTHON=python2.7 } +function ci_unix_longlong_build { + ci_unix_build_helper VARIANT=longlong +} + +function ci_unix_longlong_run_tests { + ci_unix_run_tests_full_helper longlong +} + function ci_unix_float_build { ci_unix_build_helper VARIANT=standard CFLAGS_EXTRA="-DMICROPY_FLOAT_IMPL=MICROPY_FLOAT_IMPL_FLOAT" ci_unix_build_ffi_lib_helper gcc From 0cf1e7c0598c5daee6d63c8b0dff0d9d67899fec Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 19 Mar 2025 10:35:58 +1100 Subject: [PATCH 4/8] tests/thread: Rename thread_lock4 test to thread_lock4_intbig. Relies on arbitrary precision math, so won't run on a port which has threads & limited bigint support. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- tests/thread/{thread_lock4.py => thread_lock4_intbig.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/thread/{thread_lock4.py => thread_lock4_intbig.py} (100%) diff --git a/tests/thread/thread_lock4.py b/tests/thread/thread_lock4_intbig.py similarity index 100% rename from tests/thread/thread_lock4.py rename to tests/thread/thread_lock4_intbig.py From d07f103d68d8bb1b65cba047b9bef093b9375ebd Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 25 Mar 2025 09:53:44 +1100 Subject: [PATCH 5/8] tests: Skip bm_pidigits perf test if no arbitrary precision int support. The other performance tests run and pass with only 64-bit big integer support. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton --- tests/perf_bench/bm_pidigits.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/perf_bench/bm_pidigits.py b/tests/perf_bench/bm_pidigits.py index bdaa73cec7e9f..c935f103c5b78 100644 --- a/tests/perf_bench/bm_pidigits.py +++ b/tests/perf_bench/bm_pidigits.py @@ -5,6 +5,12 @@ # This benchmark stresses big integer arithmetic. # Adapted from code on: http://benchmarksgame.alioth.debian.org/ +try: + int("0x10000000000000000", 16) +except: + print("SKIP") # No support for >64-bit integers + raise SystemExit + def compose(a, b): aq, ar, as_, at = a From 516aa02104c3344903bdda078b7c87f71f94938d Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Wed, 26 Mar 2025 11:07:52 +1100 Subject: [PATCH 6/8] py/objint_longlong: Add arithmetic overflow checks. Long long big integer support now raises an exception on overflow rather than returning an undefined result. Also adds an error when shifting by a negative value. The new arithmetic checks are added in the misc.h header. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- py/misc.h | 105 ++++++++++++++++++++++++++++-- py/objint_longlong.c | 51 +++++++++++---- tests/basics/int_64_basics.py | 13 +++- tests/extmod/uctypes_addressof.py | 7 +- 4 files changed, 154 insertions(+), 22 deletions(-) diff --git a/py/misc.h b/py/misc.h index 5d0893bbdd3f2..e034485838954 100644 --- a/py/misc.h +++ b/py/misc.h @@ -33,10 +33,15 @@ #include #include #include +#include typedef unsigned char byte; typedef unsigned int uint; +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + /** generic ops *************************************************/ #ifndef MIN @@ -374,26 +379,23 @@ static inline bool mp_check(bool value) { static inline uint32_t mp_popcount(uint32_t x) { return __popcnt(x); } -#else +#else // _MSC_VER #define mp_clz(x) __builtin_clz(x) #define mp_clzl(x) __builtin_clzl(x) #define mp_clzll(x) __builtin_clzll(x) #define mp_ctz(x) __builtin_ctz(x) #define mp_check(x) (x) -#if defined __has_builtin #if __has_builtin(__builtin_popcount) #define mp_popcount(x) __builtin_popcount(x) -#endif -#endif -#if !defined(mp_popcount) +#else static inline uint32_t mp_popcount(uint32_t x) { x = x - ((x >> 1) & 0x55555555); x = (x & 0x33333333) + ((x >> 2) & 0x33333333); x = (x + (x >> 4)) & 0x0F0F0F0F; return (x * 0x01010101) >> 24; } -#endif -#endif +#endif // __has_builtin(__builtin_popcount) +#endif // _MSC_VER #define MP_FIT_UNSIGNED(bits, value) (((value) & (~0U << (bits))) == 0) #define MP_FIT_SIGNED(bits, value) \ @@ -426,4 +428,93 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { #endif } +// Overflow-checked operations for long long + +// Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10 +// +// Note that the builtins have a defined result when overflow occurs, whereas the custom +// functions below don't update the result if an overflow would occur (to avoid UB). 
+#define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5) + +#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_mul_ull_overflow __builtin_umulll_overflow +#else +inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) { + if (y > 0 && x > (ULLONG_MAX / y)) { + return true; // overflow + } + *res = x * y; + return false; +} +#endif + +#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_mul_ll_overflow __builtin_smulll_overflow +#else +inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) { + bool overflow; + + // Check for multiply overflow; see CERT INT32-C + if (x > 0) { // x is positive + if (y > 0) { // x and y are positive + overflow = (x > (LLONG_MAX / y)); + } else { // x positive, y nonpositive + overflow = (y < (LLONG_MIN / x)); + } // x positive, y nonpositive + } else { // x is nonpositive + if (y > 0) { // x is nonpositive, y is positive + overflow = (x < (LLONG_MIN / y)); + } else { // x and y are nonpositive + overflow = (x != 0 && y < (LLONG_MAX / x)); + } // End if x and y are nonpositive + } // End if x is nonpositive + + if (!overflow) { + *res = x * y; + } + + return overflow; +} +#endif + +#if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_add_ll_overflow __builtin_saddll_overflow +#else +inline static bool mp_add_ll_overflow(long long int lhs, long long int rhs, long long int *res) { + bool overflow; + + if (rhs > 0) { + overflow = (lhs > LLONG_MAX - rhs); + } else { + overflow = (lhs < LLONG_MIN - rhs); + } + + if (!overflow) { + *res = lhs + rhs; + } + + return overflow; +} +#endif + +#if __has_builtin(__builtin_ssubll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#define mp_sub_ll_overflow __builtin_ssubll_overflow +#else +inline static bool mp_sub_ll_overflow(long long int lhs, long long int rhs, long long int *res) { + bool overflow; + + if 
(rhs > 0) { + overflow = (lhs < LLONG_MIN + rhs); + } else { + overflow = (lhs > LLONG_MAX + rhs); + } + + if (!overflow) { + *res = lhs - rhs; + } + + return overflow; +} +#endif + #endif // MICROPY_INCLUDED_PY_MISC_H diff --git a/py/objint_longlong.c b/py/objint_longlong.c index 5b60eb65ad85e..db09503215110 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -31,6 +31,7 @@ #include "py/smallint.h" #include "py/objint.h" #include "py/runtime.h" +#include "py/misc.h" #if MICROPY_PY_BUILTINS_FLOAT #include @@ -43,6 +44,10 @@ const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX}; #endif +static void raise_long_long_overflow(void) { + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage")); +} + mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) { int delta = 1; if (!big_endian) { @@ -120,7 +125,6 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) { // small int if the value fits without truncation case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val); - case MP_UNARY_OP_POSITIVE: return o_in; case MP_UNARY_OP_NEGATIVE: @@ -147,6 +151,8 @@ mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in) { mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in) { long long lhs_val; long long rhs_val; + bool overflow = false; + long long result; if (mp_obj_is_small_int(lhs_in)) { lhs_val = MP_OBJ_SMALL_INT_VALUE(lhs_in); @@ -167,13 +173,16 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i switch (op) { case MP_BINARY_OP_ADD: case MP_BINARY_OP_INPLACE_ADD: - return mp_obj_new_int_from_ll(lhs_val + rhs_val); + overflow = mp_add_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_SUBTRACT: case MP_BINARY_OP_INPLACE_SUBTRACT: - return mp_obj_new_int_from_ll(lhs_val - rhs_val); + overflow = mp_sub_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_MULTIPLY: case 
MP_BINARY_OP_INPLACE_MULTIPLY: - return mp_obj_new_int_from_ll(lhs_val * rhs_val); + overflow = mp_mul_ll_overflow(lhs_val, rhs_val, &result); + break; case MP_BINARY_OP_FLOOR_DIVIDE: case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE: if (rhs_val == 0) { @@ -199,9 +208,21 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i case MP_BINARY_OP_LSHIFT: case MP_BINARY_OP_INPLACE_LSHIFT: - return mp_obj_new_int_from_ll(lhs_val << (int)rhs_val); + if ((int)rhs_val < 0) { + // negative shift not allowed + mp_raise_ValueError(MP_ERROR_TEXT("negative shift count")); + } + result = lhs_val << (int)rhs_val; + // Left-shifting of negative values is implementation defined in C, but assume compiler + // will give us typical 2s complement behaviour unless the value overflows + overflow = rhs_val > 0 && ((lhs_val >= 0 && result < lhs_val) || (lhs_val < 0 && result > lhs_val)); + break; case MP_BINARY_OP_RSHIFT: case MP_BINARY_OP_INPLACE_RSHIFT: + if ((int)rhs_val < 0) { + // negative shift not allowed + mp_raise_ValueError(MP_ERROR_TEXT("negative shift count")); + } return mp_obj_new_int_from_ll(lhs_val >> (int)rhs_val); case MP_BINARY_OP_POWER: @@ -213,18 +234,18 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i mp_raise_ValueError(MP_ERROR_TEXT("negative power with no float support")); #endif } - long long ans = 1; - while (rhs_val > 0) { + result = 1; + while (rhs_val > 0 && !overflow) { if (rhs_val & 1) { - ans *= lhs_val; + overflow = mp_mul_ll_overflow(result, lhs_val, &result); } - if (rhs_val == 1) { + if (rhs_val == 1 || overflow) { break; } rhs_val /= 2; - lhs_val *= lhs_val; + overflow = mp_mul_ll_overflow(lhs_val, lhs_val, &lhs_val); } - return mp_obj_new_int_from_ll(ans); + break; } case MP_BINARY_OP_LESS: @@ -242,6 +263,12 @@ mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_i return MP_OBJ_NULL; // op not supported } + if (overflow) { + raise_long_long_overflow(); + } + + return 
mp_obj_new_int_from_ll(result); + zero_division: mp_raise_msg(&mp_type_ZeroDivisionError, MP_ERROR_TEXT("divide by zero")); } @@ -267,7 +294,7 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) { mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) { // TODO raise an exception if the unsigned long long won't fit if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) { - mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("ulonglong too large")); + raise_long_long_overflow(); } return mp_obj_new_int_from_ll(val); } diff --git a/tests/basics/int_64_basics.py b/tests/basics/int_64_basics.py index 73a06b64b13a0..289ea49b65ece 100644 --- a/tests/basics/int_64_basics.py +++ b/tests/basics/int_64_basics.py @@ -117,10 +117,21 @@ # sys.maxsize is a constant bigint, so test it's compatible with dynamic ones import sys if hasattr(sys, "maxsize"): - print(sys.maxsize + 1 - 1 == sys.maxsize) + print(sys.maxsize - 1 + 1 == sys.maxsize) else: print(True) # No maxsize property in this config # test extraction of big int value via mp_obj_get_int_maybe x = 1 << 62 print('a' * (x + 4 - x)) + +# negative shifts are invalid +try: + print((1 << 48) >> -4) +except ValueError as e: + print(e) + +try: + print((1 << 48) << -6) +except ValueError as e: + print(e) diff --git a/tests/extmod/uctypes_addressof.py b/tests/extmod/uctypes_addressof.py index c83089d0f72af..213fcc05eee2b 100644 --- a/tests/extmod/uctypes_addressof.py +++ b/tests/extmod/uctypes_addressof.py @@ -12,5 +12,8 @@ print(uctypes.addressof(uctypes.bytearray_at(1 << i, 8))) # Test address that is bigger than the greatest small-int but still within the address range. 
-large_addr = maxsize + 1 -print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr) +try: + large_addr = maxsize + 1 + print(uctypes.addressof(uctypes.bytearray_at(large_addr, 8)) == large_addr) +except OverflowError: + print(True) # systems with 64-bit bigints will overflow on the above operation From e9845ab20ec798c1d5bf00bd3b64ff5d96d94500 Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Fri, 2 May 2025 15:39:35 +1000 Subject: [PATCH 7/8] py/smallint: Update mp_small_int_mul_overflow() to perform the multiply. Makes it compatible with the __builtin_mul_overflow() syntax, used in follow-up commit. Includes optimisation in runtime.c to minimise the code size impact from additional param. Signed-off-by: Damien George Signed-off-by: Angus Gratton --- py/parsenum.c | 4 ++-- py/runtime.c | 13 +++++++------ py/smallint.c | 5 ++++- py/smallint.h | 5 ++++- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/py/parsenum.c b/py/parsenum.c index 7e6695fbfcd70..31b332c180e31 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -99,10 +99,10 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m } // add next digi and check for overflow - if (mp_small_int_mul_overflow(int_val, base)) { + if (mp_small_int_mul_overflow(int_val, base, &int_val)) { goto overflow; } - int_val = int_val * base + dig; + int_val += dig; if (!MP_SMALL_INT_FITS(int_val)) { goto overflow; } diff --git a/py/runtime.c b/py/runtime.c index 90587a010a460..0ab0626ef9407 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -505,13 +505,14 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs } #endif - if (mp_small_int_mul_overflow(lhs_val, rhs_val)) { + mp_int_t int_res; + if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) { // use higher precision lhs = mp_obj_new_int_from_ll(lhs_val); goto generic_binary_op; } else { // use standard precision - return MP_OBJ_NEW_SMALL_INT(lhs_val * rhs_val); + return 
MP_OBJ_NEW_SMALL_INT(int_res); } } case MP_BINARY_OP_FLOOR_DIVIDE: @@ -552,19 +553,19 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs mp_int_t ans = 1; while (rhs_val > 0) { if (rhs_val & 1) { - if (mp_small_int_mul_overflow(ans, lhs_val)) { + if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) { goto power_overflow; } - ans *= lhs_val; } if (rhs_val == 1) { break; } rhs_val /= 2; - if (mp_small_int_mul_overflow(lhs_val, lhs_val)) { + mp_int_t int_res; + if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) { goto power_overflow; } - lhs_val *= lhs_val; + lhs_val = int_res; } lhs_val = ans; } diff --git a/py/smallint.c b/py/smallint.c index aa542ca7bf29a..a494093d61a21 100644 --- a/py/smallint.c +++ b/py/smallint.c @@ -26,7 +26,7 @@ #include "py/smallint.h" -bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y) { +bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { // Check for multiply overflow; see CERT INT32-C if (x > 0) { // x is positive if (y > 0) { // x and y are positive @@ -49,6 +49,9 @@ bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y) { } } // End if x and y are nonpositive } // End if x is nonpositive + + // Result doesn't overflow + *res = x * y; return false; } diff --git a/py/smallint.h b/py/smallint.h index 584e0018d1ba3..e50f98651e6ae 100644 --- a/py/smallint.h +++ b/py/smallint.h @@ -68,7 +68,10 @@ // The number of bits in a MP_SMALL_INT including the sign bit. #define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1) -bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y); +// Multiply two small ints. +// If returns false, the correct result is stored in 'res' +// If returns true, the multiplication would have overflowed. 'res' is unchanged. 
+bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res); mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor); mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom); From 17fbc5abdc7e139a922f6a11619deb7cb031e0cb Mon Sep 17 00:00:00 2001 From: Angus Gratton Date: Tue, 15 Jul 2025 11:23:28 +1000 Subject: [PATCH 8/8] py/parsenum: Extend mp_parse_num_integer() to parse long long. If big integer support is 'long long' then mp_parse_num_integer() can parse to it directly instead of failing over from small int. This means strtoll() is no longer pulled in, and fixes some bugs parsing long long integers (i.e. can now parse negative values correctly, can now parse values which aren't NULL terminated). The (default) smallint parsing compiled code should stay the same here; macros and a typedef are used to abstract some parts of it out. When bigint is long long we parse to 'unsigned long long' first (to avoid the code size hit of pulling in signed 64-bit math routines) and then convert to signed at the end. One tricky case this routine correctly overflows on is int("9223372036854775808") which is one more than LLONG_MAX in decimal. No unit test case added for this as it's too hard to detect 64-bit long integer mode. This work was funded through GitHub Sponsors. 
Signed-off-by: Angus Gratton --- py/objint_longlong.c | 10 --------- py/parsenum.c | 51 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/py/objint_longlong.c b/py/objint_longlong.c index db09503215110..22ac0ba12efa3 100644 --- a/py/objint_longlong.c +++ b/py/objint_longlong.c @@ -292,22 +292,12 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) { } mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) { - // TODO raise an exception if the unsigned long long won't fit if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) { raise_long_long_overflow(); } return mp_obj_new_int_from_ll(val); } -mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) { - // TODO this does not honor the given length of the string, but it all cases it should anyway be null terminated - // TODO check overflow - char *endptr; - mp_obj_t result = mp_obj_new_int_from_ll(strtoll(*str, &endptr, base)); - *str = endptr; - return result; -} - mp_int_t mp_obj_int_get_truncated(mp_const_obj_t self_in) { if (mp_obj_is_small_int(self_in)) { return MP_OBJ_SMALL_INT_VALUE(self_in); diff --git a/py/parsenum.c b/py/parsenum.c index 31b332c180e31..fcc69091737d0 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -46,6 +46,27 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) { nlr_raise(exc); } +#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG +// For the common small integer parsing case, we parse directly to mp_int_t and +// check that the value doesn't overflow a smallint (in which case we fail over +// to bigint parsing if supported) +typedef mp_int_t parsed_int_t; + +#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow +#define PARSED_INT_FITS MP_SMALL_INT_FITS +#else +// In the special case where bigint support is long long, we save code size by +// parsing directly to long long and then return either a bigint or smallint +// from the same result. 
+// +// To avoid pulling in (slow) signed 64-bit math routines we do the initial +// parsing to an unsigned long long and only convert to signed at the end. +typedef unsigned long long parsed_int_t; + +#define PARSED_INT_MUL_OVERFLOW mp_mul_ull_overflow +#define PARSED_INT_FITS(I) ((I) <= (unsigned long long)LLONG_MAX) +#endif + mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, mp_lexer_t *lex) { const byte *restrict str = (const byte *)str_; const byte *restrict top = str + len; @@ -76,7 +97,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m str += mp_parse_num_base((const char *)str, top - str, &base); // string should be an integer number - mp_int_t int_val = 0; + parsed_int_t parsed_val = 0; const byte *restrict str_val_start = str; for (; str < top; str++) { // get next digit as a value @@ -98,25 +119,29 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m break; } - // add next digi and check for overflow - if (mp_small_int_mul_overflow(int_val, base, &int_val)) { + // add next digit and check for overflow + if (PARSED_INT_MUL_OVERFLOW(parsed_val, base, &parsed_val)) { goto overflow; } - int_val += dig; - if (!MP_SMALL_INT_FITS(int_val)) { + parsed_val += dig; + if (!PARSED_INT_FITS(parsed_val)) { goto overflow; } } - // negate value if needed + #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG + // The PARSED_INT_FITS check above ensures parsed_val fits in small int representation + ret_val = MP_OBJ_NEW_SMALL_INT(neg ? 
(-parsed_val) : parsed_val); +have_ret_val: + #else + // The PARSED_INT_FITS check above ensures parsed_val won't overflow signed long long + long long signed_val = parsed_val; if (neg) { - int_val = -int_val; + signed_val = -signed_val; } + ret_val = mp_obj_new_int_from_ll(signed_val); // Could be large or small int + #endif - // create the small int - ret_val = MP_OBJ_NEW_SMALL_INT(int_val); - -have_ret_val: // check we parsed something if (str == str_val_start) { goto value_error; @@ -135,6 +160,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m return ret_val; overflow: + #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG // reparse using long int { const char *s2 = (const char *)str_val_start; @@ -142,6 +168,9 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m str = (const byte *)s2; goto have_ret_val; } + #else + mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage")); + #endif value_error: { pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy