Skip to content

Commit c8f23ee

Browse files
committed
py/parsenum: Extend mp_parse_num_integer() to parse long long.
If big integer support is 'long long' then mp_parse_num_integer() can parse to it directly instead of failing over from small int. This means strtoll() is no longer pulled in, and fixes some bugs parsing long long integers (i.e. can now parse negative values correctly, can now parse values which aren't NULL terminated). The (default) smallint parsing compiled code should stay the same here, macros and a typedef are used to abstract some parts of it out. When bigint is long long we parse to 'unsigned long long' first (to avoid the code size hit of pulling in signed 64-bit math routines) and then convert to signed at the end. One tricky case this routine correctly overflows on is int("9223372036854775808") which is one more than LLONG_MAX in decimal. No unit test case added for this as it's too hard to detect 64-bit long integer mode. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
1 parent 5b58624 commit c8f23ee

File tree

2 files changed

+40
-23
lines changed

2 files changed

+40
-23
lines changed

py/objint_longlong.c

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@
3939

4040
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_LONGLONG
4141

42-
#include <errno.h>
43-
4442
#if MICROPY_PY_SYS_MAXSIZE
4543
// Export value for sys.maxsize
4644
const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
@@ -294,22 +292,12 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) {
294292
}
295293

296294
mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
297-
// TODO raise an exception if the unsigned long long won't fit
298295
if (val >> (sizeof(unsigned long long) * 8 - 1) != 0) {
299296
raise_long_long_overflow();
300297
}
301298
return mp_obj_new_int_from_ll(val);
302299
}
303300

304-
mp_obj_t mp_obj_new_int_from_str_len(const char **str, size_t len, bool neg, unsigned int base) {
305-
// TODO this does not honor the given length of the string, but it all cases it should anyway be null terminated
306-
// TODO check overflow
307-
char *endptr;
308-
mp_obj_t result = mp_obj_new_int_from_ll(strtoll(*str, &endptr, base));
309-
*str = endptr;
310-
return result;
311-
}
312-
313301
mp_int_t mp_obj_int_get_truncated(mp_const_obj_t self_in) {
314302
if (mp_obj_is_small_int(self_in)) {
315303
return MP_OBJ_SMALL_INT_VALUE(self_in);

py/parsenum.c

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,27 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
4646
nlr_raise(exc);
4747
}
4848

49+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
50+
// For the common small integer parsing case, we parse directly to mp_int_t and
51+
// check that the value doesn't overflow a smallint (in which case we fail over
52+
// to bigint parsing if supported)
53+
typedef mp_int_t parsed_int_t;
54+
55+
#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
56+
#define PARSED_INT_FITS MP_SMALL_INT_FITS
57+
#else
58+
// In the special case where bigint support is long long, we save code size by
59+
// parsing directly to long long and then return either a bigint or smallint
60+
// from the same result.
61+
//
62+
// To avoid pulling in (slow) signed 64-bit math routines we do the initial
63+
// parsing to an unsigned long long and only convert to signed at the end.
64+
typedef unsigned long long parsed_int_t;
65+
66+
#define PARSED_INT_MUL_OVERFLOW mp_mul_ull_overflow
67+
#define PARSED_INT_FITS(I) ((I) <= (unsigned long long)LLONG_MAX)
68+
#endif
69+
4970
mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, mp_lexer_t *lex) {
5071
const byte *restrict str = (const byte *)str_;
5172
const byte *restrict top = str + len;
@@ -76,7 +97,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
7697
str += mp_parse_num_base((const char *)str, top - str, &base);
7798

7899
// string should be an integer number
79-
mp_int_t int_val = 0;
100+
parsed_int_t parsed_val = 0;
80101
const byte *restrict str_val_start = str;
81102
for (; str < top; str++) {
82103
// get next digit as a value
@@ -98,25 +119,29 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
98119
break;
99120
}
100121

101-
// add next digi and check for overflow
102-
if (mp_small_int_mul_overflow(int_val, base, &int_val)) {
122+
// add next digit and check for overflow
123+
if (PARSED_INT_MUL_OVERFLOW(parsed_val, base, &parsed_val)) {
103124
goto overflow;
104125
}
105-
int_val += dig;
106-
if (!MP_SMALL_INT_FITS(int_val)) {
126+
parsed_val += dig;
127+
if (!PARSED_INT_FITS(parsed_val)) {
107128
goto overflow;
108129
}
109130
}
110131

111-
// negate value if needed
132+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
133+
// The PARSED_INT_FITS check above ensures parsed_val fits in small int representation
134+
ret_val = MP_OBJ_NEW_SMALL_INT(neg ? (-parsed_val) : parsed_val);
135+
have_ret_val:
136+
#else
137+
// The PARSED_INT_FITS check above ensures parsed_val won't overflow signed long long
138+
long long signed_val = parsed_val;
112139
if (neg) {
113-
int_val = -int_val;
140+
signed_val = -signed_val;
114141
}
142+
ret_val = mp_obj_new_int_from_ll(signed_val); // Could be large or small int
143+
#endif
115144

116-
// create the small int
117-
ret_val = MP_OBJ_NEW_SMALL_INT(int_val);
118-
119-
have_ret_val:
120145
// check we parsed something
121146
if (str == str_val_start) {
122147
goto value_error;
@@ -135,13 +160,17 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
135160
return ret_val;
136161

137162
overflow:
163+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
138164
// reparse using long int
139165
{
140166
const char *s2 = (const char *)str_val_start;
141167
ret_val = mp_obj_new_int_from_str_len(&s2, top - str_val_start, neg, base);
142168
str = (const byte *)s2;
143169
goto have_ret_val;
144170
}
171+
#else
172+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage"));
173+
#endif
145174

146175
value_error:
147176
{

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy