Skip to content

Commit 84895f1

Browse files
committed
py/parsenum: Improve parsing of floating point numbers.
This patch improves parsing of floating-point numbers by converting all the digits (integer and fractional) together into a number 1 or greater, and then applying the correct power of 10 at the very end. In particular, the multiple "multiply by 0.1" operations previously used to build up the fraction are now combined and applied in a single step together with the exponent. This helps to retain precision during parsing of floats, and the patch also includes a check that the number doesn't overflow during parsing. One benefit is that a float will have the same value no matter where the decimal point is located, e.g. 1.23 == 123e-2.
1 parent f59c6b4 commit 84895f1

File tree

4 files changed

+60
-6
lines changed

4 files changed

+60
-6
lines changed

py/parsenum.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,14 @@ typedef enum {
170170

171171
mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex) {
172172
#if MICROPY_PY_BUILTINS_FLOAT
173+
174+
// DEC_VAL_MAX only needs to be rough and is used to retain precision while not overflowing
175+
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
176+
#define DEC_VAL_MAX 1e20F
177+
#elif MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE
178+
#define DEC_VAL_MAX 1e200
179+
#endif
180+
173181
const char *top = str + len;
174182
mp_float_t dec_val = 0;
175183
bool dec_neg = false;
@@ -214,20 +222,27 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
214222
// string should be a decimal number
215223
parse_dec_in_t in = PARSE_DEC_IN_INTG;
216224
bool exp_neg = false;
217-
mp_float_t frac_mult = 0.1;
218225
mp_int_t exp_val = 0;
226+
mp_int_t exp_extra = 0;
219227
while (str < top) {
220228
mp_uint_t dig = *str++;
221229
if ('0' <= dig && dig <= '9') {
222230
dig -= '0';
223231
if (in == PARSE_DEC_IN_EXP) {
224232
exp_val = 10 * exp_val + dig;
225233
} else {
226-
if (in == PARSE_DEC_IN_FRAC) {
227-
dec_val += dig * frac_mult;
228-
frac_mult *= MICROPY_FLOAT_CONST(0.1);
229-
} else {
234+
if (dec_val < DEC_VAL_MAX) {
235+
// dec_val won't overflow so keep accumulating
230236
dec_val = 10 * dec_val + dig;
237+
if (in == PARSE_DEC_IN_FRAC) {
238+
--exp_extra;
239+
}
240+
} else {
241+
// dec_val might overflow, and we can't represent any more digits
242+
// of precision, so ignore the digit and just adjust the exponent
243+
if (in == PARSE_DEC_IN_INTG) {
244+
++exp_extra;
245+
}
231246
}
232247
}
233248
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
@@ -261,7 +276,7 @@ mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool
261276
}
262277

263278
// apply the exponent
264-
dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
279+
dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val + exp_extra);
265280
}
266281

267282
// negate value if needed

tests/float/float_parse.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# test parsing of floats
2+
3+
inf = float('inf')
4+
5+
# it shouldn't matter where the decimal point is if the exponent balances the value
6+
print(float('1234') - float('0.1234e4'))
7+
print(float('1.015625') - float('1015625e-6'))
8+
9+
# very large integer part with a very negative exponent should cancel out
10+
print(float('9' * 60 + 'e-60'))
11+
print(float('9' * 60 + 'e-40'))
12+
print(float('9' * 60 + 'e-20') == float('1e40'))
13+
14+
# many fractional digits
15+
print(float('.' + '9' * 70))
16+
print(float('.' + '9' * 70 + 'e20'))
17+
print(float('.' + '9' * 70 + 'e-50') == float('1e-50'))
18+
19+
# tiny fraction with large exponent
20+
print(float('.' + '0' * 60 + '1e10') == float('1e-51'))
21+
print(float('.' + '0' * 60 + '9e25'))
22+
print(float('.' + '0' * 60 + '9e40'))

tests/float/float_parse_doubleprec.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# test parsing of floats, requiring double-precision
2+
3+
# very large integer part with a very negative exponent should cancel out
4+
print(float('9' * 400 + 'e-100'))
5+
print(float('9' * 400 + 'e-200'))
6+
print(float('9' * 400 + 'e-400'))
7+
8+
# many fractional digits
9+
print(float('.' + '9' * 400))
10+
print(float('.' + '9' * 400 + 'e100'))
11+
print(float('.' + '9' * 400 + 'e-100'))
12+
13+
# tiny fraction with large exponent
14+
print(float('.' + '0' * 400 + '9e100'))
15+
print(float('.' + '0' * 400 + '9e200'))
16+
print(float('.' + '0' * 400 + '9e400'))

tests/run-tests

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ def run_tests(pyb, tests, args, base_path="."):
271271
if upy_float_precision < 64:
272272
skip_tests.add('float/float_divmod.py') # tested by float/float_divmod_relaxed.py instead
273273
skip_tests.add('float/float2int_doubleprec_intbig.py')
274+
skip_tests.add('float/float_parse_doubleprec.py')
274275

275276
if not has_complex:
276277
skip_tests.add('float/complex1.py')

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy