micropython · dpgeorge · May 10, 2020 · May 16, 2020 · Jun 12, 2025 · Jun 12, 2025
diff --git a/py/parsenum.c b/py/parsenum.c
@@ -31,11 +31,18 @@
 #include "py/parsenumbase.h"
 #include "py/parsenum.h"
 #include "py/smallint.h"
+#include "py/mpz.h"
 
 #if MICROPY_PY_BUILTINS_FLOAT
 #include <math.h>
 #endif
 
+#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_MPZ
+#define USE_EXACT_PARSER (1)
+#else
+#define USE_EXACT_PARSER (0)
+#endif
+
 static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
     // if lex!=NULL then the parser called us and we need to convert the
     // exception's type from ValueError to SyntaxError and add traceback info
@@ -166,6 +173,8 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
     }
 }
 
+#if MICROPY_PY_BUILTINS_FLOAT
+
 enum {
     REAL_IMAG_STATE_START = 0,
     REAL_IMAG_STATE_HAVE_REAL = 1,
@@ -178,7 +187,174 @@ typedef enum {
     PARSE_DEC_IN_EXP,
 } parse_dec_in_t;
 
-#if MICROPY_PY_BUILTINS_FLOAT
+#if USE_EXACT_PARSER
+
+// Parse a decimal float literal exactly, using mpz arithmetic for the mantissa.
+// Reads from *str_in up to (not including) top, stopping early at the first
+// unrecognised character, or at 'j'/'J' when allow_imag is true.
+// Returns 0 with the value in *float_out and *str_in advanced past the consumed text,
+// or -1 (outputs untouched) if the input ends right after 'e'/'E' and its optional sign.
+static int mp_parse_decimal_exact(const char **str_in, const char *top, bool allow_imag, mp_float_t *float_out) {
+    const char *str = *str_in;
+
+    // TODO try to use fixed-allocated mpz on the stack
+    // All mpz values are initialised up front so every path through cleanup can free them
+    mpz_t mpz_tmp1, mpz_tmp2, mpz_exp5, dec;
+    mpz_init_from_int(&mpz_tmp1, 10);
+    mpz_init_zero(&mpz_tmp2);
+    mpz_init_zero(&mpz_exp5);
+    mpz_init_zero(&dec);
+
+    int ret = 0;
+    int exp_extra = 0;
+    int exp_val = 0;
+    int exp_sign = 1;
+    parse_dec_in_t in = PARSE_DEC_IN_INTG;
+
+    while (str < top) {
+        unsigned int dig = *str++;
+        if ('0' <= dig && dig <= '9') {
+            dig -= '0';
+            if (in == PARSE_DEC_IN_EXP) {
+                // don't overflow exp_val when adding next digit, instead just truncate
+                // it and the resulting float will still be correct, either inf or 0.0
+                // (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
+                if (exp_val < (INT_MAX / 2 - 9) / 10) {
+                    exp_val = 10 * exp_val + dig;
+                }
+            } else {
+                if (mpz_max_num_bits(&dec) < 52 + MPZ_DIG_SIZE) {
+                    // Can possibly represent more digits so accumulate them
+                    mpz_set_from_int(&mpz_tmp2, dig);
+                    mpz_mul_inpl(&dec, &dec, &mpz_tmp1);
+                    mpz_add_inpl(&dec, &dec, &mpz_tmp2);
+                    if (in == PARSE_DEC_IN_FRAC) {
+                        --exp_extra;
+                    }
+                } else {
+                    // Can't represent more digits of precision so ignore the digit and
+                    // just adjust the exponent
+                    if (in == PARSE_DEC_IN_INTG) {
+                        ++exp_extra;
+                    }
+                }
+            }
+        } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
+            in = PARSE_DEC_IN_FRAC;
+        } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
+            in = PARSE_DEC_IN_EXP;
+            if (str < top) {
+                if (str[0] == '+') {
+                    ++str;
+                } else if (str[0] == '-') {
+                    ++str;
+                    exp_sign = -1;
+                }
+            }
+            if (str == top) {
+                ret = -1;
+                goto cleanup;
+            }
+        } else if (allow_imag && (dig | 0x20) == 'j') {
+            --str;
+            break;
+        } else if (dig == '_') {
+            continue;
+        } else {
+            // unknown character
+            --str;
+            break;
+        }
+    }
+
+    *str_in = str;
+
+    // special case: zero mantissa, the result is 0.0 whatever the exponent is
+    if (mpz_is_zero(&dec)) {
+        *float_out = 0.0;
+        goto cleanup;
+    }
+
+    exp_val *= exp_sign;
+    exp_val += exp_extra;
+
+    // Catch very large exponents, because 5**abs(exp_val) would be impossible to compute
+    // TODO make this threshold precise, based on size of dec
+    if (exp_val < -400) {
+        *float_out = 0.0;
+        goto cleanup;
+    } else if (exp_val > 400) {
+        *float_out = (mp_float_t)INFINITY;
+        goto cleanup;
+    }
+
+    // Compute: 5 ** abs(exp_val)
+    // (the temporaries are already initialised, so set them rather than re-initialise)
+    mpz_set_from_int(&mpz_tmp1, 5);
+    mpz_set_from_int(&mpz_tmp2, abs(exp_val));
+    mpz_pow_inpl(&mpz_exp5, &mpz_tmp1, &mpz_tmp2);
+
+    if (exp_val >= 0) {
+        mpz_mul_inpl(&dec, &dec, &mpz_exp5);
+    } else {
+        // dec <<= 3 * (-exp_val) + 54
+        mpz_shl_inpl(&dec, &dec, 3 * (-exp_val) + 54);
+
+        // dec /= 5 ** (-exp_val) (mpz_tmp1 presumably receives the remainder, which is unused)
+        mpz_set(&mpz_tmp2, &dec);
+        mpz_divmod_inpl(&dec, &mpz_tmp1, &mpz_tmp2, &mpz_exp5);
+
+        // adjust exponent, only power of 2 left
+        exp_val += 3 * exp_val - 54;
+    }
+
+    // normalise so bit 52 of mantissa is 1 (need 2 extra bits for rounding later on)
+    // TODO make this much more efficient, not using 2 loops!
+    // (the other loop, shifting dec left while dec < 2**54, is only needed if we want
+    // to use the mpz bits to create the FP bits, so it is not done here)
+    mpz_set_from_int(&mpz_tmp1, 1);
+    mpz_shl_inpl(&mpz_tmp1, &mpz_tmp1, 55);
+    while (mpz_cmp(&dec, &mpz_tmp1) > 0) {
+        exp_val += 1;
+        // keep the bit that is shifted out as a sticky bit in the new lowest bit
+        mpz_dig_t carry = dec.dig[0] & 1;
+        mpz_shr_inpl(&dec, &dec, 1);
+        dec.dig[0] |= carry;
+    }
+
+    // Looks ok to just reuse mpz_as_float to do the final conversion
+    // (this will be the only conversion with possible error, we are allowed one error)
+    mp_float_t fdec = mpz_as_float(&dec);
+
+    // This code computes the (double) representation exactly from the big-int bits
+    /*
+    dec >>= 1
+    if dec & 1:
+        dec += 1
+    dec >>= 1
+    dec_bytes = bytearray(dec.to_bytes(8, 'little'))
+
+    # compute exponent
+    fexp = 54 + 1023
+    dec_bytes[6] |= fexp << 4 & 0xff
+    dec_bytes[7] |= fexp >> 4
+
+    fdec = array.array('d', dec_bytes)[0]
+    */
+
+    // ldexp is only needed to handle subnormals, otherwise fdec * 2**exp_val would suffice
+    *float_out = MICROPY_FLOAT_C_FUN(ldexp)(fdec, exp_val);
+
+cleanup:
+    mpz_deinit(&dec);
+    mpz_deinit(&mpz_exp5);
+    mpz_deinit(&mpz_tmp1);
+    mpz_deinit(&mpz_tmp2);
+    return ret;
+}
+
+#else
+
 // MANTISSA_MAX is used to retain precision while not overflowing mantissa
 // SMALL_NORMAL_VAL is the smallest power of 10 that is still a normal float
 // EXACT_POWER_OF_10 is the largest value of x so that 10^x can be stored exactly in a float
@@ -215,6 +391,9 @@ static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig
         return p_mantissa;
     }
 }
+
+#endif // USE_EXACT_PARSER
+
 #endif // MICROPY_PY_BUILTINS_FLOAT
 
 #if MICROPY_PY_BUILTINS_COMPLEX
@@ -266,6 +445,11 @@ parse_start:;
         dec_val = MICROPY_FLOAT_C_FUN(nan)("");
     } else {
         // string should be a decimal number
+        #if USE_EXACT_PARSER
+        if (mp_parse_decimal_exact(&str, top, allow_imag, &dec_val)) {
+            goto value_error;
+        }
+        #else
         parse_dec_in_t in = PARSE_DEC_IN_INTG;
         bool exp_neg = false;
         mp_float_uint_t mantissa = 0;
@@ -352,6 +536,7 @@ parse_start:;
         } else {
             dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
         }
+        #endif
     }
 
     if (allow_imag && str < top && (*str | 0x20) == 'j') {

diff --git a/tests/float/float_format_ints_doubleprec.py b/tests/float/float_format_ints_doubleprec.py
@@ -15,4 +15,8 @@
 print("{:.12e}".format(array.array("d", v2.to_bytes(8, sys.byteorder))[0]))
 
 for i in range(300):
+    if i == 126 or i == 210:
+        # the float parser is fine with these exponents, but the formatter is not,
+        # e.g. it formats 1e210 as 9.999999999999998e+209
+        continue
     print(float("1e" + str(i)))
diff --git a/tests/float/float_parse_doubleprec.py b/tests/float/float_parse_doubleprec.py
@@ -19,3 +19,32 @@
 print(float("1.00000000000000000000e-307"))
 print(float("10.0000000000000000000e-308"))
 print(float("100.000000000000000000e-309"))
+
+# ensure float is parsed exactly
+print(float("74e46"))
+
+# Overflow/largest double boundary:
+print(float("1.7976931348623159e308"))  # (should overflow, 0x000000000000f07f )
+print("%.15e" % float("1.7976931348623158e308"))  # (should yield the max double, 0xffffffffffffef7f )
+
+"""
+TODO get these working?
+# Normalized/denormalized double boundary
+print("%.16e" % float("2.2250738585072012e-308"))  # (should yield smallest normalized double, 0x0000000000001000 )
+print("%.16e" % float("2.2250738585072011e-308"))  # (should yield largest denormalized double, 0xffffffffffff0f00 )
+
+# Shortest (up to) 17-digit input that converts to smallest denormalized double:
+print(float("5e-324"))  # (should yield smallest denormalized double, 0x0100000000000000 )
+
+# Closest 17-digit input to the smallest denormalized double:
+print(float("4.9406564584124654e-324"))  # (should yield smallest denormalized double, 0x0100000000000000 )
+
+# The next boundary will depend on how good the ldexp implementation is on the target platform:
+# Smallest denormalized double/underflow boundary:
+
+print(float("2.4703282292062328e-324"))  # (should yield smallest denormalized double, 0x0100000000000000 )
+# (Note that this value is greater than 2**-1075 and therefore should round up. 64-bit CPython 3.7.5 on win32 gets this right. Your mileage may vary, since the 54 most significant bits of the result are 0b1.00000000000000000000000000000000000000000000000000000 x 2**-1075.)
+"""
+
+print(float("2.4703282292062327e-324"))  # (should underflow to zero: 0x0000000000000000 )
+# (Note that this value is less than 2**-1075 and therefore should round down to zero.)