py/formatfloat: Improve accuracy of float formatting code.

yoctopuce · yoctopuce · commit 6efb9056f3bf · 2025-06-06T19:43:45.000+02:00
Following discussions in PR #16666, this commit updates the float formatting code to reduce the `repr` reversibility error, i.e. the percentage of valid floating-point numbers that do not parse back to the same number after being formatted by `repr`. The baseline before this commit is an error rate of ~46% when using double-precision floats. The new code initially brings the error rate down to ~41% by using an integer representation of the decimal mantissa rather than working on floats. It also improves the rounding in some conditions. An additional accuracy improvement, based on iterative refinement, can be turned on to bring the error rate down to 4.5%. However, this extra code makes the code slightly slower than CPython when tested on ports/unix. The residual error rate appears to be due to the parse code itself, which is unable to produce some specific floating-point values regardless of the string provided as input. Signed-off-by: Yoctopuce dev <dev@yoctopuce.com>
diff --git a/py/formatfloat.c b/py/formatfloat.c
diff --git a/py/objfloat.c b/py/objfloat.c
@@ -114,14 +114,10 @@ static void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
     mp_float_t o_val = mp_obj_float_get(o_in);
     #if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
     char buf[16];
-    #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
-    const int precision = 6;
-    #else
-    const int precision = 7;
-    #endif
+    const int precision = 8;
     #else
     char buf[32];
-    const int precision = 16;
+    const int precision = 18;
     #endif
     mp_format_float(o_val, buf, sizeof(buf), 'g', precision, '\0');
     mp_print_str(print, buf);
diff --git a/py/parsenum.c b/py/parsenum.c
@@ -197,6 +197,23 @@ typedef enum {
 #define EXACT_POWER_OF_10 (22)
 #endif
 
+mp_float_t mp_decimal_exp(mp_float_t num, int exp_val) {
+    if (exp_val == 0) {
+        return num;
+    }
+    // If possible, we would rather manipulate numbers that have an exact representation
+    // in IEEE754. It turns out small positive powers of 10 do, whereas small negative
+    // powers of 10 don't. So for small negative exponents, we divide by an exact power
+    // of 10 rather than multiply by a slightly erroneous one.
+    if (exp_val < 0 && exp_val >= -EXACT_POWER_OF_10) {
+        num /= MICROPY_FLOAT_C_FUN(pow)(10, -exp_val);
+    } else {
+        num *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
+    }
+    return num;
+}
+
+
 // Break out inner digit accumulation routine to ease trailing zero deferral.
 static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) {
     // Core routine to ingest an additional digit.
@@ -215,6 +232,92 @@ static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig
         return p_mantissa;
     }
 }
+
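+// Internal function to parse an unsigned decimal number: stores the value in *res and returns a pointer to the first unparsed character, or NULL if the input ends right after the exponent marker (and its optional sign).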
+const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res) {
+    const char *top = str + len;
+
+    parse_dec_in_t in = PARSE_DEC_IN_INTG;
+    bool exp_neg = false;
+    mp_float_uint_t mantissa = 0;
+    int exp_val = 0;
+    int exp_extra = 0;
+    int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
+    while (str < top) {
+        unsigned int dig = *str++;
+        if ('0' <= dig && dig <= '9') {
+            dig -= '0';
+            if (in == PARSE_DEC_IN_EXP) {
+                // don't overflow exp_val when adding next digit, instead just truncate
+                // it and the resulting float will still be correct, either inf or 0.0
+                // (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
+                if (exp_val < (INT_MAX / 2 - 9) / 10) {
+                    exp_val = 10 * exp_val + dig;
+                }
+            } else {
+                if (dig == 0 || mantissa >= MANTISSA_MAX) {
+                    // Defer treatment of zeros in fractional part.  If nothing comes afterwards, ignore them.
+                    // Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
+                    if (in == PARSE_DEC_IN_INTG) {
+                        ++trailing_zeros_intg;
+                    } else {
+                        ++trailing_zeros_frac;
+                    }
+                } else {
+                    // Time to un-defer any trailing zeros.  Intg zeros first.
+                    while (trailing_zeros_intg) {
+                        mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
+                        --trailing_zeros_intg;
+                    }
+                    while (trailing_zeros_frac) {
+                        mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
+                        --trailing_zeros_frac;
+                    }
+                    mantissa = accept_digit(mantissa, dig, &exp_extra, in);
+                }
+            }
+        } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
+            in = PARSE_DEC_IN_FRAC;
+        } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
+            in = PARSE_DEC_IN_EXP;
+            if (str < top) {
+                if (str[0] == '+') {
+                    str++;
+                } else if (str[0] == '-') {
+                    str++;
+                    exp_neg = true;
+                }
+            }
+            if (str == top) {
+                return NULL;
+            }
+        } else if (dig == '_') {
+            continue;
+        } else {
+            // unknown character
+            str--;
+            break;
+        }
+    }
+
+    // work out the exponent
+    if (exp_neg) {
+        exp_val = -exp_val;
+    }
+
+    // apply the exponent, making sure it's not a subnormal value
+    exp_val += exp_extra + trailing_zeros_intg;
+    mp_float_t dec_val = (mp_float_t)mantissa;
+    if (exp_val < SMALL_NORMAL_EXP) {
+        exp_val -= SMALL_NORMAL_EXP;
+        dec_val *= SMALL_NORMAL_VAL;
+    }
+
+    // At this point, we just need to multiply the mantissa by its base 10 exponent.
+    *res = mp_decimal_exp(dec_val, exp_val);
+
+    return str;
+}
 #endif // MICROPY_PY_BUILTINS_FLOAT
 
 #if MICROPY_PY_BUILTINS_COMPLEX
@@ -272,91 +375,9 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex
         }
     } else {
         // string should be a decimal number
-        parse_dec_in_t in = PARSE_DEC_IN_INTG;
-        bool exp_neg = false;
-        mp_float_uint_t mantissa = 0;
-        int exp_val = 0;
-        int exp_extra = 0;
-        int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
-        while (str < top) {
-            unsigned int dig = *str++;
-            if ('0' <= dig && dig <= '9') {
-                dig -= '0';
-                if (in == PARSE_DEC_IN_EXP) {
-                    // don't overflow exp_val when adding next digit, instead just truncate
-                    // it and the resulting float will still be correct, either inf or 0.0
-                    // (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
-                    if (exp_val < (INT_MAX / 2 - 9) / 10) {
-                        exp_val = 10 * exp_val + dig;
-                    }
-                } else {
-                    if (dig == 0 || mantissa >= MANTISSA_MAX) {
-                        // Defer treatment of zeros in fractional part.  If nothing comes afterwards, ignore them.
-                        // Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
-                        if (in == PARSE_DEC_IN_INTG) {
-                            ++trailing_zeros_intg;
-                        } else {
-                            ++trailing_zeros_frac;
-                        }
-                    } else {
-                        // Time to un-defer any trailing zeros.  Intg zeros first.
-                        while (trailing_zeros_intg) {
-                            mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
-                            --trailing_zeros_intg;
-                        }
-                        while (trailing_zeros_frac) {
-                            mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
-                            --trailing_zeros_frac;
-                        }
-                        mantissa = accept_digit(mantissa, dig, &exp_extra, in);
-                    }
-                }
-            } else if (in == PARSE_DEC_IN_INTG && dig == '.') {
-                in = PARSE_DEC_IN_FRAC;
-            } else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
-                in = PARSE_DEC_IN_EXP;
-                if (str < top) {
-                    if (str[0] == '+') {
-                        str++;
-                    } else if (str[0] == '-') {
-                        str++;
-                        exp_neg = true;
-                    }
-                }
-                if (str == top) {
-                    goto value_error;
-                }
-            } else if (dig == '_') {
-                continue;
-            } else {
-                // unknown character
-                str--;
-                break;
-            }
-        }
-
-        // work out the exponent
-        if (exp_neg) {
-            exp_val = -exp_val;
-        }
-
-        // apply the exponent, making sure it's not a subnormal value
-        exp_val += exp_extra + trailing_zeros_intg;
-        dec_val = (mp_float_t)mantissa;
-        if (exp_val < SMALL_NORMAL_EXP) {
-            exp_val -= SMALL_NORMAL_EXP;
-            dec_val *= SMALL_NORMAL_VAL;
-        }
-
-        // At this point, we need to multiply the mantissa by its base 10 exponent. If possible,
-        // we would rather manipulate numbers that have an exact representation in IEEE754. It
-        // turns out small positive powers of 10 do, whereas small negative powers of 10 don't.
-        // So in that case, we'll yield a division of exact values rather than a multiplication
-        // of slightly erroneous values.
-        if (exp_val < 0 && exp_val >= -EXACT_POWER_OF_10) {
-            dec_val /= MICROPY_FLOAT_C_FUN(pow)(10, -exp_val);
-        } else {
-            dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
+        str = mp_parse_float_internal(str, top - str, &dec_val);
+        if (!str) {
+            goto value_error;
         }
     }
 
diff --git a/py/parsenum.h b/py/parsenum.h
@@ -34,6 +34,11 @@
 
 mp_obj_t mp_parse_num_integer(const char *restrict str, size_t len, int base, mp_lexer_t *lex);
 
+#if MICROPY_PY_BUILTINS_FLOAT
+mp_float_t mp_decimal_exp(mp_float_t num, int exp_val);
+const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res);
+#endif
+
 #if MICROPY_PY_BUILTINS_COMPLEX
 mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex);
 
diff --git a/tests/float/float_format_ints.py b/tests/float/float_format_ints.py
@@ -1,7 +1,7 @@
 # Test that integers format to exact values.
 
 for b in [13, 123, 457, 23456]:
-    for r in range(1, 10):
+    for r in range(1, 9):
         e_fmt = "{:." + str(r) + "e}"
         f_fmt = "{:." + str(r) + "f}"
         g_fmt = "{:." + str(r) + "g}"
diff --git a/tests/float/float_struct_e.py b/tests/float/float_struct_e.py
@@ -32,7 +32,7 @@
     for i in (j, -j):
         x = struct.pack("<e", i)
         v = struct.unpack("<e", x)[0]
-        print("%.7f %s %.15f %s" % (i, x, v, i == v))
+        print("%.6f %s %.6f %s" % (i, x, v, i == v))
 
 # In CPython, packing a float that doesn't fit into a half-float raises OverflowError.
 # But in MicroPython it does not, but rather stores the value as inf.
diff --git a/tests/float/float_struct_e_doubleprec.py b/tests/float/float_struct_e_doubleprec.py
@@ -0,0 +1,43 @@
+# Test struct pack/unpack with 'e' typecode (double-precision variant; skipped by run-tests.py when float precision is below 64 bits).
+
+try:
+    import struct
+except ImportError:
+    print("SKIP")
+    raise SystemExit
+
+test_values = (
+    1e-7,
+    2e-7,
+    1e-6,
+    1e-5,
+    1e-4,
+    1e-3,
+    1e-2,
+    0.1,
+    0,
+    1,
+    2,
+    4,
+    8,
+    10,
+    100,
+    1e3,
+    1e4,
+    6e4,
+    float("inf"),
+)
+
+for j in test_values:
+    for i in (j, -j):
+        x = struct.pack("<e", i)
+        v = struct.unpack("<e", x)[0]
+        print("%.7f %s %.15f %s" % (i, x, v, i == v))
+
+# In CPython, packing a float that doesn't fit into a half-float raises OverflowError.
+# But in MicroPython it does not, but rather stores the value as inf.
+# This test is here for coverage.
+try:
+    struct.pack("e", 1e15)
+except OverflowError:
+    pass
diff --git a/tests/ports/unix/extra_coverage.py.exp b/tests/ports/unix/extra_coverage.py.exp
@@ -101,7 +101,7 @@ ValueError:
 Warning: test
 # format float
 ?
-+1e+00
++1
 +1e+00
 # binary
 123
diff --git a/tests/run-tests.py b/tests/run-tests.py
@@ -781,6 +781,7 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1):
     if upy_float_precision < 64:
         skip_tests.add("float/float_divmod.py")  # tested by float/float_divmod_relaxed.py instead
         skip_tests.add("float/float2int_doubleprec_intbig.py")
+        skip_tests.add("float/float_struct_e_doubleprec.py")
         skip_tests.add("float/float_format_ints_doubleprec.py")
         skip_tests.add("float/float_parse_doubleprec.py")
 

Original file line number	Diff line number	Diff line change
`@@ -101,7 +101,7 @@ ValueError:`
`101`	`101`	`Warning: test`
`102`	`102`	`# format float`
`103`	`103`	`?`
`104`		`-+1e+00`
	`104`	`++1`
`105`	`105`	`+1e+00`
`106`	`106`	`# binary`
`107`	`107`	`123`