Skip to content

Commit 6efb905

Browse files
committed
py/formatfloat: Improve accuracy of float formatting code.
Following discussions in PR #16666, this commit updates the float formatting code to reduce the `repr` reversibility error, i.e. the percentage of valid floating point numbers that do not parse back to the same number when formatted by `repr`. The baseline before this commit is an error rate of ~46% when using double-precision floats. This new code initially brings the error down to ~41% by using an integer representation of the decimal mantissa rather than working on floats. It also improves the rounding in some conditions. An additional improvement to the accuracy can be turned on to bring the error down to 4.5% by iterative refinement. This extra code, however, makes the code slightly slower than CPython when tested on ports/unix. The residual error rate appears to be due to the parse code itself, which is unable to produce some specific floating point values, regardless of the string provided as input. Signed-off-by: Yoctopuce dev <dev@yoctopuce.com>
1 parent 5f058e9 commit 6efb905

File tree

9 files changed

+420
-242
lines changed

9 files changed

+420
-242
lines changed

py/formatfloat.c

Lines changed: 260 additions & 148 deletions
Large diffs are not rendered by default.

py/objfloat.c

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,14 +114,10 @@ static void float_print(const mp_print_t *print, mp_obj_t o_in, mp_print_kind_t
114114
mp_float_t o_val = mp_obj_float_get(o_in);
115115
#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_FLOAT
116116
char buf[16];
117-
#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C
118-
const int precision = 6;
119-
#else
120-
const int precision = 7;
121-
#endif
117+
const int precision = 8;
122118
#else
123119
char buf[32];
124-
const int precision = 16;
120+
const int precision = 18;
125121
#endif
126122
mp_format_float(o_val, buf, sizeof(buf), 'g', precision, '\0');
127123
mp_print_str(print, buf);

py/parsenum.c

Lines changed: 106 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,23 @@ typedef enum {
197197
#define EXACT_POWER_OF_10 (22)
198198
#endif
199199

200+
mp_float_t mp_decimal_exp(mp_float_t num, int exp_val) {
201+
if (exp_val == 0) {
202+
return num;
203+
}
204+
// If possible, we would rather manipulate numbers that have an exact representation
205+
// in IEEE754. It turns out small positive powers of 10 do, whereas small negative
206+
// powers of 10 don't. So in that case, we'll yield a division of exact values rather
207+
// than a multiplication of slightly erroneous values.
208+
if (exp_val < 0 && exp_val >= -EXACT_POWER_OF_10) {
209+
num /= MICROPY_FLOAT_C_FUN(pow)(10, -exp_val);
210+
} else {
211+
num *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
212+
}
213+
return num;
214+
}
215+
216+
200217
// Break out inner digit accumulation routine to ease trailing zero deferral.
201218
static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig, int *p_exp_extra, int in) {
202219
// Core routine to ingest an additional digit.
@@ -215,6 +232,92 @@ static mp_float_uint_t accept_digit(mp_float_uint_t p_mantissa, unsigned int dig
215232
return p_mantissa;
216233
}
217234
}
235+
236+
// internal function to parse an unsigned decimal number
237+
const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res) {
238+
const char *top = str + len;
239+
240+
parse_dec_in_t in = PARSE_DEC_IN_INTG;
241+
bool exp_neg = false;
242+
mp_float_uint_t mantissa = 0;
243+
int exp_val = 0;
244+
int exp_extra = 0;
245+
int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
246+
while (str < top) {
247+
unsigned int dig = *str++;
248+
if ('0' <= dig && dig <= '9') {
249+
dig -= '0';
250+
if (in == PARSE_DEC_IN_EXP) {
251+
// don't overflow exp_val when adding next digit, instead just truncate
252+
// it and the resulting float will still be correct, either inf or 0.0
253+
// (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
254+
if (exp_val < (INT_MAX / 2 - 9) / 10) {
255+
exp_val = 10 * exp_val + dig;
256+
}
257+
} else {
258+
if (dig == 0 || mantissa >= MANTISSA_MAX) {
259+
// Defer treatment of zeros in fractional part. If nothing comes afterwards, ignore them.
260+
// Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
261+
if (in == PARSE_DEC_IN_INTG) {
262+
++trailing_zeros_intg;
263+
} else {
264+
++trailing_zeros_frac;
265+
}
266+
} else {
267+
// Time to un-defer any trailing zeros. Intg zeros first.
268+
while (trailing_zeros_intg) {
269+
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
270+
--trailing_zeros_intg;
271+
}
272+
while (trailing_zeros_frac) {
273+
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
274+
--trailing_zeros_frac;
275+
}
276+
mantissa = accept_digit(mantissa, dig, &exp_extra, in);
277+
}
278+
}
279+
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
280+
in = PARSE_DEC_IN_FRAC;
281+
} else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
282+
in = PARSE_DEC_IN_EXP;
283+
if (str < top) {
284+
if (str[0] == '+') {
285+
str++;
286+
} else if (str[0] == '-') {
287+
str++;
288+
exp_neg = true;
289+
}
290+
}
291+
if (str == top) {
292+
return NULL;
293+
}
294+
} else if (dig == '_') {
295+
continue;
296+
} else {
297+
// unknown character
298+
str--;
299+
break;
300+
}
301+
}
302+
303+
// work out the exponent
304+
if (exp_neg) {
305+
exp_val = -exp_val;
306+
}
307+
308+
// apply the exponent, making sure it's not a subnormal value
309+
exp_val += exp_extra + trailing_zeros_intg;
310+
mp_float_t dec_val = (mp_float_t)mantissa;
311+
if (exp_val < SMALL_NORMAL_EXP) {
312+
exp_val -= SMALL_NORMAL_EXP;
313+
dec_val *= SMALL_NORMAL_VAL;
314+
}
315+
316+
// At this point, we just need to multiply the mantissa by its base 10 exponent.
317+
*res = mp_decimal_exp(dec_val, exp_val);
318+
319+
return str;
320+
}
218321
#endif // MICROPY_PY_BUILTINS_FLOAT
219322

220323
#if MICROPY_PY_BUILTINS_COMPLEX
@@ -272,91 +375,9 @@ mp_obj_t mp_parse_num_float(const char *str, size_t len, bool allow_imag, mp_lex
272375
}
273376
} else {
274377
// string should be a decimal number
275-
parse_dec_in_t in = PARSE_DEC_IN_INTG;
276-
bool exp_neg = false;
277-
mp_float_uint_t mantissa = 0;
278-
int exp_val = 0;
279-
int exp_extra = 0;
280-
int trailing_zeros_intg = 0, trailing_zeros_frac = 0;
281-
while (str < top) {
282-
unsigned int dig = *str++;
283-
if ('0' <= dig && dig <= '9') {
284-
dig -= '0';
285-
if (in == PARSE_DEC_IN_EXP) {
286-
// don't overflow exp_val when adding next digit, instead just truncate
287-
// it and the resulting float will still be correct, either inf or 0.0
288-
// (use INT_MAX/2 to allow adding exp_extra at the end without overflow)
289-
if (exp_val < (INT_MAX / 2 - 9) / 10) {
290-
exp_val = 10 * exp_val + dig;
291-
}
292-
} else {
293-
if (dig == 0 || mantissa >= MANTISSA_MAX) {
294-
// Defer treatment of zeros in fractional part. If nothing comes afterwards, ignore them.
295-
// Also, once we reach MANTISSA_MAX, treat every additional digit as a trailing zero.
296-
if (in == PARSE_DEC_IN_INTG) {
297-
++trailing_zeros_intg;
298-
} else {
299-
++trailing_zeros_frac;
300-
}
301-
} else {
302-
// Time to un-defer any trailing zeros. Intg zeros first.
303-
while (trailing_zeros_intg) {
304-
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_INTG);
305-
--trailing_zeros_intg;
306-
}
307-
while (trailing_zeros_frac) {
308-
mantissa = accept_digit(mantissa, 0, &exp_extra, PARSE_DEC_IN_FRAC);
309-
--trailing_zeros_frac;
310-
}
311-
mantissa = accept_digit(mantissa, dig, &exp_extra, in);
312-
}
313-
}
314-
} else if (in == PARSE_DEC_IN_INTG && dig == '.') {
315-
in = PARSE_DEC_IN_FRAC;
316-
} else if (in != PARSE_DEC_IN_EXP && ((dig | 0x20) == 'e')) {
317-
in = PARSE_DEC_IN_EXP;
318-
if (str < top) {
319-
if (str[0] == '+') {
320-
str++;
321-
} else if (str[0] == '-') {
322-
str++;
323-
exp_neg = true;
324-
}
325-
}
326-
if (str == top) {
327-
goto value_error;
328-
}
329-
} else if (dig == '_') {
330-
continue;
331-
} else {
332-
// unknown character
333-
str--;
334-
break;
335-
}
336-
}
337-
338-
// work out the exponent
339-
if (exp_neg) {
340-
exp_val = -exp_val;
341-
}
342-
343-
// apply the exponent, making sure it's not a subnormal value
344-
exp_val += exp_extra + trailing_zeros_intg;
345-
dec_val = (mp_float_t)mantissa;
346-
if (exp_val < SMALL_NORMAL_EXP) {
347-
exp_val -= SMALL_NORMAL_EXP;
348-
dec_val *= SMALL_NORMAL_VAL;
349-
}
350-
351-
// At this point, we need to multiply the mantissa by its base 10 exponent. If possible,
352-
// we would rather manipulate numbers that have an exact representation in IEEE754. It
353-
// turns out small positive powers of 10 do, whereas small negative powers of 10 don't.
354-
// So in that case, we'll yield a division of exact values rather than a multiplication
355-
// of slightly erroneous values.
356-
if (exp_val < 0 && exp_val >= -EXACT_POWER_OF_10) {
357-
dec_val /= MICROPY_FLOAT_C_FUN(pow)(10, -exp_val);
358-
} else {
359-
dec_val *= MICROPY_FLOAT_C_FUN(pow)(10, exp_val);
378+
str = mp_parse_float_internal(str, top - str, &dec_val);
379+
if (!str) {
380+
goto value_error;
360381
}
361382
}
362383

py/parsenum.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@
3434

3535
mp_obj_t mp_parse_num_integer(const char *restrict str, size_t len, int base, mp_lexer_t *lex);
3636

37+
#if MICROPY_PY_BUILTINS_FLOAT
38+
mp_float_t mp_decimal_exp(mp_float_t num, int exp_val);
39+
const char *mp_parse_float_internal(const char *str, size_t len, mp_float_t *res);
40+
#endif
41+
3742
#if MICROPY_PY_BUILTINS_COMPLEX
3843
mp_obj_t mp_parse_num_decimal(const char *str, size_t len, bool allow_imag, bool force_complex, mp_lexer_t *lex);
3944

tests/float/float_format_ints.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Test that integers format to exact values.
22

33
for b in [13, 123, 457, 23456]:
4-
for r in range(1, 10):
4+
for r in range(1, 9):
55
e_fmt = "{:." + str(r) + "e}"
66
f_fmt = "{:." + str(r) + "f}"
77
g_fmt = "{:." + str(r) + "g}"

tests/float/float_struct_e.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
for i in (j, -j):
3333
x = struct.pack("<e", i)
3434
v = struct.unpack("<e", x)[0]
35-
print("%.7f %s %.15f %s" % (i, x, v, i == v))
35+
print("%.6f %s %.6f %s" % (i, x, v, i == v))
3636

3737
# In CPython, packing a float that doesn't fit into a half-float raises OverflowError.
3838
# But in MicroPython it does not, but rather stores the value as inf.
tests/float/float_struct_e_doubleprec.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Test struct pack/unpack with 'e' typecode.
2+
3+
try:
4+
import struct
5+
except ImportError:
6+
print("SKIP")
7+
raise SystemExit
8+
9+
test_values = (
10+
1e-7,
11+
2e-7,
12+
1e-6,
13+
1e-5,
14+
1e-4,
15+
1e-3,
16+
1e-2,
17+
0.1,
18+
0,
19+
1,
20+
2,
21+
4,
22+
8,
23+
10,
24+
100,
25+
1e3,
26+
1e4,
27+
6e4,
28+
float("inf"),
29+
)
30+
31+
for j in test_values:
32+
for i in (j, -j):
33+
x = struct.pack("<e", i)
34+
v = struct.unpack("<e", x)[0]
35+
print("%.7f %s %.15f %s" % (i, x, v, i == v))
36+
37+
# In CPython, packing a float that doesn't fit into a half-float raises OverflowError.
38+
# But in MicroPython it does not, but rather stores the value as inf.
39+
# This test is here for coverage.
40+
try:
41+
struct.pack("e", 1e15)
42+
except OverflowError:
43+
pass

tests/ports/unix/extra_coverage.py.exp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ ValueError:
101101
Warning: test
102102
# format float
103103
?
104-
+1e+00
104+
+1
105105
+1e+00
106106
# binary
107107
123

tests/run-tests.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,7 @@ def run_tests(pyb, tests, args, result_dir, num_threads=1):
781781
if upy_float_precision < 64:
782782
skip_tests.add("float/float_divmod.py") # tested by float/float_divmod_relaxed.py instead
783783
skip_tests.add("float/float2int_doubleprec_intbig.py")
784+
skip_tests.add("float/float_struct_e_doubleprec.py")
784785
skip_tests.add("float/float_format_ints_doubleprec.py")
785786
skip_tests.add("float/float_parse_doubleprec.py")
786787

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy