-
-
Notifications
You must be signed in to change notification settings - Fork 8.3k
py/parse: Add support for math module constants and float folding #16666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -336,18 +336,34 @@ static uint8_t peek_rule(parser_t *parser, size_t n) { | |
} | ||
#endif | ||
|
||
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o) { | ||
#if MICROPY_COMP_CONST_FOLDING || MICROPY_EMIT_INLINE_ASM | ||
static bool mp_parse_node_get_number_maybe(mp_parse_node_t pn, mp_obj_t *o) { | ||
dpgeorge marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { | ||
*o = MP_OBJ_NEW_SMALL_INT(MP_PARSE_NODE_LEAF_SMALL_INT(pn)); | ||
return true; | ||
} else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_const_object)) { | ||
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; | ||
*o = mp_parse_node_extract_const_object(pns); | ||
return mp_obj_is_int(*o); | ||
return mp_obj_is_int(*o) | ||
#if MICROPY_COMP_CONST_FLOAT | ||
|| mp_obj_is_float(*o) | ||
#endif | ||
; | ||
} else { | ||
return false; | ||
} | ||
} | ||
#endif | ||
|
||
#if MICROPY_EMIT_INLINE_ASM | ||
bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o) { | ||
return mp_parse_node_get_number_maybe(pn, o) | ||
#if MICROPY_COMP_CONST_FLOAT | ||
&& mp_obj_is_int(*o) | ||
#endif | ||
; | ||
} | ||
#endif | ||
|
||
#if MICROPY_COMP_CONST_TUPLE || MICROPY_COMP_CONST | ||
static bool mp_parse_node_is_const(mp_parse_node_t pn) { | ||
|
@@ -642,12 +658,32 @@ static const mp_rom_map_elem_t mp_constants_table[] = { | |
#if MICROPY_PY_UCTYPES | ||
{ MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) }, | ||
#endif | ||
#if MICROPY_PY_BUILTINS_FLOAT && MICROPY_PY_MATH && MICROPY_COMP_CONST_FLOAT | ||
{ MP_ROM_QSTR(MP_QSTR_math), MP_ROM_PTR(&mp_module_math) }, | ||
#endif | ||
// Extra constants as defined by a port | ||
MICROPY_PORT_CONSTANTS | ||
}; | ||
static MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table); | ||
#endif | ||
|
||
static bool binary_op_maybe(mp_binary_op_t op, mp_obj_t lhs, mp_obj_t rhs, mp_obj_t *res) { | ||
nlr_buf_t nlr; | ||
if (nlr_push(&nlr) == 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This What are the cases in which it's needed? From what I can gather it's:
The int cases are already handled explicitly. Maybe it's not much extra code to explicitly guard against the invalid float operations, so that this It's a bit of a trade-off here, whether to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought about that initially, but I was afraid to miss a corner case which would then break compilation. For instance, on builds with limited integers ( Are you mostly concerned by the impact on stack space, or more on compilation time ? |
||
mp_obj_t tmp = mp_binary_op(op, lhs, rhs); | ||
#if MICROPY_PY_BUILTINS_COMPLEX | ||
if (mp_obj_is_type(tmp, &mp_type_complex)) { | ||
return false; | ||
} | ||
#endif | ||
*res = tmp; | ||
nlr_pop(); | ||
return true; | ||
} else { | ||
return false; | ||
} | ||
} | ||
|
||
static bool fold_logical_constants(parser_t *parser, uint8_t rule_id, size_t *num_args) { | ||
if (rule_id == RULE_or_test | ||
|| rule_id == RULE_and_test) { | ||
|
@@ -706,7 +742,7 @@ static bool fold_logical_constants(parser_t *parser, uint8_t rule_id, size_t *nu | |
} | ||
|
||
static bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) { | ||
// this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4 | ||
// this code does folding of arbitrary numeric expressions, eg 1 + 2 * 3 + 4 | ||
// it does not do partial folding, eg 1 + 2 + x -> 3 + x | ||
|
||
mp_obj_t arg0; | ||
|
@@ -716,7 +752,7 @@ static bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) { | |
|| rule_id == RULE_power) { | ||
// folding for binary ops: | ^ & ** | ||
mp_parse_node_t pn = peek_result(parser, num_args - 1); | ||
if (!mp_parse_node_get_int_maybe(pn, &arg0)) { | ||
if (!mp_parse_node_get_number_maybe(pn, &arg0)) { | ||
return false; | ||
} | ||
mp_binary_op_t op; | ||
|
@@ -732,58 +768,61 @@ static bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) { | |
for (ssize_t i = num_args - 2; i >= 0; --i) { | ||
pn = peek_result(parser, i); | ||
mp_obj_t arg1; | ||
if (!mp_parse_node_get_int_maybe(pn, &arg1)) { | ||
if (!mp_parse_node_get_number_maybe(pn, &arg1)) { | ||
return false; | ||
} | ||
#if !MICROPY_COMP_CONST_FLOAT | ||
if (op == MP_BINARY_OP_POWER && mp_obj_int_sign(arg1) < 0) { | ||
// ** can't have negative rhs | ||
return false; | ||
} | ||
arg0 = mp_binary_op(op, arg0, arg1); | ||
#endif | ||
if (!binary_op_maybe(op, arg0, arg1, &arg0)) { | ||
return false; | ||
} | ||
} | ||
} else if (rule_id == RULE_shift_expr | ||
|| rule_id == RULE_arith_expr | ||
|| rule_id == RULE_term) { | ||
// folding for binary ops: << >> + - * @ / % // | ||
mp_parse_node_t pn = peek_result(parser, num_args - 1); | ||
if (!mp_parse_node_get_int_maybe(pn, &arg0)) { | ||
if (!mp_parse_node_get_number_maybe(pn, &arg0)) { | ||
return false; | ||
} | ||
for (ssize_t i = num_args - 2; i >= 1; i -= 2) { | ||
pn = peek_result(parser, i - 1); | ||
mp_obj_t arg1; | ||
if (!mp_parse_node_get_int_maybe(pn, &arg1)) { | ||
if (!mp_parse_node_get_number_maybe(pn, &arg1)) { | ||
return false; | ||
} | ||
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i)); | ||
if (tok == MP_TOKEN_OP_AT || tok == MP_TOKEN_OP_SLASH) { | ||
// Can't fold @ or / | ||
if (tok == MP_TOKEN_OP_AT) { | ||
// Can't fold @ | ||
return false; | ||
} | ||
#if !MICROPY_COMP_CONST_FLOAT | ||
if (tok == MP_TOKEN_OP_SLASH) { | ||
// Can't fold / | ||
return false; | ||
} | ||
#endif | ||
mp_binary_op_t op = MP_BINARY_OP_LSHIFT + (tok - MP_TOKEN_OP_DBL_LESS); | ||
int rhs_sign = mp_obj_int_sign(arg1); | ||
if (op <= MP_BINARY_OP_RSHIFT) { | ||
// << and >> can't have negative rhs | ||
if (rhs_sign < 0) { | ||
return false; | ||
} | ||
} else if (op >= MP_BINARY_OP_FLOOR_DIVIDE) { | ||
// % and // can't have zero rhs | ||
if (rhs_sign == 0) { | ||
return false; | ||
} | ||
if (!binary_op_maybe(op, arg0, arg1, &arg0)) { | ||
return false; | ||
} | ||
arg0 = mp_binary_op(op, arg0, arg1); | ||
} | ||
} else if (rule_id == RULE_factor_2) { | ||
// folding for unary ops: + - ~ | ||
mp_parse_node_t pn = peek_result(parser, 0); | ||
if (!mp_parse_node_get_int_maybe(pn, &arg0)) { | ||
dpgeorge marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (!mp_parse_node_get_number_maybe(pn, &arg0)) { | ||
return false; | ||
} | ||
mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1)); | ||
mp_unary_op_t op; | ||
if (tok == MP_TOKEN_OP_TILDE) { | ||
if (!mp_obj_is_int(arg0)) { | ||
return false; | ||
} | ||
op = MP_UNARY_OP_INVERT; | ||
} else { | ||
assert(tok == MP_TOKEN_OP_PLUS || tok == MP_TOKEN_OP_MINUS); // should be | ||
|
@@ -855,7 +894,7 @@ static bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) { | |
return false; | ||
} | ||
// id1.id2 | ||
// look it up in constant table, see if it can be replaced with an integer | ||
// look it up in constant table, see if it can be replaced with an integer or a float | ||
mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t *)pn1; | ||
assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0])); | ||
qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0); | ||
|
@@ -866,7 +905,7 @@ static bool fold_constants(parser_t *parser, uint8_t rule_id, size_t num_args) { | |
} | ||
mp_obj_t dest[2]; | ||
mp_load_method_maybe(elem->value, q_attr, dest); | ||
if (!(dest[0] != MP_OBJ_NULL && mp_obj_is_int(dest[0]) && dest[1] == MP_OBJ_NULL)) { | ||
if (!(dest[0] != MP_OBJ_NULL && (mp_obj_is_int(dest[0]) || mp_obj_is_float(dest[0])) && dest[1] == MP_OBJ_NULL)) { | ||
dpgeorge marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return false; | ||
} | ||
arg0 = dest[0]; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,5 +5,3 @@ SyntaxError | |
SyntaxError | ||
SyntaxError | ||
SyntaxError | ||
SyntaxError | ||
SyntaxError |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# test constant optimisation, with consts that are floats | ||
try: | ||
float("3.14") | ||
except NameError: | ||
print("SKIP") | ||
raise SystemExit | ||
|
||
from micropython import const | ||
|
||
# check we can make consts from floats | ||
F1 = const(2.5) | ||
F2 = const(-0.3) | ||
print(type(F1), F1) | ||
print(type(F2), F2) | ||
|
||
# check arithmetic with floats | ||
F3 = const(F1 + F2) | ||
F4 = const(F1**2) | ||
print(F3, F4) | ||
|
||
# check int operations with float results | ||
F5 = const(1 / 2) | ||
F6 = const(2**-2) | ||
print(F5, F6) | ||
|
||
# note: we also test float expression folding when | ||
# we're compiling test cases in tests/float, as | ||
# many expressions are resolved at compile time. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
<class 'float'> 2.5 | ||
<class 'float'> -0.3 | ||
2.2 6.25 | ||
0.5 0.25 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Test expressions based on math module constants | ||
try: | ||
import math | ||
except (ImportError, AttributeError): | ||
print("SKIP") | ||
raise SystemExit | ||
|
||
from micropython import const | ||
|
||
# check that we can make consts from math constants | ||
two_pi = const(2.0 * math.pi) | ||
print(math.cos(two_pi)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
1.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm still trying to understand why this code is necessary.
Is it only necessary when floats are double precision? Or also single precision?
Can you give an example of a number that needs this additional digit of precision?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The numbers that require this extra digit are not the same for single-precision and double-precision (as the rounding effects differ), but a typical example for double-precision is
2.0 ** 100
, which is the test case I have added to float_parse_doubleprec.py
. Without the improved repr
code, numbers which appear to be the same as per repr()
would actually not match with regard to the == operator: With the improved
repr
code, we get the expected behaviour: With float folding enabled, large expanded decimal numbers become more frequent in
mpy
files and this could quickly cause functional differences. Without this improved repr code, the coverage tests fail in
float_float2int_intbig
due to the missing half-digit.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, thanks for the info. I now understand the problem. Let me restate it in two ways:
MicroPython at the moment uses 16 digits which is not enough to faithfully represent doubles.
So we do need to fix something here, but it's not clear what or how. My testing shows that in CPython doing
'{:.17g}'.format(n)
is enough digits to properly represent every number, i.e.: The issue is that in MicroPython the above is not true. In fact there's no repr precision in MicroPython that allows you to satisfy the above for all
n
(eg even.20g
doesn't work).For some numbers, eg
39.0**100
, the above is true in MicroPython for precision of 17 and 19 but false for 16, 18 and 20!Numbers like
12.0**100
and18.0**100
get worse going from 16 to 17 digits (in MicroPython).(And others like
40.0**100
are accurate at 17, but at 16 are printed only with 15, and so the length increases by 2 digits to get full accuracy.)The real issue here is that MicroPython's
mp_format_float()
code is inaccurate. Eg:That's a problem! It means we can't really use
mp_format_float()
to store floats in .mpy files, because it's inaccurate.What to do?
There is #12008 which stores floats as binary in .mpy files. That's definitely one way to fix it, but I don't want to change the .mpy version number at this stage.
We could try to improve
mp_format_float()
, but I think that's very difficult, and probably needs a rewrite to make it more accurate.Note that currently (without this PR) certain floats are already stored inaccurately in .mpy files, due to using 16 digit precision only. So does this PR make things worse? I think yes, because now more floats have the chance to be store and hence stored inaccurately. Eg those in
tests/float/float2int_intbig.py
.That said, I don't think this PR (constant folding) will make things that much worse in .mpy files.
My suggestion to move forward is:
1.2345
rendering as1.23499999999
. I think if you userepr(a_float)
then you want accuracy. If you want a set number of digits, you'd be using a precision specifier like'{:.10g}'.format(a_float)
. (It's pretty expensive to callmp_format_float()
so I'd rather not do it twice.)array.array
with binary representation, or escape the const folding with egfloat("2.0") + 3.4
if they need 100% accuracy.If you don't like (1) because it makes
1.2345
print out as1.234999999
at the REPL, then maybe we can adjust the code inpy/persistentcode.c:save_obj
to save floats explicitly with a higher precision (17 digits for double). (From my testing, always using 17 digits is slightly better than using 17 only if it grows 16 by one digit.)(I didn't investigate single precision floats, but I assume all the above logic/reasoning applies there as well.)
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Your understanding is correct, and indeed, single precision runs into the exact same kind of issues.
As you have guessed, I am not a fan of
1.234499999999
, but if your testing shows that using 17 digits provides overall better results than conditionally shortening to 16, then this is the way to go.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made a test:
That generates 10 million random doubles and checks
repr
and formatting with 16 through 19 digits of precision, to see if the rendered number is equivalent to the original.On Python it gives:
So that means 100% of the numbers are accurate using repr and 17 or more digits of precision. Using 16 digits, only 54.6% represent correctly.
MicroPython unix port with this PR (ie repr using 16 or 17 digits) gives:
That shows ... it's pretty terrible. But at least we see that using repr with this PR is much better than master which just uses 16 digits: 52.2% for repr vs 38% for always using 16 digits. But also, always using 17 digits gives a little more accuracy than repr, in this case 53.9% which is about 1.7% more than repr.
Note that MicroPython may also have inaccuracies converting str to float, so that may add to the errors here. But that's representative of .mpy loading because the loader uses the same float parsing code.
Also note that MicroPython was faster than CPython in the above test! That might be because MicroPython's float printing code is taking shortcuts. It also means that rendering the float twice in repr to try and extract the extra half digit is not taking up that much extra time.
Open questions:
mp_float_format()
using a different algorithm, eg fully integer based?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried extending the 16/17 trick to the following:
That uses 17 digits if it extends up to 3 digits more. That gives better results in the above test:
That means
repr
is almost as good as unconditionally doing 17 digits, 53.85% vs 53.94%. But note that these percentages change a bit across runs, due to the randomness.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have been working on this today. It is now clear to me as well that the proper solution is indeed to fix
mp_float_format()
, rather than adding a quick fix in repr
.I have implemented today an alternate algorithm for
mp_float_format()
, based on the same idea as my previous enhancement to mp_parse_num_float
: using an mp_float_uint_t
to convert the mantissa, rather than working directly on floats. This brings the correct percentage up to 60%, which is a good start. Once the mantissa is computed using an integer, it is possible to fix it incrementally to ensure that the parse algorithm will get back to the original number. I have been working on that, and got up to 95% correct conversions using 18 digits. I still have a bit of work to fix corner cases (forced round-ups) before I can push that code. Do you want me to send the new
Do you want me to send the new
mp_float_format
code in a separate PR ?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wow, very nice!
Yes please.