From fec85d84ab1efa659c5b76f52221880629272690 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 14:01:27 +1100 Subject: [PATCH 1/9] py/mpz: Use consistent comment style. Remove unused code. Signed-off-by: Jim Mussared --- py/mpz.c | 558 +++++++++++-------------------------------------------- 1 file changed, 111 insertions(+), 447 deletions(-) diff --git a/py/mpz.c b/py/mpz.c index b61997e2fd4ed..872d3af2435c6 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -36,18 +36,16 @@ #define DIG_MSB (MPZ_LONG_1 << (DIG_SIZE - 1)) #define DIG_BASE (MPZ_LONG_1 << DIG_SIZE) -/* - mpz is an arbitrary precision integer type with a public API. +// mpz is an arbitrary precision integer type with a public API. - mpn functions act on non-negative integers represented by an array of generalised - digits (eg a word per digit). You also need to specify separately the length of the - array. There is no public API for mpn. Rather, the functions are used by mpz to - implement its features. +// mpn functions act on non-negative integers represented by an array of generalised +// digits (eg a word per digit). You also need to specify separately the length of the +// array. There is no public API for mpn. Rather, the functions are used by mpz to +// implement its features. - Integer values are stored little endian (first digit is first in memory). +// Integer values are stored little endian (first digit is first in memory). - Definition of normalise: ? -*/ +// Definition of normalise: ? 
STATIC size_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) { for (--idig; idig >= oidig && *idig == 0; --idig) { @@ -55,10 +53,9 @@ STATIC size_t mpn_remove_trailing_zeros(mpz_dig_t *oidig, mpz_dig_t *idig) { return idig + 1 - oidig; } -/* compares i with j - returns sign(i - j) - assumes i, j are normalised -*/ +// Compares i with j +// Returns sign(i - j) +// Assumes i, j are normalised STATIC int mpn_cmp(const mpz_dig_t *idig, size_t ilen, const mpz_dig_t *jdig, size_t jlen) { if (ilen < jlen) { return -1; @@ -80,11 +77,10 @@ STATIC int mpn_cmp(const mpz_dig_t *idig, size_t ilen, const mpz_dig_t *jdig, si return 0; } -/* computes i = j << n - returns number of digits in i - assumes enough memory in i; assumes normalised j; assumes n > 0 - can have i, j pointing to same memory -*/ +// Computes i = j << n +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j; assumes n > 0 +// Can have i, j pointing to same memory STATIC size_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) { mp_uint_t n_whole = (n + DIG_SIZE - 1) / DIG_SIZE; mp_uint_t n_part = n % DIG_SIZE; @@ -119,11 +115,10 @@ STATIC size_t mpn_shl(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n return jlen; } -/* computes i = j >> n - returns number of digits in i - assumes enough memory in i; assumes normalised j; assumes n > 0 - can have i, j pointing to same memory -*/ +// Computes i = j >> n +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j; assumes n > 0 +// Can have i, j pointing to same memory STATIC size_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n) { mp_uint_t n_whole = n / DIG_SIZE; mp_uint_t n_part = n % DIG_SIZE; @@ -151,11 +146,10 @@ STATIC size_t mpn_shr(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mp_uint_t n return jlen; } -/* computes i = j + k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - 
can have i, j, k pointing to same memory -*/ +// Computes i = j + k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; mpz_dbl_dig_t carry = 0; @@ -181,11 +175,10 @@ STATIC size_t mpn_add(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const return idig - oidig; } -/* computes i = j - k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes j >= k - can have i, j, k pointing to same memory -*/ +// Computes i = j - k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes j >= k +// Can have i, j, k pointing to same memory STATIC size_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; mpz_dbl_dig_signed_t borrow = 0; @@ -208,12 +201,10 @@ STATIC size_t mpn_sub(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const } #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j & k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen (jlen argument not needed) - can have i, j, k pointing to same memory -*/ +// Computes i = j & k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen (jlen argument not needed) +// Can have i, j, k pointing to same memory STATIC size_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -223,18 +214,16 @@ STATIC size_t mpn_and(mpz_dig_t *idig, const mpz_dig_t *jdig, const mpz_dig_t *k return mpn_remove_trailing_zeros(oidig, idig); } - #endif -/* i = -((-j) & (-k)) = ~((~j + 1) & (~k + 1)) + 1 - i = (j & (-k)) = (j & (~k + 1)) = ( j & (~k + 1)) - i = ((-j) & k) = ((~j + 1) & 
k) = ((~j + 1) & k ) - computes general form: - i = (im ^ (((j ^ jm) + jc) & ((k ^ km) + kc))) + ic where Xm = Xc == 0 ? 0 : DIG_MASK - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ +// i = -((-j) & (-k)) = ~((~j + 1) & (~k + 1)) + 1 +// i = (j & (-k)) = (j & (~k + 1)) = ( j & (~k + 1)) +// i = ((-j) & k) = ((~j + 1) & k) = ((~j + 1) & k ) +// Computes general form: +// i = (im ^ (((j ^ jm) + jc) & ((k ^ km) + kc))) + ic where Xm = Xc == 0 ? 0 : DIG_MASK +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory STATIC size_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -260,12 +249,10 @@ STATIC size_t mpn_and_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, c } #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j | k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - can have i, j, k pointing to same memory -*/ +// Computes i = j | k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -281,21 +268,17 @@ STATIC size_t mpn_or(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const return idig - oidig; } - #endif -/* i = -((-j) | (-k)) = ~((~j + 1) | (~k + 1)) + 1 - i = -(j | (-k)) = -(j | (~k + 1)) = ~( j | (~k + 1)) + 1 - i = -((-j) | k) = -((~j + 1) | k) = ~((~j + 1) | k ) + 1 - computes general form: - i = ~(((j ^ jm) + jc) | ((k ^ km) + kc)) + 1 where Xm = Xc == 0 ? 
0 : DIG_MASK - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ - +// i = -((-j) | (-k)) = ~((~j + 1) | (~k + 1)) + 1 +// i = -(j | (-k)) = -(j | (~k + 1)) = ~( j | (~k + 1)) + 1 +// i = -((-j) | k) = -((~j + 1) | k) = ~((~j + 1) | k ) + 1 +// Computes general form: +// i = ~(((j ^ jm) + jc) | ((k ^ km) + kc)) + 1 where Xm = Xc == 0 ? 0 : DIG_MASK +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory #if MICROPY_OPT_MPZ_BITWISE - STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -323,9 +306,7 @@ STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, co return mpn_remove_trailing_zeros(oidig, idig); } - #else - STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -348,16 +329,13 @@ STATIC size_t mpn_or_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, co return mpn_remove_trailing_zeros(oidig, idig); } - #endif #if MICROPY_OPT_MPZ_BITWISE - -/* computes i = j ^ k - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen - can have i, j, k pointing to same memory -*/ +// Computes i = j ^ k +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes jlen >= klen +// Can have i, j, k pointing to same memory STATIC size_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; @@ -373,18 +351,16 @@ STATIC size_t mpn_xor(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const 
return mpn_remove_trailing_zeros(oidig, idig); } - #endif -/* i = (-j) ^ (-k) = ~(j - 1) ^ ~(k - 1) = (j - 1) ^ (k - 1) - i = -(j ^ (-k)) = -(j ^ ~(k - 1)) = ~(j ^ ~(k - 1)) + 1 = (j ^ (k - 1)) + 1 - i = -((-j) ^ k) = -(~(j - 1) ^ k) = ~(~(j - 1) ^ k) + 1 = ((j - 1) ^ k) + 1 - computes general form: - i = ((j - 1 + jc) ^ (k - 1 + kc)) + ic - returns number of digits in i - assumes enough memory in i; assumes normalised j, k; assumes length j >= length k - can have i, j, k pointing to same memory -*/ +// i = (-j) ^ (-k) = ~(j - 1) ^ ~(k - 1) = (j - 1) ^ (k - 1) +// i = -(j ^ (-k)) = -(j ^ ~(k - 1)) = ~(j ^ ~(k - 1)) + 1 = (j ^ (k - 1)) + 1 +// i = -((-j) ^ k) = -(~(j - 1) ^ k) = ~(~(j - 1) ^ k) + 1 = ((j - 1) ^ k) + 1 +// Computes general form: +// i = ((j - 1 + jc) ^ (k - 1 + kc)) + ic +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised j, k; assumes length j >= length k +// Can have i, j, k pointing to same memory STATIC size_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, const mpz_dig_t *kdig, size_t klen, mpz_dbl_dig_t carryi, mpz_dbl_dig_t carryj, mpz_dbl_dig_t carryk) { mpz_dig_t *oidig = idig; @@ -406,10 +382,9 @@ STATIC size_t mpn_xor_neg(mpz_dig_t *idig, const mpz_dig_t *jdig, size_t jlen, c return mpn_remove_trailing_zeros(oidig, idig); } -/* computes i = i * d1 + d2 - returns number of digits in i - assumes enough memory in i; assumes normalised i; assumes dmul != 0 -*/ +// Computes i = i * d1 + d2 +// Returns number of digits in i +// Assumes enough memory in i; assumes normalised i; assumes dmul != 0 STATIC size_t mpn_mul_dig_add_dig(mpz_dig_t *idig, size_t ilen, mpz_dig_t dmul, mpz_dig_t dadd) { mpz_dig_t *oidig = idig; mpz_dbl_dig_t carry = dadd; @@ -427,11 +402,10 @@ STATIC size_t mpn_mul_dig_add_dig(mpz_dig_t *idig, size_t ilen, mpz_dig_t dmul, return idig - oidig; } -/* computes i = j * k - returns number of digits in i - assumes enough memory in i; assumes i is zeroed; assumes normalised j, k - 
can have j, k point to same memory -*/ +// Computes i = j * k +// Returns number of digits in i +// Assumes enough memory in i; assumes i is zeroed; assumes normalised j, k +// Can have j, k point to same memory STATIC size_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mpz_dig_t *kdig, size_t klen) { mpz_dig_t *oidig = idig; size_t ilen = 0; @@ -457,12 +431,11 @@ STATIC size_t mpn_mul(mpz_dig_t *idig, mpz_dig_t *jdig, size_t jlen, mpz_dig_t * return ilen; } -/* natural_div - quo * den + new_num = old_num (ie num is replaced with rem) - assumes den != 0 - assumes num_dig has enough memory to be extended by 1 digit - assumes quo_dig has enough memory (as many digits as num) - assumes quo_dig is filled with zeros -*/ +// natural_div - quo * den + new_num = old_num (ie num is replaced with rem) +// Assumes den != 0 +// Assumes num_dig has enough memory to be extended by 1 digit +// Assumes quo_dig has enough memory (as many digits as num) +// Assumes quo_dig is filled with zeros STATIC void mpn_div(mpz_dig_t *num_dig, size_t *num_len, const mpz_dig_t *den_dig, size_t den_len, mpz_dig_t *quo_dig, size_t *quo_len) { mpz_dig_t *orig_num_dig = num_dig; mpz_dig_t *orig_quo_dig = quo_dig; @@ -632,42 +605,6 @@ void mpz_deinit(mpz_t *z) { } } -#if 0 -these functions are unused - -mpz_t *mpz_zero(void) { - mpz_t *z = m_new_obj(mpz_t); - mpz_init_zero(z); - return z; -} - -mpz_t *mpz_from_int(mp_int_t val) { - mpz_t *z = mpz_zero(); - mpz_set_from_int(z, val); - return z; -} - -mpz_t *mpz_from_ll(long long val, bool is_signed) { - mpz_t *z = mpz_zero(); - mpz_set_from_ll(z, val, is_signed); - return z; -} - -#if MICROPY_PY_BUILTINS_FLOAT -mpz_t *mpz_from_float(mp_float_t val) { - mpz_t *z = mpz_zero(); - mpz_set_from_float(z, val); - return z; -} -#endif - -mpz_t *mpz_from_str(const char *str, size_t len, bool neg, unsigned int base) { - mpz_t *z = mpz_zero(); - mpz_set_from_str(z, str, len, neg, base); - return z; -} -#endif - STATIC void mpz_free(mpz_t *z) { if (z 
!= NULL) { m_del(mpz_dig_t, z->dig, z->alloc); @@ -701,9 +638,8 @@ STATIC mpz_t *mpz_clone(const mpz_t *src) { return z; } -/* sets dest = src - can have dest, src the same -*/ +// Sets dest = src +// Can have dest, src the same void mpz_set(mpz_t *dest, const mpz_t *src) { mpz_need_dig(dest, src->len); dest->neg = src->neg; @@ -813,7 +749,7 @@ void mpz_set_from_float(mpz_t *z, mp_float_t src) { } #endif -// returns number of bytes from str that were processed +// Returns number of bytes from str that were processed size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base) { assert(base <= 36); @@ -883,22 +819,6 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) z->len = mpn_remove_trailing_zeros(z->dig, z->dig + z->len); } -#if 0 -these functions are unused - -bool mpz_is_pos(const mpz_t *z) { - return z->len > 0 && z->neg == 0; -} - -bool mpz_is_odd(const mpz_t *z) { - return z->len > 0 && (z->dig[0] & 1) != 0; -} - -bool mpz_is_even(const mpz_t *z) { - return z->len == 0 || (z->dig[0] & 1) == 0; -} -#endif - int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { int cmp = (int)z2->neg - (int)z1->neg; if (cmp != 0) { @@ -911,130 +831,8 @@ int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { return cmp; } -#if 0 -// obsolete -// compares mpz with an integer that fits within DIG_SIZE bits -mp_int_t mpz_cmp_sml_int(const mpz_t *z, mp_int_t sml_int) { - mp_int_t cmp; - if (z->neg == 0) { - if (sml_int < 0) { - return 1; - } - if (sml_int == 0) { - if (z->len == 0) { - return 0; - } - return 1; - } - if (z->len == 0) { - return -1; - } - assert(sml_int < (1 << DIG_SIZE)); - if (z->len != 1) { - return 1; - } - cmp = z->dig[0] - sml_int; - } else { - if (sml_int > 0) { - return -1; - } - if (sml_int == 0) { - if (z->len == 0) { - return 0; - } - return -1; - } - if (z->len == 0) { - return 1; - } - assert(sml_int > -(1 << DIG_SIZE)); - if (z->len != 1) { - return -1; - } - cmp = -z->dig[0] - sml_int; - } - if (cmp < 
0) { - return -1; - } - if (cmp > 0) { - return 1; - } - return 0; -} -#endif - -#if 0 -these functions are unused - -/* returns abs(z) -*/ -mpz_t *mpz_abs(const mpz_t *z) { - // TODO: handle case of z->alloc=0 - mpz_t *z2 = mpz_clone(z); - z2->neg = 0; - return z2; -} - -/* returns -z -*/ -mpz_t *mpz_neg(const mpz_t *z) { - // TODO: handle case of z->alloc=0 - mpz_t *z2 = mpz_clone(z); - z2->neg = 1 - z2->neg; - return z2; -} - -/* returns lhs + rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_add(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_add_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs - rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_sub(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_sub_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs * rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_mul(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_mul_inpl(z, lhs, rhs); - return z; -} - -/* returns lhs ** rhs - can have lhs, rhs the same -*/ -mpz_t *mpz_pow(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *z = mpz_zero(); - mpz_pow_inpl(z, lhs, rhs); - return z; -} - -/* computes new integers in quo and rem such that: - quo * rhs + rem = lhs - 0 <= rem < rhs - can have lhs, rhs the same -*/ -void mpz_divmod(const mpz_t *lhs, const mpz_t *rhs, mpz_t **quo, mpz_t **rem) { - *quo = mpz_zero(); - *rem = mpz_zero(); - mpz_divmod_inpl(*quo, *rem, lhs, rhs); -} -#endif - -/* computes dest = abs(z) - can have dest, z the same -*/ +// Computes dest = abs(z) +// Can have dest, z the same void mpz_abs_inpl(mpz_t *dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); @@ -1042,9 +840,8 @@ void mpz_abs_inpl(mpz_t *dest, const mpz_t *z) { dest->neg = 0; } -/* computes dest = -z - can have dest, z the same -*/ +// Computes dest = -z +// Can have dest, z the same void mpz_neg_inpl(mpz_t *dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); @@ -1054,9 +851,8 @@ void mpz_neg_inpl(mpz_t *dest, const mpz_t 
*z) { } } -/* computes dest = ~z (= -z - 1) - can have dest, z the same -*/ +// Computes dest = ~z (= -z - 1) +// Can have dest, z the same void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); @@ -1078,9 +874,8 @@ void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { } } -/* computes dest = lhs << rhs - can have dest, lhs the same -*/ +// Computes dest = lhs << rhs +// Can have dest, lhs the same void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); @@ -1091,9 +886,8 @@ void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { } } -/* computes dest = lhs >> rhs - can have dest, lhs the same -*/ +// Computes dest = lhs >> rhs +// Can have dest, lhs the same void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); @@ -1129,9 +923,8 @@ void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { } } -/* computes dest = lhs + rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs + rhs +// Can have dest, lhs, rhs the same void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { if (mpn_cmp(lhs->dig, lhs->len, rhs->dig, rhs->len) < 0) { const mpz_t *temp = lhs; @@ -1150,9 +943,8 @@ void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { dest->neg = lhs->neg & !!dest->len; } -/* computes dest = lhs - rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs - rhs +// Can have dest, lhs, rhs the same void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { bool neg = false; @@ -1180,9 +972,8 @@ void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { } } -/* computes dest = lhs & rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs & rhs +// Can have dest, lhs, rhs the same void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { @@ -1214,9 +1005,8 @@ 
void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs | rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs | rhs +// Can have dest, lhs, rhs the same void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { @@ -1248,9 +1038,8 @@ void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs ^ rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs ^ rhs +// Can have dest, lhs, rhs the same void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { @@ -1286,9 +1075,8 @@ void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { #endif } -/* computes dest = lhs * rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs * rhs +// Can have dest, lhs, rhs the same void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->len == 0) { mpz_set_from_int(dest, 0); @@ -1318,9 +1106,8 @@ void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { mpz_free(temp); } -/* computes dest = lhs ** rhs - can have dest, lhs, rhs the same -*/ +// Computes dest = lhs ** rhs +// Can have dest, lhs, rhs the same void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->neg != 0) { mpz_set_from_int(dest, 0); @@ -1352,9 +1139,8 @@ void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { mpz_free(n); } -/* computes dest = (lhs ** rhs) % mod - can have dest, lhs, rhs the same; mod can't be the same as dest -*/ +// Computes dest = (lhs ** rhs) % mod +// Can have dest, lhs, rhs the same; mod can't be the same as dest void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod) { if (lhs->len == 0 || rhs->neg != 0 || (mod->len == 1 && mod->dig[0] == 1)) { mpz_set_from_int(dest, 0); @@ -1390,96 +1176,11 
@@ void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t mpz_free(n); } -#if 0 -these functions are unused - -/* computes gcd(z1, z2) - based on Knuth's modified gcd algorithm (I think?) - gcd(z1, z2) >= 0 - gcd(0, 0) = 0 - gcd(z, 0) = abs(z) -*/ -mpz_t *mpz_gcd(const mpz_t *z1, const mpz_t *z2) { - if (z1->len == 0) { - // TODO: handle case of z2->alloc=0 - mpz_t *a = mpz_clone(z2); - a->neg = 0; - return a; - } else if (z2->len == 0) { - mpz_t *a = mpz_clone(z1); - a->neg = 0; - return a; - } - - mpz_t *a = mpz_clone(z1); - mpz_t *b = mpz_clone(z2); - mpz_t c; - mpz_init_zero(&c); - a->neg = 0; - b->neg = 0; - - for (;;) { - if (mpz_cmp(a, b) < 0) { - if (a->len == 0) { - mpz_free(a); - mpz_deinit(&c); - return b; - } - mpz_t *t = a; - a = b; - b = t; - } - if (!(b->len >= 2 || (b->len == 1 && b->dig[0] > 1))) { // compute b > 0; could be mpz_cmp_small_int(b, 1) > 0 - break; - } - mpz_set(&c, b); - do { - mpz_add_inpl(&c, &c, &c); - } while (mpz_cmp(&c, a) <= 0); - c.len = mpn_shr(c.dig, c.dig, c.len, 1); - mpz_sub_inpl(a, a, &c); - } - - mpz_deinit(&c); - - if (b->len == 1 && b->dig[0] == 1) { // compute b == 1; could be mpz_cmp_small_int(b, 1) == 0 - mpz_free(a); - return b; - } else { - mpz_free(b); - return a; - } -} - -/* computes lcm(z1, z2) - = abs(z1) / gcd(z1, z2) * abs(z2) - lcm(z1, z1) >= 0 - lcm(0, 0) = 0 - lcm(z, 0) = 0 -*/ -mpz_t *mpz_lcm(const mpz_t *z1, const mpz_t *z2) { - if (z1->len == 0 || z2->len == 0) { - return mpz_zero(); - } - - mpz_t *gcd = mpz_gcd(z1, z2); - mpz_t *quo = mpz_zero(); - mpz_t *rem = mpz_zero(); - mpz_divmod_inpl(quo, rem, z1, gcd); - mpz_mul_inpl(rem, quo, z2); - mpz_free(gcd); - mpz_free(quo); - rem->neg = 0; - return rem; -} -#endif - -/* computes new integers in quo and rem such that: - quo * rhs + rem = lhs - 0 <= rem < rhs - can have lhs, rhs the same - assumes rhs != 0 (undefined behaviour if it is) -*/ +// Computes new integers in quo and rem such that: +// quo * rhs + rem = lhs +// 0 <= 
rem < rhs +// Can have lhs, rhs the same +// Assumes rhs != 0 (undefined behaviour if it is) void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs) { assert(!mpz_is_zero(rhs)); @@ -1504,35 +1205,7 @@ void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const m } } -#if 0 -these functions are unused - -/* computes floor(lhs / rhs) - can have lhs, rhs the same -*/ -mpz_t *mpz_div(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t *quo = mpz_zero(); - mpz_t rem; - mpz_init_zero(&rem); - mpz_divmod_inpl(quo, &rem, lhs, rhs); - mpz_deinit(&rem); - return quo; -} - -/* computes lhs % rhs ( >= 0) - can have lhs, rhs the same -*/ -mpz_t *mpz_mod(const mpz_t *lhs, const mpz_t *rhs) { - mpz_t quo; - mpz_init_zero(&quo); - mpz_t *rem = mpz_zero(); - mpz_divmod_inpl(&quo, rem, lhs, rhs); - mpz_deinit(&quo); - return rem; -} -#endif - -// must return actual int value if it fits in mp_int_t +// Must return actual int value if it fits in mp_int_t mp_int_t mpz_hash(const mpz_t *z) { mp_uint_t val = 0; mpz_dig_t *d = z->dig + z->len; @@ -1648,18 +1321,9 @@ mp_float_t mpz_as_float(const mpz_t *i) { } #endif -#if 0 -this function is unused -char *mpz_as_str(const mpz_t *i, unsigned int base) { - char *s = m_new(char, mp_int_format_size(mpz_max_num_bits(i), base, NULL, '\0')); - mpz_as_str_inpl(i, base, NULL, 'a', '\0', s); - return s; -} -#endif - -// assumes enough space in str as calculated by mp_int_format_size +// Assumes enough space in str as calculated by mp_int_format_size // base must be between 2 and 32 inclusive -// returns length of string, not including null byte +// Returns length of string, not including null byte size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, char base_char, char comma, char *str) { assert(str != NULL); assert(2 <= base && base <= 32); From ec5b80bdd5d5cf09bb9561929da33658dc9ad54b Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 14:10:23 +1100 Subject: 
[PATCH 2/9] py/mpz: Remove mpz_set from public API. It's unused outside of mpz.c. Signed-off-by: Jim Mussared --- py/mpz.c | 2 +- py/mpz.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/py/mpz.c b/py/mpz.c index 872d3af2435c6..9635200435f4d 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -640,7 +640,7 @@ STATIC mpz_t *mpz_clone(const mpz_t *src) { // Sets dest = src // Can have dest, src the same -void mpz_set(mpz_t *dest, const mpz_t *src) { +STATIC void mpz_set(mpz_t *dest, const mpz_t *src) { mpz_need_dig(dest, src->len); dest->neg = src->neg; dest->len = src->len; diff --git a/py/mpz.h b/py/mpz.h index d27f5724047ae..d01d646ccefd3 100644 --- a/py/mpz.h +++ b/py/mpz.h @@ -107,7 +107,6 @@ void mpz_init_from_int(mpz_t *z, mp_int_t val); void mpz_init_fixed_from_int(mpz_t *z, mpz_dig_t *dig, size_t dig_alloc, mp_int_t val); void mpz_deinit(mpz_t *z); -void mpz_set(mpz_t *dest, const mpz_t *src); void mpz_set_from_int(mpz_t *z, mp_int_t src); void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed); #if MICROPY_PY_BUILTINS_FLOAT From 26418c0ce6454daab35649477c880e3ee270df36 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:09:43 +1100 Subject: [PATCH 3/9] py/py.mk: Remove support for CSUPEROPT. This will be replaced with a function attribute approach, configured via mpconfig. Signed-off-by: Jim Mussared --- ports/bare-arm/Makefile | 1 - ports/minimal/Makefile | 2 -- ports/stm32/Makefile | 1 - py/py.mk | 17 ----------------- 4 files changed, 21 deletions(-) diff --git a/ports/bare-arm/Makefile b/ports/bare-arm/Makefile index 1a21eb56a867b..e48342924b951 100644 --- a/ports/bare-arm/Makefile +++ b/ports/bare-arm/Makefile @@ -16,7 +16,6 @@ PYDFU ?= $(TOP)/tools/pydfu.py CFLAGS += -I. -I$(TOP) -I$(BUILD) CFLAGS += -Wall -Werror -std=c99 -nostdlib CFLAGS += -mthumb -mtune=cortex-m4 -mcpu=cortex-m4 -msoft-float -CSUPEROPT = -Os # save some code space for performance-critical code # Select debugging or optimisation build. 
ifeq ($(DEBUG), 1) diff --git a/ports/minimal/Makefile b/ports/minimal/Makefile index 050c4ddf526a5..554a024c66041 100644 --- a/ports/minimal/Makefile +++ b/ports/minimal/Makefile @@ -31,8 +31,6 @@ CFLAGS += $(INC) -Wall -Werror -Wdouble-promotion -Wfloat-conversion -std=c99 $( LDFLAGS += -Wl,-Map=$@.map,--cref -Wl,--gc-sections endif -CSUPEROPT = -Os # save some code space - # Tune for Debugging or Optimization CFLAGS += -g # always include debug info in the ELF ifeq ($(DEBUG), 1) diff --git a/ports/stm32/Makefile b/ports/stm32/Makefile index e44a542395182..edc59b742a875 100644 --- a/ports/stm32/Makefile +++ b/ports/stm32/Makefile @@ -284,7 +284,6 @@ SRC_O += \ $(SYSTEM_FILE) ifeq ($(MCU_SERIES),$(filter $(MCU_SERIES),f0 g0 l0)) -CSUPEROPT = -Os # save some code space SRC_O += \ resethandler_m0.o \ shared/runtime/gchelper_thumb1.o diff --git a/py/py.mk b/py/py.mk index e81df52fb7f99..18beed3452961 100644 --- a/py/py.mk +++ b/py/py.mk @@ -18,9 +18,6 @@ endif QSTR_GLOBAL_DEPENDENCIES += $(PY_SRC)/mpconfig.h mpconfigport.h QSTR_GLOBAL_REQUIREMENTS += $(HEADER_BUILD)/mpversion.h -# some code is performance bottleneck and compiled with other optimization options -CSUPEROPT = -O3 - # Enable building 32-bit code on 64-bit host. ifeq ($(MICROPY_FORCE_32BIT),1) CC += -m32 @@ -254,17 +251,3 @@ $(BUILD)/shared/libc/string0.o: CFLAGS += $(CFLAGS_BUILTIN) # Force nlr code to always be compiled with space-saving optimisation so # that the function preludes are of a minimal and predictable form. $(PY_BUILD)/nlr%.o: CFLAGS += -Os - -# optimising gc for speed; 5ms down to 4ms on pybv2 -$(PY_BUILD)/gc.o: CFLAGS += $(CSUPEROPT) - -# optimising vm for speed, adds only a small amount to code size but makes a huge difference to speed (20% faster) -$(PY_BUILD)/vm.o: CFLAGS += $(CSUPEROPT) -# Optimizing vm.o for modern deeply pipelined CPUs with branch predictors -# may require disabling tail jump optimization. 
This will make sure that -# each opcode has its own dispatching jump which will improve branch -# branch predictor efficiency. -# https://marc.info/?l=lua-l&m=129778596120851 -# http://hg.python.org/cpython/file/b127046831e2/Python/ceval.c#l828 -# http://www.emulators.com/docs/nx25_nostradamus.htm -#-fno-crossjumping From c64b7107da0ab5b1554f37ff459113b5a9cf6fdc Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:16:41 +1100 Subject: [PATCH 4/9] py/mpconfig.h: Move MICROPY_WRAP macros to place-of-use. It's useful to provide a way for a port/board to customise an individual function, but no point cluttering up mpconfig.h. These wrap macros are now defined in terms of standardised levels (O3+ram, O3+mayberam, O3, maybeO3) which are defined in mpconfig.h. This is what most ports/boards should configure instead. Currently only level 1 and 2 are used, and the various functions have been assigned levels to match the way esp32 currently overrides them. Signed-off-by: Jim Mussared --- py/map.c | 4 ++++ py/mpconfig.h | 49 ++++++++++++++++--------------------------------- py/obj.c | 4 ++++ py/runtime.c | 12 ++++++++++++ py/scheduler.c | 16 ++++++++++++++++ py/vm.c | 15 +++++++++++++++ 6 files changed, 67 insertions(+), 33 deletions(-) diff --git a/py/map.c b/py/map.c index c18df5a9f333c..2157c4d7d823b 100644 --- a/py/map.c +++ b/py/map.c @@ -147,6 +147,10 @@ STATIC void mp_map_rehash(mp_map_t *map) { m_del(mp_map_elem_t, old_table, old_alloc); } +#ifndef MICROPY_WRAP_MP_MAP_LOOKUP +#define MICROPY_WRAP_MP_MAP_LOOKUP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // MP_MAP_LOOKUP behaviour: // - returns NULL if not found, else the slot it was found in with key,value non-null // MP_MAP_LOOKUP_ADD_IF_NOT_FOUND behaviour: diff --git a/py/mpconfig.h b/py/mpconfig.h index a1e9660bf46a6..4a73653dc4edb 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1784,46 +1784,29 @@ typedef double mp_float_t; #endif
/*****************************************************************************/ -/* Hooks for a port to wrap functions with attributes */ +/* Hooks for a port to wrap functions with performance-tuning attributes */ -#ifndef MICROPY_WRAP_MP_BINARY_OP -#define MICROPY_WRAP_MP_BINARY_OP(f) f +// Ideally apply full compiler optimisations and place in RAM. +// Use this on small functions that need the highest possible performance. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_1 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) f #endif -#ifndef MICROPY_WRAP_MP_EXECUTE_BYTECODE -#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) f +// Ideally apply full compiler optimisations and optionally place in RAM (if IRAM available). +// Use this on larger functions that should go in RAM if possible. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_2 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) f #endif -#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL -#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) f +// Ideally apply full compiler optimisation if flash available. +// Use this on functions that are not important enough to place in RAM. +#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_3 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) f #endif -#ifndef MICROPY_WRAP_MP_LOAD_NAME -#define MICROPY_WRAP_MP_LOAD_NAME(f) f -#endif - -#ifndef MICROPY_WRAP_MP_MAP_LOOKUP -#define MICROPY_WRAP_MP_MAP_LOOKUP(f) f -#endif - -#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE -#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) f -#endif - -#ifndef MICROPY_WRAP_MP_SCHED_EXCEPTION -#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) f -#endif - -#ifndef MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT -#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) f -#endif - -#ifndef MICROPY_WRAP_MP_SCHED_SCHEDULE -#define MICROPY_WRAP_MP_SCHED_SCHEDULE(f) f -#endif - -#ifndef MICROPY_WRAP_MP_SCHED_VM_ABORT -#define MICROPY_WRAP_MP_SCHED_VM_ABORT(f) f +// Ideally apply full compiler optimisation if flash available (but lower priority than level 3). 
+#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_4 +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) f #endif /*****************************************************************************/ diff --git a/py/obj.c b/py/obj.c index 5e01198b6fb4c..6b620170739d1 100644 --- a/py/obj.c +++ b/py/obj.c @@ -44,6 +44,10 @@ MP_NOINLINE void *mp_obj_malloc_helper(size_t num_bytes, const mp_obj_type_t *ty return base; } +#ifndef MICROPY_WRAP_MP_OBJ_GET_TYPE +#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + const mp_obj_type_t *MICROPY_WRAP_MP_OBJ_GET_TYPE(mp_obj_get_type)(mp_const_obj_t o_in) { #if MICROPY_OBJ_IMMEDIATE_OBJS && MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A diff --git a/py/runtime.c b/py/runtime.c index 6d8eddedc8646..5342dd9251bca 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -203,6 +203,10 @@ void mp_call_function_1_from_nlr_jump_callback(void *ctx_in) { ctx->func(ctx->arg); } +#ifndef MICROPY_WRAP_MP_LOAD_NAME +#define MICROPY_WRAP_MP_LOAD_NAME(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + mp_obj_t MICROPY_WRAP_MP_LOAD_NAME(mp_load_name)(qstr qst) { // logic: search locals, globals, builtins DEBUG_OP_printf("load name %s\n", qstr_str(qst)); @@ -216,6 +220,10 @@ mp_obj_t MICROPY_WRAP_MP_LOAD_NAME(mp_load_name)(qstr qst) { return mp_load_global(qst); } +#ifndef MICROPY_WRAP_MP_LOAD_GLOBAL +#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + mp_obj_t MICROPY_WRAP_MP_LOAD_GLOBAL(mp_load_global)(qstr qst) { // logic: search globals, builtins DEBUG_OP_printf("load global %s\n", qstr_str(qst)); @@ -360,6 +368,10 @@ mp_obj_t mp_unary_op(mp_unary_op_t op, mp_obj_t arg) { } } +#ifndef MICROPY_WRAP_MP_BINARY_OP +#define MICROPY_WRAP_MP_BINARY_OP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) +#endif + mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs, mp_obj_t rhs) { DEBUG_OP_printf("binary " UINT_FMT " %q %p %p\n", op, mp_binary_op_method_name[op], lhs, rhs); diff --git 
a/py/scheduler.c b/py/scheduler.c index 3eae8b4fa366c..682b8dbd70b6c 100644 --- a/py/scheduler.c +++ b/py/scheduler.c @@ -29,6 +29,10 @@ #include "py/mphal.h" #include "py/runtime.h" +#ifndef MICROPY_WRAP_MP_SCHED_EXCEPTION +#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // Schedules an exception on the main thread (for exceptions "thrown" by async // sources such as interrupts and UNIX signal handlers). void MICROPY_WRAP_MP_SCHED_EXCEPTION(mp_sched_exception)(mp_obj_t exc) { @@ -45,6 +49,10 @@ void MICROPY_WRAP_MP_SCHED_EXCEPTION(mp_sched_exception)(mp_obj_t exc) { } #if MICROPY_KBD_EXCEPTION +#ifndef MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT +#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + // This function may be called asynchronously at any time so only do the bare minimum. void MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(mp_sched_keyboard_interrupt)(void) { MP_STATE_VM(mp_kbd_exception).traceback_data = NULL; @@ -53,6 +61,10 @@ void MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(mp_sched_keyboard_interrupt)(void) #endif #if MICROPY_ENABLE_VM_ABORT +#ifndef MICROPY_WRAP_MP_SCHED_VM_ABORT +#define MICROPY_WRAP_MP_SCHED_VM_ABORT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + void MICROPY_WRAP_MP_SCHED_VM_ABORT(mp_sched_vm_abort)(void) { MP_STATE_VM(vm_abort) = true; } @@ -156,6 +168,10 @@ void mp_sched_unlock(void) { MICROPY_END_ATOMIC_SECTION(atomic_state); } +#ifndef MICROPY_WRAP_MP_SCHED_SCHEDULE +#define MICROPY_WRAP_MP_SCHED_SCHEDULE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) +#endif + bool MICROPY_WRAP_MP_SCHED_SCHEDULE(mp_sched_schedule)(mp_obj_t function, mp_obj_t arg) { mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION(); bool ret; diff --git a/py/vm.c b/py/vm.c index a7902d9276732..9e816b4e80bf1 100644 --- a/py/vm.c +++ b/py/vm.c @@ -195,6 +195,21 @@ #define TRACE_TICK(current_ip, current_sp, is_exception) #endif // MICROPY_PY_SYS_SETTRACE +#ifndef 
MICROPY_WRAP_MP_EXECUTE_BYTECODE +// Using -O3 (rather than -Os) only adds a small amount to code size but makes a huge difference to speed (20% faster) +#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) + +// Note: +// Optimizing vm.o for modern deeply pipelined CPUs with branch predictors +// may require disabling tail jump optimization. This will make sure that +// each opcode has its own dispatching jump which will improve +// branch predictor efficiency. +// https://marc.info/?l=lua-l&m=129778596120851 +// http://hg.python.org/cpython/file/b127046831e2/Python/ceval.c#l828 +// http://www.emulators.com/docs/nx25_nostradamus.htm +// -fno-crossjumping +#endif + // fastn has items in reverse order (fastn[0] is local[0], fastn[-1] is local[1], etc) // sp points to bottom of stack which grows up // returns: From dd18e2f93940bdd03a06e624d5d925125142e46b Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:18:00 +1100 Subject: [PATCH 5/9] py/gc: Add MICROPY_WRAP macros for alloc/realloc/free. This replaces the previous CSUPEROPT used for gc.o, however because it's applied just to the required functions it leads to a code size saving. Defaults to level 3 (i.e. apply `-O3`, but don't place in RAM).
Signed-off-by: Jim Mussared --- py/gc.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/py/gc.c b/py/gc.c index b6969dfd42429..f331c6ee0bbf7 100644 --- a/py/gc.c +++ b/py/gc.c @@ -723,7 +723,12 @@ void gc_info(gc_info_t *info) { GC_EXIT(); } -void *gc_alloc(size_t n_bytes, unsigned int alloc_flags) { +#ifndef MICROPY_WRAP_GC_ALLOC +// Optimising gc for speed; 5ms down to 4ms on pybv2 +#define MICROPY_WRAP_GC_ALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + +void *MICROPY_WRAP_GC_ALLOC(gc_alloc)(size_t n_bytes, unsigned int alloc_flags) { bool has_finaliser = alloc_flags & GC_ALLOC_FLAG_HAS_FINALISER; size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK; DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks); @@ -890,9 +895,13 @@ void *gc_alloc_with_finaliser(mp_uint_t n_bytes) { } */ +#ifndef MICROPY_WRAP_GC_FREE +#define MICROPY_WRAP_GC_FREE(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + // force the freeing of a piece of memory // TODO: freeing here does not call finaliser -void gc_free(void *ptr) { +void MICROPY_WRAP_GC_FREE(gc_free)(void *ptr) { if (MP_STATE_THREAD(gc_lock_depth) > 0) { // Cannot free while the GC is locked. 
However free is an optimisation // to reclaim the memory immediately, this means it will now be left @@ -1021,7 +1030,11 @@ void *gc_realloc(void *ptr, mp_uint_t n_bytes) { #else // Alternative gc_realloc impl -void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) { +#ifndef MICROPY_WRAP_GC_REALLOC +#define MICROPY_WRAP_GC_REALLOC(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) +#endif + +void *MICROPY_WRAP_GC_REALLOC(gc_realloc)(void *ptr_in, size_t n_bytes, bool allow_move) { // check for pure allocation if (ptr_in == NULL) { return gc_alloc(n_bytes, false); From d93cbae8f37bd10506531ba29c6920e402388e9a Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:19:12 +1100 Subject: [PATCH 6/9] py/mpz: Add MICROPY_WRAP macros for public mpz API. This currently uses level 4 (not enabled by default), so should be a no-op change, but now a board with spare flash can opt-into it. Signed-off-by: Jim Mussared --- py/mpz.c | 150 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 125 insertions(+), 25 deletions(-) diff --git a/py/mpz.c b/py/mpz.c index 9635200435f4d..c274457d00de6 100644 --- a/py/mpz.c +++ b/py/mpz.c @@ -647,7 +647,107 @@ STATIC void mpz_set(mpz_t *dest, const mpz_t *src) { memcpy(dest->dig, src->dig, src->len * sizeof(mpz_dig_t)); } -void mpz_set_from_int(mpz_t *z, mp_int_t val) { +#ifndef MICROPY_WRAP_MPZ_SET_FROM_INT +#define MICROPY_WRAP_MPZ_SET_FROM_INT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_LL +#define MICROPY_WRAP_MPZ_SET_FROM_LL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_FLOAT +#define MICROPY_WRAP_MPZ_SET_FROM_FLOAT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_STR +#define MICROPY_WRAP_MPZ_SET_FROM_STR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SET_FROM_BYTES +#define MICROPY_WRAP_MPZ_SET_FROM_BYTES(f) 
MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_CMP +#define MICROPY_WRAP_MPZ_CMP(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_ABS +#define MICROPY_WRAP_MPZ_ABS(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_NEG +#define MICROPY_WRAP_MPZ_NEG(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_NOT +#define MICROPY_WRAP_MPZ_NOT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SHL +#define MICROPY_WRAP_MPZ_SHL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SHR +#define MICROPY_WRAP_MPZ_SHR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_ADD +#define MICROPY_WRAP_MPZ_ADD(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_SUB +#define MICROPY_WRAP_MPZ_SUB(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AND +#define MICROPY_WRAP_MPZ_AND(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_OR +#define MICROPY_WRAP_MPZ_OR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_XOR +#define MICROPY_WRAP_MPZ_XOR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_MUL +#define MICROPY_WRAP_MPZ_MUL(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_POW +#define MICROPY_WRAP_MPZ_POW(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_POW3 +#define MICROPY_WRAP_MPZ_POW3(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_DIVMOD +#define MICROPY_WRAP_MPZ_DIVMOD(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_HASH +#define MICROPY_WRAP_MPZ_HASH(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_INT +#define MICROPY_WRAP_MPZ_AS_INT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef 
MICROPY_WRAP_MPZ_AS_UINT +#define MICROPY_WRAP_MPZ_AS_UINT(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_BYTES +#define MICROPY_WRAP_MPZ_AS_BYTES(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +#ifndef MICROPY_WRAP_MPZ_AS_STR +#define MICROPY_WRAP_MPZ_AS_STR(f) MICROPY_PERFORMANCE_CRITICAL_LEVEL_4(f) +#endif + +void MICROPY_WRAP_MPZ_SET_FROM_INT(mpz_set_from_int)(mpz_t * z, mp_int_t val) { if (val == 0) { z->neg = 0; z->len = 0; @@ -672,7 +772,7 @@ void mpz_set_from_int(mpz_t *z, mp_int_t val) { } } -void mpz_set_from_ll(mpz_t *z, long long val, bool is_signed) { +void MICROPY_WRAP_MPZ_SET_FROM_LL(mpz_set_from_ll)(mpz_t * z, long long val, bool is_signed) { mpz_need_dig(z, MPZ_NUM_DIG_FOR_LL); unsigned long long uval; @@ -692,7 +792,7 @@ void mpz_set_from_ll(mpz_t *z, long long val, bool is_signed) { } #if MICROPY_PY_BUILTINS_FLOAT -void mpz_set_from_float(mpz_t *z, mp_float_t src) { +void MICROPY_WRAP_MPZ_SET_FROM_FLOAT(mpz_set_from_float)(mpz_t * z, mp_float_t src) { mp_float_union_t u = {src}; z->neg = u.p.sgn; if (u.p.exp == 0) { @@ -750,7 +850,7 @@ void mpz_set_from_float(mpz_t *z, mp_float_t src) { #endif // Returns number of bytes from str that were processed -size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base) { +size_t MICROPY_WRAP_MPZ_SET_FROM_STR(mpz_set_from_str)(mpz_t * z, const char *str, size_t len, bool neg, unsigned int base) { assert(base <= 36); const char *cur = str; @@ -786,7 +886,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne return cur - str; } -void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) { +void MICROPY_WRAP_MPZ_SET_FROM_BYTES(mpz_set_from_bytes)(mpz_t * z, bool big_endian, size_t len, const byte *buf) { int delta = 1; if (big_endian) { buf += len - 1; @@ -819,7 +919,7 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) z->len = 
mpn_remove_trailing_zeros(z->dig, z->dig + z->len); } -int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { +int MICROPY_WRAP_MPZ_CMP(mpz_cmp)(const mpz_t * z1, const mpz_t *z2) { int cmp = (int)z2->neg - (int)z1->neg; if (cmp != 0) { return cmp; @@ -833,7 +933,7 @@ int mpz_cmp(const mpz_t *z1, const mpz_t *z2) { // Computes dest = abs(z) // Can have dest, z the same -void mpz_abs_inpl(mpz_t *dest, const mpz_t *z) { +void MICROPY_WRAP_MPZ_ABS(mpz_abs_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } @@ -842,7 +942,7 @@ void mpz_abs_inpl(mpz_t *dest, const mpz_t *z) { // Computes dest = -z // Can have dest, z the same -void mpz_neg_inpl(mpz_t *dest, const mpz_t *z) { +void MICROPY_WRAP_MPZ_NEG(mpz_neg_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } @@ -853,7 +953,7 @@ void mpz_neg_inpl(mpz_t *dest, const mpz_t *z) { // Computes dest = ~z (= -z - 1) // Can have dest, z the same -void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { +void MICROPY_WRAP_MPZ_NOT(mpz_not_inpl)(mpz_t * dest, const mpz_t *z) { if (dest != z) { mpz_set(dest, z); } @@ -876,7 +976,7 @@ void mpz_not_inpl(mpz_t *dest, const mpz_t *z) { // Computes dest = lhs << rhs // Can have dest, lhs the same -void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { +void MICROPY_WRAP_MPZ_SHL(mpz_shl_inpl)(mpz_t * dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); } else { @@ -888,7 +988,7 @@ void mpz_shl_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { // Computes dest = lhs >> rhs // Can have dest, lhs the same -void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { +void MICROPY_WRAP_MPZ_SHR(mpz_shr_inpl)(mpz_t * dest, const mpz_t *lhs, mp_uint_t rhs) { if (lhs->len == 0 || rhs == 0) { mpz_set(dest, lhs); } else { @@ -925,7 +1025,7 @@ void mpz_shr_inpl(mpz_t *dest, const mpz_t *lhs, mp_uint_t rhs) { // Computes dest = lhs + rhs // Can have dest, lhs, rhs the same -void mpz_add_inpl(mpz_t *dest, 
const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_ADD(mpz_add_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (mpn_cmp(lhs->dig, lhs->len, rhs->dig, rhs->len) < 0) { const mpz_t *temp = lhs; lhs = rhs; @@ -945,7 +1045,7 @@ void mpz_add_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs - rhs // Can have dest, lhs, rhs the same -void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_SUB(mpz_sub_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { bool neg = false; if (mpn_cmp(lhs->dig, lhs->len, rhs->dig, rhs->len) < 0) { @@ -974,7 +1074,7 @@ void mpz_sub_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs & rhs // Can have dest, lhs, rhs the same -void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_AND(mpz_and_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1007,7 +1107,7 @@ void mpz_and_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs | rhs // Can have dest, lhs, rhs the same -void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_OR(mpz_or_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1040,7 +1140,7 @@ void mpz_or_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs ^ rhs // Can have dest, lhs, rhs the same -void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_XOR(mpz_xor_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { // make sure lhs has the most digits if (lhs->len < rhs->len) { const mpz_t *temp = lhs; @@ -1077,7 +1177,7 @@ void mpz_xor_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs * rhs // Can have dest, lhs, rhs the same 
-void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_MUL(mpz_mul_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->len == 0) { mpz_set_from_int(dest, 0); return; @@ -1108,7 +1208,7 @@ void mpz_mul_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = lhs ** rhs // Can have dest, lhs, rhs the same -void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_POW(mpz_pow_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs) { if (lhs->len == 0 || rhs->neg != 0) { mpz_set_from_int(dest, 0); return; @@ -1141,7 +1241,7 @@ void mpz_pow_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs) { // Computes dest = (lhs ** rhs) % mod // Can have dest, lhs, rhs the same; mod can't be the same as dest -void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod) { +void MICROPY_WRAP_MPZ_POW3(mpz_pow3_inpl)(mpz_t * dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t *mod) { if (lhs->len == 0 || rhs->neg != 0 || (mod->len == 1 && mod->dig[0] == 1)) { mpz_set_from_int(dest, 0); return; @@ -1181,7 +1281,7 @@ void mpz_pow3_inpl(mpz_t *dest, const mpz_t *lhs, const mpz_t *rhs, const mpz_t // 0 <= rem < rhs // Can have lhs, rhs the same // Assumes rhs != 0 (undefined behaviour if it is) -void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs) { +void MICROPY_WRAP_MPZ_DIVMOD(mpz_divmod_inpl)(mpz_t * dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const mpz_t *rhs) { assert(!mpz_is_zero(rhs)); mpz_need_dig(dest_quo, lhs->len + 1); // +1 necessary? 
@@ -1206,7 +1306,7 @@ void mpz_divmod_inpl(mpz_t *dest_quo, mpz_t *dest_rem, const mpz_t *lhs, const m } // Must return actual int value if it fits in mp_int_t -mp_int_t mpz_hash(const mpz_t *z) { +mp_int_t MICROPY_WRAP_MPZ_HASH(mpz_hash)(const mpz_t * z) { mp_uint_t val = 0; mpz_dig_t *d = z->dig + z->len; @@ -1221,7 +1321,7 @@ mp_int_t mpz_hash(const mpz_t *z) { return val; } -bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { +bool MICROPY_WRAP_MPZ_AS_INT(mpz_as_int_checked)(const mpz_t * i, mp_int_t *value) { mp_uint_t val = 0; mpz_dig_t *d = i->dig + i->len; @@ -1241,7 +1341,7 @@ bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) { return true; } -bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { +bool MICROPY_WRAP_MPZ_AS_UINT(mpz_as_uint_checked)(const mpz_t * i, mp_uint_t *value) { if (i->neg != 0) { // can't represent signed values return false; @@ -1262,7 +1362,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) { return true; } -void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) { +void MICROPY_WRAP_MPZ_AS_BYTES(mpz_as_bytes)(const mpz_t * z, bool big_endian, size_t len, byte *buf) { byte *b = buf; if (big_endian) { b += len; @@ -1324,7 +1424,7 @@ mp_float_t mpz_as_float(const mpz_t *i) { // Assumes enough space in str as calculated by mp_int_format_size // base must be between 2 and 32 inclusive // Returns length of string, not including null byte -size_t mpz_as_str_inpl(const mpz_t *i, unsigned int base, const char *prefix, char base_char, char comma, char *str) { +size_t MICROPY_WRAP_MPZ_AS_STR(mpz_as_str_inpl)(const mpz_t * i, unsigned int base, const char *prefix, char base_char, char comma, char *str) { assert(str != NULL); assert(2 <= base && base <= 32); From f3cc3649cd5c408d6161abba4c0e6390a8ea7640 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:24:05 +1100 Subject: [PATCH 7/9] py/mpconfig.h: Allow per-function super optimisations. 
Provides a default implementation of a macro that will enable `-O3`, and enable this by default on level 1, 2, and 3. Signed-off-by: Jim Mussared --- py/mpconfig.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/py/mpconfig.h b/py/mpconfig.h index 4a73653dc4edb..47534a64cb5c2 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -1786,22 +1786,32 @@ typedef double mp_float_t; /*****************************************************************************/ /* Hooks for a port to wrap functions with performance-tuning attributes */ +#ifndef MICROPY_APPLY_COMPILER_OPTIMISATIONS +#if defined(__GNUC__) && !defined(__clang__) +// Enable -O3 optimisations. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) __attribute__((optimize("O3"))) f +#else +// Unsupported on other compilers, will use global optimisation setting (typically -Os). +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f +#endif +#endif + // Ideally apply full compiler optimisations and place in RAM. // Use this on small functions that need the highest possible performance. #ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_1 -#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) f +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif // Ideally apply full compiler optimisations and optionally place in RAM (if IRAM available). // Use this on larger functions that should go in RAM if possible. #ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_2 -#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) f +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif // Ideally apply full compiler optimisation if flash available. // Use this on functions that are not important enough to place in RAM.
#ifndef MICROPY_PERFORMANCE_CRITICAL_LEVEL_3 -#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) f +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_3(f) MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif // Ideally apply full compiler optimisation if flash available (but lower priority than level 3). From 548187f5f8675a8debb90de368734e85652918e0 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:20:56 +1100 Subject: [PATCH 8/9] esp32/mpconfigport.h: Use performance level macros. Instead of applying to individual functions, configure the levels instead. This should be a no-op change -- the wrap functions map to level 1 and 2 in the same way as the current rules. Signed-off-by: Jim Mussared --- ports/esp32/mpconfigport.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/ports/esp32/mpconfigport.h b/ports/esp32/mpconfigport.h index 5dc4a9c758790..624415c08b085 100644 --- a/ports/esp32/mpconfigport.h +++ b/ports/esp32/mpconfigport.h @@ -231,18 +231,12 @@ void *esp_native_code_commit(void *, size_t, void *); #endif // Functions that should go in IRAM +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_1(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) +#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND) // For ESP32 with SPIRAM workaround, firmware is larger and uses more static IRAM, // so in that configuration don't put too many functions in IRAM. 
-#if !(CONFIG_IDF_TARGET_ESP32 && CONFIG_SPIRAM && CONFIG_SPIRAM_CACHE_WORKAROUND) -#define MICROPY_WRAP_MP_BINARY_OP(f) IRAM_ATTR f +#define MICROPY_PERFORMANCE_CRITICAL_LEVEL_2(f) IRAM_ATTR MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) #endif -#define MICROPY_WRAP_MP_EXECUTE_BYTECODE(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_LOAD_GLOBAL(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_LOAD_NAME(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_MAP_LOOKUP(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_OBJ_GET_TYPE(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_SCHED_EXCEPTION(f) IRAM_ATTR f -#define MICROPY_WRAP_MP_SCHED_KEYBOARD_INTERRUPT(f) IRAM_ATTR f #define UINT_FMT "%u" #define INT_FMT "%d" From 62fbbe0ef84fb6fc011afbcb7e154aa1f68f98e5 Mon Sep 17 00:00:00 2001 From: Jim Mussared Date: Thu, 25 Jan 2024 15:22:56 +1100 Subject: [PATCH 9/9] {bare-arm,minimal,stm32}/mpconfigport.h: Disable super-optimisations. bare-arm, minimal, and stm32-on-CM0 used to disable `CSUPEROPT`. This re-instates this behavior by disabling `MICROPY_APPLY_COMPILER_OPTIMISATIONS` instead. Signed-off-by: Jim Mussared --- ports/bare-arm/mpconfigport.h | 3 +++ ports/minimal/mpconfigport.h | 3 +++ ports/stm32/mpconfigport.h | 6 ++++++ 3 files changed, 12 insertions(+) diff --git a/ports/bare-arm/mpconfigport.h b/ports/bare-arm/mpconfigport.h index 65bb67f7b9a70..7a8fa55d30209 100644 --- a/ports/bare-arm/mpconfigport.h +++ b/ports/bare-arm/mpconfigport.h @@ -37,6 +37,9 @@ // Python internal features #define MICROPY_ERROR_REPORTING (MICROPY_ERROR_REPORTING_NONE) +// Just use -Os for everything to generate the smallest possible binary. 
+#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f + // Type definitions for the specific machine typedef int32_t mp_int_t; // must be pointer size diff --git a/ports/minimal/mpconfigport.h b/ports/minimal/mpconfigport.h index 56bef165facda..eaf5abe76397a 100644 --- a/ports/minimal/mpconfigport.h +++ b/ports/minimal/mpconfigport.h @@ -21,6 +21,9 @@ // Use the minimum headroom in the chunk allocator for parse nodes. #define MICROPY_ALLOC_PARSE_CHUNK_INIT (16) +// Just use -Os for everything to generate the smallest possible binary. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f + // type definitions for the specific machine typedef intptr_t mp_int_t; // must be pointer size diff --git a/ports/stm32/mpconfigport.h b/ports/stm32/mpconfigport.h index 300ad086bf473..0a1ce27c2258d 100644 --- a/ports/stm32/mpconfigport.h +++ b/ports/stm32/mpconfigport.h @@ -55,6 +55,12 @@ #define MICROPY_OPT_MAP_LOOKUP_CACHE (__CORTEX_M > 0) #endif +#if __CORTEX_M == 0 +// Just use -Os for everything to avoid using extra flash. Using M0 as a stand-in +// for "likely has small flash", this could be moved to the board config instead. +#define MICROPY_APPLY_COMPILER_OPTIMISATIONS(f) f +#endif + // emitters #define MICROPY_PERSISTENT_CODE_LOAD (1) #ifndef MICROPY_EMIT_THUMB pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy