Skip to content

Commit 8e7dcac

Browse files
committed
py/objint: Fix int.to_bytes() buffer size checks.
* No longer overflows if byte size is 0 (closes #13041).
* Raises OverflowError in any case where the number won't fit into the byte length. (This now matches CPython; previously MicroPython would return a truncated bytes object.)
* Documents that MicroPython's int.to_bytes() behaves as if signed=True, as this was the pre-existing behaviour. Adds tests for this as well.

Signed-off-by: Angus Gratton <angus@redyak.com.au>
1 parent 094b52b commit 8e7dcac

File tree

8 files changed

+154
-29
lines changed

8 files changed

+154
-29
lines changed

docs/library/builtins.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ Functions and types
8282
In MicroPython, `byteorder` parameter must be positional (this is
8383
compatible with CPython).
8484

85+
.. note:: The optional ``signed`` kwarg from CPython is not supported.
86+
MicroPython currently behaves as if ``signed=True``.
87+
8588
.. function:: isinstance()
8689

8790
.. function:: issubclass()

py/mpz.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
15891589
return true;
15901590
}
15911591

1592-
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
1592+
bool mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
15931593
byte *b = buf;
15941594
if (big_endian) {
15951595
b += len;
@@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
15981598
int bits = 0;
15991599
mpz_dbl_dig_t d = 0;
16001600
mpz_dbl_dig_t carry = 1;
1601+
size_t olen = len; // bytes in output buffer
1602+
bool ok = true;
16011603
for (size_t zlen = z->len; zlen > 0; --zlen) {
16021604
bits += DIG_SIZE;
16031605
d = (d << DIG_SIZE) | *zdig++;
@@ -1607,28 +1609,26 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
16071609
val = (~val & 0xff) + carry;
16081610
carry = val >> 8;
16091611
}
1612+
1613+
if (!olen) {
1614+
// Buffer is full, only OK if all remaining bytes are zeroes
1615+
ok = ok && ((byte)val == 0);
1616+
continue;
1617+
}
1618+
16101619
if (big_endian) {
16111620
*--b = val;
1612-
if (b == buf) {
1613-
return;
1614-
}
16151621
} else {
16161622
*b++ = val;
1617-
if (b == buf + len) {
1618-
return;
1619-
}
16201623
}
1624+
olen--;
16211625
}
16221626
}
16231627

16241628
// fill remainder of buf with zero/sign extension of the integer
1625-
if (big_endian) {
1626-
len = b - buf;
1627-
} else {
1628-
len = buf + len - b;
1629-
buf = b;
1630-
}
1631-
memset(buf, z->neg ? 0xff : 0x00, len);
1629+
memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
1630+
1631+
return ok;
16321632
}
16331633

16341634
#if MICROPY_PY_BUILTINS_FLOAT

py/mpz.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
9393
typedef struct _mpz_t {
9494
// Zero has neg=0, len=0. Negative zero is not allowed.
9595
size_t neg : 1;
96-
size_t fixed_dig : 1;
97-
size_t alloc : (8 * sizeof(size_t) - 2);
98-
size_t len;
96+
size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
97+
size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
98+
size_t len; // number of entries used in 'dig'
9999
mpz_dig_t *dig;
100100
} mpz_t;
101101

@@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
145145
mp_int_t mpz_hash(const mpz_t *z);
146146
bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
147147
bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
148-
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
148+
// Returns true if 'z' fits into 'len' bytes of 'buf' without overflowing; otherwise returns false and 'buf' holds a truncated result.
149+
bool mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
149150
#if MICROPY_PY_BUILTINS_FLOAT
150151
mp_float_t mpz_as_float(const mpz_t *z);
151152
#endif

py/objint.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -421,29 +421,46 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro
421421
STATIC MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
422422

423423
STATIC mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
424-
// TODO: Support signed param (assumes signed=False)
424+
// TODO: Support signed param (currently behaves as if signed=True always)
425425
(void)n_args;
426+
bool overflow;
426427

427-
mp_int_t len = mp_obj_get_int(args[1]);
428-
if (len < 0) {
428+
mp_int_t dlen = mp_obj_get_int(args[1]);
429+
if (dlen < 0) {
429430
mp_raise_ValueError(NULL);
430431
}
431432
bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
432433

433434
vstr_t vstr;
434-
vstr_init_len(&vstr, len);
435+
vstr_init_len(&vstr, dlen);
435436
byte *data = (byte *)vstr.buf;
436-
memset(data, 0, len);
437+
memset(data, 0, dlen);
437438

438439
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
439440
if (!mp_obj_is_small_int(args[0])) {
440-
mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
441+
overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
441442
} else
442443
#endif
443444
{
444445
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
445-
size_t l = MIN((size_t)len, sizeof(val));
446-
mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
446+
mp_uint_t abs_val = val >= 0 ? val : -val;
447+
mp_int_t slen = 0; // Number of actual bytes to represent 'val'
448+
for (mp_int_t tmp = abs_val; tmp != 0; tmp >>= 8) {
449+
slen++;
450+
}
451+
if (val < 0 && (mp_uint_t)(1 << (slen * 8 - 1)) < abs_val) {
452+
slen++; // Need an extra byte to fit the negative sign bit
453+
}
454+
if (slen <= dlen) {
455+
mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
456+
overflow = false;
457+
} else {
458+
overflow = true;
459+
}
460+
}
461+
462+
if (overflow) {
463+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("int too big to convert"));
447464
}
448465

449466
return mp_obj_new_bytes_from_vstr(&vstr);

py/objint.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
5555
int base, const char *prefix, char base_char, char comma);
5656
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
5757
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
58-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
58+
// Returns true if 'self_in' fits into 'len' bytes of 'buf' without overflowing; otherwise returns false and 'buf' holds a truncated result.
59+
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
5960
int mp_obj_int_sign(mp_obj_t self_in);
6061
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6162
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);

py/objint_mpz.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
112112
return MP_OBJ_FROM_PTR(o);
113113
}
114114

115-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
115+
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
116116
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
117117
mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
118-
mpz_as_bytes(&self->mpz, big_endian, len, buf);
118+
return mpz_as_bytes(&self->mpz, big_endian, len, buf);
119119
}
120120

121121
int mp_obj_int_sign(mp_obj_t self_in) {

tests/basics/int_bytes.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
print((10).to_bytes(1, "little"))
24
print((111111).to_bytes(4, "little"))
35
print((100).to_bytes(10, "little"))
@@ -20,3 +22,72 @@
2022
(1).to_bytes(-1, "little")
2123
except ValueError:
2224
print("ValueError")
25+
26+
# zero byte destination should also raise an error
27+
try:
28+
(1).to_bytes(0, "little")
29+
except OverflowError:
30+
print("OverflowError")
31+
32+
# except for converting 0 to a zero-length byte array
33+
print((0).to_bytes(0, "big"))
34+
35+
# byte length can fit the integer directly
36+
print((0xFF).to_bytes(1, "little"))
37+
print((0xFF).to_bytes(1, "big"))
38+
print((0xEFF).to_bytes(2, "little"))
39+
print((0xEFF).to_bytes(2, "big"))
40+
print((0xCDEFF).to_bytes(3, "little"))
41+
print((0xCDEFF).to_bytes(3, "big"))
42+
43+
# OverflowError if not big enough
44+
45+
try:
46+
(0x123).to_bytes(1, "big")
47+
except OverflowError:
48+
print("OverflowError")
49+
50+
try:
51+
(0x12345).to_bytes(2, "big")
52+
except OverflowError:
53+
print("OverflowError")
54+
55+
try:
56+
(0x1234567).to_bytes(3, "big")
57+
except OverflowError:
58+
print("OverflowError")
59+
60+
61+
# negative representations
62+
63+
# MicroPython int.to_bytes() behaves as if signed=True, always.
64+
if sys.implementation.name == "micropython":
65+
66+
def to_bytes_signed(i, l, e):
67+
return i.to_bytes(l, e)
68+
else:
69+
70+
def to_bytes_signed(i, l, e):
71+
return i.to_bytes(l, e, signed=True)
72+
73+
74+
print(to_bytes_signed(-1, 1, "little"))
75+
print(to_bytes_signed(-1, 1, "big"))
76+
print(to_bytes_signed(-128, 1, "big"))
77+
print(to_bytes_signed(-32768, 2, "big"))
78+
print(to_bytes_signed(-(1 << 23), 3, "big"))
79+
80+
try:
81+
print(to_bytes_signed(-129, 1, "big"))
82+
except OverflowError:
83+
print("OverflowError")
84+
85+
try:
86+
print(to_bytes_signed(-32769, 2, "big"))
87+
except OverflowError:
88+
print("OverflowError")
89+
90+
try:
91+
print(to_bytes_signed(-(1 << 23) - 1, 2, "big"))
92+
except OverflowError:
93+
print("OverflowError")

tests/basics/int_bytes_intbig.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
print((2**64).to_bytes(9, "little"))
24
print((2**64).to_bytes(9, "big"))
35

@@ -10,5 +12,35 @@
1012
print(il.to_bytes(20, "little"))
1113
print(ib.to_bytes(20, "big"))
1214

15+
# check padding comes out correctly
16+
print(il.to_bytes(40, "little"))
17+
print(ib.to_bytes(40, "big"))
18+
1319
# check that extra zero bytes don't change the internal int value
1420
print(int.from_bytes(b + bytes(10), "little") == int.from_bytes(b, "little"))
21+
22+
# can't write to a zero-length bytes object
23+
try:
24+
ib.to_bytes(0, "little")
25+
except OverflowError:
26+
print("OverflowError")
27+
28+
# or one that is too short
29+
try:
30+
ib.to_bytes(18, "big")
31+
except OverflowError:
32+
print("OverflowError")
33+
34+
# negative representations
35+
36+
if sys.implementation.name == "micropython":
37+
# MicroPython int.to_bytes behaves as if signed=True, always
38+
def to_bytes_signed(i, l, e):
39+
return i.to_bytes(l, e)
40+
else:
41+
42+
def to_bytes_signed(i, l, e):
43+
return i.to_bytes(l, e, signed=True)
44+
45+
46+
print(to_bytes_signed(-ib, 20, "big"))

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy