Skip to content

Commit 0489c31

Browse files
committed
py/objint.c: Code review of int.from_bytes().
Support the `signed` parameter: result = int.from_bytes(bytearray(), byteorder='big'|'little', signed=False|True). Add the `length`, `byteorder` and `signed` arguments as discussed in micropython#16311. Signed-off-by: Ihor Nehrutsa <Ihor.Nehrutsa@gmail.com>
1 parent 2264340 commit 0489c31

File tree

8 files changed

+306
-39
lines changed

8 files changed

+306
-39
lines changed

py/mpz.c

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne
850850
return cur - str;
851851
}
852852

853-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {
853+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool is_signed, size_t len, const byte *buf) {
854854
int delta = 1;
855855
if (big_endian) {
856856
buf += len - 1;
@@ -862,6 +862,9 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf)
862862
mpz_dig_t d = 0;
863863
int num_bits = 0;
864864
z->neg = 0;
865+
if ((is_signed) && (buf[len - 1] & 0x80)) {
866+
z->neg = 1;
867+
}
865868
z->len = 0;
866869
while (len) {
867870
while (len && num_bits < DIG_SIZE) {
@@ -879,7 +882,14 @@ void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf)
879882
#endif
880883
num_bits -= DIG_SIZE;
881884
}
882-
885+
if (z->neg) {
886+
// sign extend
887+
while (num_bits < DIG_SIZE) {
888+
d |= DIG_MSB << num_bits;
889+
num_bits += DIG_SIZE;
890+
}
891+
z->dig[z->len++] = d & DIG_MASK;
892+
}
883893
z->len = mpn_remove_trailing_zeros(z->dig, z->dig + z->len);
884894
}
885895

py/mpz.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
114114
void mpz_set_from_float(mpz_t *z, mp_float_t src);
115115
#endif
116116
size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base);
117-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf);
117+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool is_signed, size_t len, const byte *buf);
118118

119119
static inline bool mpz_is_zero(const mpz_t *z) {
120120
return z->len == 0;

py/objint.c

Lines changed: 72 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -387,37 +387,86 @@ mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp
387387
return MP_OBJ_NULL; // op not supported
388388
}
389389

390-
// this is a classmethod
391-
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
392-
// TODO: Support signed param (assumes signed=False at the moment)
393-
394-
// get the buffer info
395-
mp_buffer_info_t bufinfo;
396-
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
390+
// Copy `len` bytes from `src` to `dest`, reversing their order:
// src[0] is stored in dest[len - 1] and src[len - 1] in dest[0].
// The two regions must not overlap. A `len` of 0 copies nothing.
// Returns `dest`, mirroring memcpy().
void *reverce_memcpy(void *dest, const void *src, size_t len) {
    // Start one past the end and pre-decrement: unlike `dest + len - 1`,
    // this never forms a pointer before the start of `dest` when len == 0.
    unsigned char *d = (unsigned char *)dest + len;
    const unsigned char *s = src;
    while (len--) {
        *--d = *s++;
    }
    return dest;
}
397398

398-
const byte *buf = (const byte *)bufinfo.buf;
399-
int delta = 1;
400-
bool big_endian = n_args < 3 || args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
401-
if (!big_endian) {
402-
buf += bufinfo.len - 1;
403-
delta = -1;
399+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
400+
if (len > sizeof(mp_int_t)) {
401+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
402+
// Result will overflow a small-int size so construct a big-int
403+
return mp_obj_int_from_bytes_impl(big_endian, is_signed, len, buf);
404+
#else
405+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
406+
#endif
407+
}
408+
union {
409+
mp_int_t value;
410+
mp_uint_t uvalue;
411+
byte buf[sizeof(mp_int_t)];
412+
} result = {0};
413+
// #if sizeof(mp_int_t) != sizeof(mp_uint_t)
414+
// #error "sizeof(mp_int_t) != sizeof(mp_uint_t)"
415+
// #endif
416+
417+
if (big_endian) {
418+
reverce_memcpy(&result, buf, len);
419+
} else { // little-endian
420+
memcpy(&result, buf, len);
404421
}
405422

406-
mp_uint_t value = 0;
407-
size_t len = bufinfo.len;
408-
for (; len--; buf += delta) {
423+
if ((is_signed) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
424+
// Sign propagation in little-endian
425+
// x = 2
426+
// x.to_bytes(1, 'little', True) -> b'\x02'
427+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
428+
// x = -2
429+
// x.to_bytes(1, 'little', True) -> b'\xFE'
430+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
431+
memset(result.buf + len, 0xFF, sizeof(result) - len);
432+
}
433+
if (((!is_signed) && (result.uvalue > MP_SMALL_INT_MAX)) || (is_signed && ((result.value < MP_SMALL_INT_MIN) || (result.value > MP_SMALL_INT_MAX)))) {
434+
// Result will overflow a small-int so construct a big-int
409435
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
410-
if (value > (MP_SMALL_INT_MAX >> 8)) {
411-
// Result will overflow a small-int so construct a big-int
412-
return mp_obj_int_from_bytes_impl(big_endian, bufinfo.len, bufinfo.buf);
413-
}
436+
return mp_obj_int_from_bytes_impl(big_endian, is_signed, len, buf);
437+
#else
438+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
414439
#endif
415-
value = (value << 8) | *buf;
416440
}
417-
return mp_obj_new_int_from_uint(value);
441+
return mp_obj_new_int(result.value);
418442
}
419443

420-
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes);
444+
// this is a classmethod
445+
// result = int.from_bytes(bytearray(), [[length=,] byteorder='big',] signed=False)
446+
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
447+
enum { ARG_length, ARG_byteorder, ARG_signed };
448+
static const mp_arg_t allowed_args[] = {
449+
{ MP_QSTR_length, MP_ARG_INT, { .u_int = 0 } },
450+
{ MP_QSTR_byteorder, MP_ARG_OBJ, { .u_rom_obj = MP_ROM_QSTR(MP_QSTR_big) } },
451+
{ MP_QSTR_signed, MP_ARG_BOOL, {.u_bool = false} },
452+
};
453+
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
454+
mp_arg_parse_all(n_args - 2, pos_args + 2, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
455+
456+
// get the buffer info
457+
mp_buffer_info_t bufinfo;
458+
mp_get_buffer_raise(pos_args[1], &bufinfo, MP_BUFFER_READ);
459+
460+
size_t len = args[ARG_length].u_int;
461+
bool big_endian = args[ARG_byteorder].u_obj != MP_OBJ_NEW_QSTR(MP_QSTR_little);
462+
bool is_signed = args[ARG_signed].u_bool;
463+
464+
if ((len <= 0) || (len > bufinfo.len)) {
465+
len = bufinfo.len;
466+
}
467+
return mp_obj_integer_from_bytes_impl(big_endian, is_signed, len, bufinfo.buf);
468+
}
469+
static MP_DEFINE_CONST_FUN_OBJ_KW(int_from_bytes_fun_obj, 2, int_from_bytes);
421470
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
422471

423472
static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {

py/objint.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,15 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
5454
char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
5555
int base, const char *prefix, char base_char, char comma);
5656
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
57-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
57+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf);
58+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf);
5859
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
5960
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
6061
int mp_obj_int_sign(mp_obj_t self_in);
6162
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6263
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6364
mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6465
mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent, mp_obj_t modulus);
66+
void *reverce_memcpy(void *dest, const void *src, size_t len);
6567

6668
#endif // MICROPY_INCLUDED_PY_OBJINT_H

py/objint_longlong.c

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -43,18 +43,32 @@
4343
const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
4444
#endif
4545

46-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
47-
int delta = 1;
48-
if (!big_endian) {
49-
buf += len - 1;
50-
delta = -1;
46+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
47+
if (len > sizeof(mp_longint_impl_t)) {
48+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("big-int overflow"));
5149
}
50+
union {
51+
mp_longint_impl_t value;
52+
byte buf[sizeof(mp_longint_impl_t)];
53+
} result = {0};
5254

53-
mp_longint_impl_t value = 0;
54-
for (; len--; buf += delta) {
55-
value = (value << 8) | *buf;
55+
if (big_endian) {
56+
reverce_memcpy(&result, buf, len);
57+
} else { // little-endian
58+
memcpy(&result, buf, len);
5659
}
57-
return mp_obj_new_int_from_ll(value);
60+
61+
if ((is_signed) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
62+
// Sign propagation in little-endian
63+
// x = 2
64+
// x.to_bytes(1, 'little', True) -> b'\x02'
65+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
66+
// x = -2
67+
// x.to_bytes(1, 'little', True) -> b'\xFE'
68+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
69+
memset(result.buf + len, 0xFF, sizeof(result) - len);
70+
}
71+
return mp_obj_new_int_from_ll(result.value);
5872
}
5973

6074
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {

py/objint_mpz.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
106106
return str;
107107
}
108108

109-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
109+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool is_signed, size_t len, const byte *buf) {
110110
mp_obj_int_t *o = mp_obj_int_new_mpz();
111-
mpz_set_from_bytes(&o->mpz, big_endian, len, buf);
111+
mpz_set_from_bytes(&o->mpz, big_endian, is_signed, len, buf);
112112
return MP_OBJ_FROM_PTR(o);
113113
}
114114

tests/basics/int_bytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
# check that extra zero bytes don't change the internal int value
1111
print(int.from_bytes(bytes(20), "little") == 0)
12-
print(int.from_bytes(b"\x01" + bytes(20), "little") == 1)
12+
print(int.from_bytes(b"\x01" + bytes(7), "little") == 1)
1313

1414
# big-endian conversion
1515
print((10).to_bytes(1, "big"))

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy