Skip to content
This repository was archived by the owner on Oct 28, 2023. It is now read-only.

Commit c89254f

Browse files
alex-robbins authored and pfalcon committed
extmod/modubinascii: Rewrite mod_binascii_a2b_base64.
This implementation ignores invalid characters in the input. This allows it to decode the output of b2a_base64, and also mimics the behavior of CPython.
1 parent 025e5f2 commit c89254f

File tree

2 files changed: 55 additions and 38 deletions.

extmod/modubinascii.c

Lines changed: 48 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -105,54 +105,64 @@ mp_obj_t mod_binascii_unhexlify(mp_obj_t data) {
105105
}
106106
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_unhexlify_obj, mod_binascii_unhexlify);
107107

108+
// If ch is a character in the base64 alphabet, and is not a pad character, then
109+
// the corresponding integer between 0 and 63, inclusively, is returned.
110+
// Otherwise, -1 is returned.
111+
static int mod_binascii_sextet(byte ch) {
112+
if (ch >= 'A' && ch <= 'Z') {
113+
return ch - 'A';
114+
} else if (ch >= 'a' && ch <= 'z') {
115+
return ch - 'a' + 26;
116+
} else if (ch >= '0' && ch <= '9') {
117+
return ch - '0' + 52;
118+
} else if (ch == '+') {
119+
return 62;
120+
} else if (ch == '/') {
121+
return 63;
122+
} else {
123+
return -1;
124+
}
125+
}
126+
108127
mp_obj_t mod_binascii_a2b_base64(mp_obj_t data) {
109128
mp_buffer_info_t bufinfo;
110129
mp_get_buffer_raise(data, &bufinfo, MP_BUFFER_READ);
111-
if (bufinfo.len % 4 != 0) {
112-
mp_raise_ValueError("incorrect padding");
113-
}
130+
byte *in = bufinfo.buf;
114131

115132
vstr_t vstr;
116-
byte *in = bufinfo.buf;
117-
if (bufinfo.len == 0) {
118-
vstr_init_len(&vstr, 0);
119-
}
120-
else {
121-
vstr_init_len(&vstr, ((bufinfo.len / 4) * 3) - ((in[bufinfo.len-1] == '=') ? ((in[bufinfo.len-2] == '=') ? 2 : 1 ) : 0));
122-
}
123-
byte *out = (byte*)vstr.buf;
124-
for (mp_uint_t i = bufinfo.len; i; i -= 4) {
125-
char hold[4];
126-
for (int j = 4; j--;) {
127-
if (in[j] >= 'A' && in[j] <= 'Z') {
128-
hold[j] = in[j] - 'A';
129-
} else if (in[j] >= 'a' && in[j] <= 'z') {
130-
hold[j] = in[j] - 'a' + 26;
131-
} else if (in[j] >= '0' && in[j] <= '9') {
132-
hold[j] = in[j] - '0' + 52;
133-
} else if (in[j] == '+') {
134-
hold[j] = 62;
135-
} else if (in[j] == '/') {
136-
hold[j] = 63;
137-
} else if (in[j] == '=') {
138-
if (j < 2 || i > 4) {
139-
mp_raise_ValueError("incorrect padding");
140-
}
141-
hold[j] = 64;
142-
} else {
143-
mp_raise_ValueError("invalid character");
133+
vstr_init(&vstr, (bufinfo.len / 4) * 3 + 1); // Potentially over-allocate
134+
byte *out = (byte *)vstr.buf;
135+
136+
uint shift = 0;
137+
int nbits = 0; // Number of meaningful bits in shift
138+
bool hadpad = false; // Had a pad character since last valid character
139+
for (size_t i = 0; i < bufinfo.len; i++) {
140+
if (in[i] == '=') {
141+
if ((nbits == 2) || ((nbits == 4) && hadpad)) {
142+
nbits = 0;
143+
break;
144144
}
145+
hadpad = true;
145146
}
146-
in += 4;
147147

148-
*out++ = (hold[0]) << 2 | (hold[1]) >> 4;
149-
if (hold[2] != 64) {
150-
*out++ = (hold[1] & 0x0F) << 4 | hold[2] >> 2;
151-
if (hold[3] != 64) {
152-
*out++ = (hold[2] & 0x03) << 6 | hold[3];
153-
}
148+
int sextet = mod_binascii_sextet(in[i]);
149+
if (sextet == -1) {
150+
continue;
151+
}
152+
hadpad = false;
153+
shift = (shift << 6) | sextet;
154+
nbits += 6;
155+
156+
if (nbits >= 8) {
157+
nbits -= 8;
158+
out[vstr.len++] = (shift >> nbits) & 0xFF;
154159
}
155160
}
161+
162+
if (nbits) {
163+
mp_raise_ValueError("incorrect padding");
164+
}
165+
156166
return mp_obj_new_str_from_vstr(&mp_type_bytes, &vstr);
157167
}
158168
MP_DEFINE_CONST_FUN_OBJ_1(mod_binascii_a2b_base64_obj, mod_binascii_a2b_base64);

tests/extmod/ubinascii_a2b_base64.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,13 @@
2121
print(binascii.a2b_base64(b'f4D+')) # convert '+'
2222
print(binascii.a2b_base64(b'MTIzNEFCQ0RhYmNk'))
2323

24+
# Ignore invalid characters and pad sequences
25+
print(binascii.a2b_base64(b'Zm9v\n'))
26+
print(binascii.a2b_base64(b'Zm\x009v\n'))
27+
print(binascii.a2b_base64(b'Zm9v=='))
28+
print(binascii.a2b_base64(b'Zm9v==='))
29+
print(binascii.a2b_base64(b'Zm9v===YmFy'))
30+
2431
try:
2532
print(binascii.a2b_base64(b'abc'))
2633
except ValueError:

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy