From 7706b33e18b277b8bf45c5c4484133bb6ca2a086 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 16:54:42 +0200 Subject: [PATCH 1/4] Fix base64-url parsing errors in email headers --- Lib/email/_encoded_words.py | 9 +++++++-- Lib/email/base64mime.py | 11 +++++++---- Lib/test/test_email/test__encoded_words.py | 6 ++++++ Lib/test/test_email/test_email.py | 3 +++ 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py index 6795a606de037e..7c2dfb360f95ad 100644 --- a/Lib/email/_encoded_words.py +++ b/Lib/email/_encoded_words.py @@ -113,9 +113,14 @@ def decode_b(encoded): # The non-alphabet characters are ignored as far as padding # goes, but we don't know how many there are. So try without adding # padding to see if it works. + # + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode + # both the urlsafe and regular alphabets. + try: return ( - base64.b64decode(encoded, validate=False), + base64.urlsafe_b64decode(encoded), [errors.InvalidBase64CharactersDefect()], ) except binascii.Error: @@ -123,7 +128,7 @@ def decode_b(encoded): # is ignored). try: return ( - base64.b64decode(encoded + b'==', validate=False), + base64.urlsafe_b64decode(encoded + b'=='), [errors.InvalidBase64CharactersDefect(), errors.InvalidBase64PaddingDefect()], ) diff --git a/Lib/email/base64mime.py b/Lib/email/base64mime.py index a5a3f737a97b51..3363a5bc45f2d4 100644 --- a/Lib/email/base64mime.py +++ b/Lib/email/base64mime.py @@ -34,8 +34,8 @@ ] -from base64 import b64encode -from binascii import b2a_base64, a2b_base64 +from base64 import b64encode, urlsafe_b64decode +from binascii import b2a_base64 CRLF = '\r\n' NL = '\n' @@ -102,12 +102,15 @@ def decode(string): base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high level email.header class for that functionality. """ + # We use urlsafe_b64decode here because some mailers apparently use the + # urlsafe b64 alphabet, and urlsafe_b64decode will correctly decode both + # the urlsafe and regular alphabets. if not string: return bytes() elif isinstance(string, str): - return a2b_base64(string.encode('raw-unicode-escape')) + return urlsafe_b64decode(string.encode('raw-unicode-escape')) else: - return a2b_base64(string) + return urlsafe_b64decode(string) # For convenience and backwards compatibility w/ standard base64 module diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 1713962f94caef..e547f372adb5c3 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -38,6 +38,12 @@ def test_missing_padding(self): # 2 missing padding characters self._test(b'dg', b'v', [errors.InvalidBase64PaddingDefect]) + def test_urlsafe_alphabet(self): + self._test( + b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', + b'Anmeldung Netzanschluss S\xfcdring3p.jpg', + [errors.InvalidBase64CharactersDefect]) + def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b8116d073a2670..16b156dc7fc07f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4663,6 +4663,9 @@ def test_decode(self): eq = self.assertEqual eq(base64mime.decode(''), b'') eq(base64mime.decode('aGVsbG8='), b'hello') + eq(base64mime.decode( + 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), + b'Anmeldung Netzanschluss S\xfcdring3p.jpg') def test_encode(self): eq = self.assertEqual From b79dce66038179af62b640af6dfb416727c69626 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 16:59:24 +0200 Subject: [PATCH 2/4] Add news entry --- .../next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst new file mode 100644 index 00000000000000..d23e7eb2bd4c0e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-19-16-59-16.gh-issue-56698.yHSmT_.rst @@ -0,0 +1,2 @@ +Accept urlsafe base64 in email headers, as those are sometimes created by +email clients. From 81cff0ab4c4040cf3f31886c9dfbb0a0c3004b87 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sat, 19 Jul 2025 17:37:41 +0200 Subject: [PATCH 3/4] Add tests for mixed base64 alphabets --- Lib/test/test_email/test__encoded_words.py | 3 +++ Lib/test/test_email/test_email.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index e547f372adb5c3..890c4fb6cbd51e 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -43,6 +43,9 @@ def test_urlsafe_alphabet(self): b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', b'Anmeldung Netzanschluss S\xfcdring3p.jpg', [errors.InvalidBase64CharactersDefect]) + # Mix of 2 base64 alphabets + self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect]) + def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 16b156dc7fc07f..6914f5e7a0cdba 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4666,6 +4666,8 @@ def test_decode(self): eq(base64mime.decode( 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), b'Anmeldung Netzanschluss S\xfcdring3p.jpg') + # Mix of 2 base64 alphabets + eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????') def test_encode(self): eq = self.assertEqual From 6161551bf3a35c03786662f3735eee4241de1fff Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 20:28:51 +0200 Subject: [PATCH 4/4] Address review comments --- Lib/test/test_email/test__encoded_words.py | 9 ++++----- Lib/test/test_email/test_email.py | 6 +++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_email/test__encoded_words.py b/Lib/test/test_email/test__encoded_words.py index 890c4fb6cbd51e..2686448565bbbe 100644 --- a/Lib/test/test_email/test__encoded_words.py +++ b/Lib/test/test_email/test__encoded_words.py @@ -40,13 +40,12 @@ def test_missing_padding(self): def test_urlsafe_alphabet(self): self._test( - b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', - b'Anmeldung Netzanschluss S\xfcdring3p.jpg', - [errors.InvalidBase64CharactersDefect]) - # Mix of 2 base64 alphabets + b'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw==', + b'Anmeldung Netzanschluss S\xfcdring3p.jpg', + [errors.InvalidBase64CharactersDefect]) + # mix of different base64 alphabets self._test(b'aGVsbG8_Pz8/', b'hello????', [errors.InvalidBase64CharactersDefect]) - def test_invalid_character(self): self._test(b'dm\x01k===', b'vi', [errors.InvalidBase64CharactersDefect]) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 6914f5e7a0cdba..03381ea1b72b2f 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -4664,9 +4664,9 @@ def test_decode(self): eq(base64mime.decode(''), b'') eq(base64mime.decode('aGVsbG8='), b'hello') eq(base64mime.decode( - 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), - b'Anmeldung Netzanschluss S\xfcdring3p.jpg') - # Mix of 2 base64 alphabets + 'QW5tZWxkdW5nIE5ldHphbnNjaGx1c3MgU_xkcmluZzNwLmpwZw=='), + b'Anmeldung Netzanschluss S\xfcdring3p.jpg') + # mix of different base64 alphabets eq(base64mime.decode('aGVsbG8_Pz8/'), b'hello????') def test_encode(self): pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy