From 9b5115f7878a818f33a1b281da7e78c5cfa6f3a0 Mon Sep 17 00:00:00 2001
From: Mike Edmunds
Date: Tue, 18 Mar 2025 04:07:17 -0700
Subject: [PATCH] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754)

Email generators using email.policy.default may convert an RFC 2047
encoded-word to unencoded form during header refolding. In a structured
header, this could allow 'specials' chars outside a quoted-string,
leading to invalid address headers and enabling spoofing. This change
ensures a parsed encoded-word that contains specials is kept as an
encoded-word while the header is refolded.

[Better fix from @bitdancer.]

---------
(cherry picked from commit 295b53df2aa18deb625a7da41f7e4babfe6ef34b)

Co-authored-by: Mike Edmunds
Co-authored-by: R David Murray
Co-authored-by: Petr Viktorin
---
 Lib/email/_header_value_parser.py | 10 ++++----
 .../test_email/test__header_value_parser.py | 25 +++++++++++++++++++
 ...-08-06-12-27-34.gh-issue-121284.8rwPxe.rst | 7 ++++++
 3 files changed, 37 insertions(+), 5 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst

diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py
index 045a01bcf1e0d7..0183a1508b1219 100644
--- a/Lib/email/_header_value_parser.py
+++ b/Lib/email/_header_value_parser.py
@@ -1047,7 +1047,7 @@ def get_fws(value):
     fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
     return fws, newvalue
 
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
     """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 
     """
@@ -1086,7 +1086,7 @@ def get_encoded_word(value):
             ew.append(token)
             continue
         chars, *remainder = _wsp_splitter(text, 1)
-        vtext = ValueTerminal(chars, 'vtext')
+        vtext = ValueTerminal(chars, terminal_type)
         _validate_xtext(vtext)
         ew.append(vtext)
         text = ''.join(remainder)
@@ -1128,7 +1128,7 @@ def get_unstructured(value):
         valid_ew = True
         if value.startswith('=?'):
             try:
-                token, value = get_encoded_word(value)
+                token, value = get_encoded_word(value, 'utext')
             except _InvalidEwError:
                 valid_ew = False
             except errors.HeaderParseError:
@@ -1157,7 +1157,7 @@ def get_unstructured(value):
         # the parser to go in an infinite loop.
         if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
-        vtext = ValueTerminal(tok, 'vtext')
+        vtext = ValueTerminal(tok, 'utext')
         _validate_xtext(vtext)
         unstructured.append(vtext)
         value = ''.join(remainder)
@@ -2792,7 +2792,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             continue
         tstr = str(part)
         if not want_encoding:
-            if part.token_type == 'ptext':
+            if part.token_type in ('ptext', 'vtext'):
                 # Encode if tstr contains special characters.
                 want_encoding = not SPECIALSNL.isdisjoint(tstr)
             else:
diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py
index cd6495490e3d55..6025b34ac4a0f8 100644
--- a/Lib/test/test_email/test__header_value_parser.py
+++ b/Lib/test/test_email/test__header_value_parser.py
@@ -2985,6 +2985,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
             '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n'
             ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n')
 
+    def test_address_list_with_specials_in_encoded_word(self):
+        # An encoded-word parsed from a structured header must remain
+        # encoded when it contains specials. Regression for gh-121284.
+        policy = self.policy.clone(max_line_length=40)
+        cases = [
+            # (to, folded)
+            ('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= ',
+             'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
+             ' =?utf-8?q?=2C?= comma \n'),
+            ('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= ',
+             'This long name does not need\n'
+             ' encoded-word \n'),
+            ('"A véry long name with, comma" ',
+             # (This isn't the best fold point, but it's not invalid.)
+             'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
+             ' =?utf-8?q?=2C?= comma \n'),
+            ('"A véry long name containing a, comma" ',
+             'A =?utf-8?q?v=C3=A9ry?= long name\n'
+             ' containing =?utf-8?q?a=2C?= comma\n'
+             ' \n'),
+        ]
+        for (to, folded) in cases:
+            with self.subTest(to=to):
+                self._test(parser.get_address_list(to)[0], folded, policy=policy)
+
     def test_address_list_with_list_separator_after_fold(self):
         a = 'x' * 66 + '@example.com'
         to = f'{a}, "Hübsch Kaktus" '
diff --git a/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst b/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst
new file mode 100644
index 00000000000000..923e91170d355f
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst
@@ -0,0 +1,7 @@
+Fix bug in the folding of rfc2047 encoded-words when flattening an email message
+using a modern email policy. Previously when an encoded-word was too long
+for a line, it would be decoded, split across lines, and re-encoded. But commas
+and other special characters in the original text could be left unencoded and
+unquoted. This could theoretically be used to spoof header lines using
+a carefully constructed encoded-word if the resulting rendered email was
+transmitted or re-parsed.

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy