diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 045a01bcf1e0d7..0183a1508b1219 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1047,7 +1047,7 @@ def get_fws(value): fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') return fws, newvalue -def get_encoded_word(value): +def get_encoded_word(value, terminal_type='vtext'): """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" """ @@ -1086,7 +1086,7 @@ def get_encoded_word(value): ew.append(token) continue chars, *remainder = _wsp_splitter(text, 1) - vtext = ValueTerminal(chars, 'vtext') + vtext = ValueTerminal(chars, terminal_type) _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) @@ -1128,7 +1128,7 @@ def get_unstructured(value): valid_ew = True if value.startswith('=?'): try: - token, value = get_encoded_word(value) + token, value = get_encoded_word(value, 'utext') except _InvalidEwError: valid_ew = False except errors.HeaderParseError: @@ -1157,7 +1157,7 @@ def get_unstructured(value): # the parser to go in an infinite loop. if valid_ew and rfc2047_matcher.search(tok): tok, *remainder = value.partition('=?') - vtext = ValueTerminal(tok, 'vtext') + vtext = ValueTerminal(tok, 'utext') _validate_xtext(vtext) unstructured.append(vtext) value = ''.join(remainder) @@ -2792,7 +2792,7 @@ def _refold_parse_tree(parse_tree, *, policy): continue tstr = str(part) if not want_encoding: - if part.token_type == 'ptext': + if part.token_type in ('ptext', 'vtext'): # Encode if tstr contains special characters. 
want_encoding = not SPECIALSNL.isdisjoint(tstr) else: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index cd6495490e3d55..6025b34ac4a0f8 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2985,6 +2985,31 @@ def test_address_list_with_unicode_names_in_quotes(self): '=?utf-8?q?H=C3=BCbsch?= Kaktus ,\n' ' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= \n') + def test_address_list_with_specials_in_encoded_word(self): + # An encoded-word parsed from a structured header must remain + # encoded when it contains specials. Regression for gh-121284. + policy = self.policy.clone(max_line_length=40) + cases = [ + # (to, folded) + ('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= ', + 'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n' + ' =?utf-8?q?=2C?= comma \n'), + ('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= ', + 'This long name does not need\n' + ' encoded-word \n'), + ('"A véry long name with, comma" ', + # (This isn't the best fold point, but it's not invalid.) 
+ 'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n' + ' =?utf-8?q?=2C?= comma \n'), + ('"A véry long name containing a, comma" ', + 'A =?utf-8?q?v=C3=A9ry?= long name\n' + ' containing =?utf-8?q?a=2C?= comma\n' + ' \n'), + ] + for (to, folded) in cases: + with self.subTest(to=to): + self._test(parser.get_address_list(to)[0], folded, policy=policy) + def test_address_list_with_list_separator_after_fold(self): a = 'x' * 66 + '@example.com' to = f'{a}, "Hübsch Kaktus" ' diff --git a/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst b/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst new file mode 100644 index 00000000000000..923e91170d355f --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-08-06-12-27-34.gh-issue-121284.8rwPxe.rst @@ -0,0 +1,7 @@ +Fix bug in the folding of rfc2047 encoded-words when flattening an email message +using a modern email policy. Previously when an encoded-word was too long +for a line, it would be decoded, split across lines, and re-encoded. But commas +and other special characters in the original text could be left unencoded and +unquoted. This could theoretically be used to spoof header lines using +a carefully constructed encoded-word if the resulting rendered email was +transmitted or re-parsed. pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy