Skip to content

Commit 0a66052

Browse files
miss-islington, medmunds, bitdancer, encukou
authored
[3.11] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) (GH-131405)
Email generators using email.policy.default may convert an RFC 2047 encoded-word to unencoded form during header refolding. In a structured header, this could allow 'specials' chars outside a quoted-string, leading to invalid address headers and enabling spoofing. This change ensures a parsed encoded-word that contains specials is kept as an encoded-word while the header is refolded.

[Better fix from @bitdancer.]

(cherry picked from commit 295b53d)

Co-authored-by: Mike Edmunds <medmunds@gmail.com>
Co-authored-by: R David Murray <rdmurray@bitdance.com>
Co-authored-by: Petr Viktorin <encukou@gmail.com>
1 parent 4588712 commit 0a66052

File tree

3 files changed

+37
-5
lines changed

3 files changed

+37
-5
lines changed

Lib/email/_header_value_parser.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1047,7 +1047,7 @@ def get_fws(value):
10471047
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
10481048
return fws, newvalue
10491049

1050-
def get_encoded_word(value):
1050+
def get_encoded_word(value, terminal_type='vtext'):
10511051
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
10521052
10531053
"""
@@ -1086,7 +1086,7 @@ def get_encoded_word(value):
10861086
ew.append(token)
10871087
continue
10881088
chars, *remainder = _wsp_splitter(text, 1)
1089-
vtext = ValueTerminal(chars, 'vtext')
1089+
vtext = ValueTerminal(chars, terminal_type)
10901090
_validate_xtext(vtext)
10911091
ew.append(vtext)
10921092
text = ''.join(remainder)
@@ -1128,7 +1128,7 @@ def get_unstructured(value):
11281128
valid_ew = True
11291129
if value.startswith('=?'):
11301130
try:
1131-
token, value = get_encoded_word(value)
1131+
token, value = get_encoded_word(value, 'utext')
11321132
except _InvalidEwError:
11331133
valid_ew = False
11341134
except errors.HeaderParseError:
@@ -1157,7 +1157,7 @@ def get_unstructured(value):
11571157
# the parser to go in an infinite loop.
11581158
if valid_ew and rfc2047_matcher.search(tok):
11591159
tok, *remainder = value.partition('=?')
1160-
vtext = ValueTerminal(tok, 'vtext')
1160+
vtext = ValueTerminal(tok, 'utext')
11611161
_validate_xtext(vtext)
11621162
unstructured.append(vtext)
11631163
value = ''.join(remainder)
@@ -2792,7 +2792,7 @@ def _refold_parse_tree(parse_tree, *, policy):
27922792
continue
27932793
tstr = str(part)
27942794
if not want_encoding:
2795-
if part.token_type == 'ptext':
2795+
if part.token_type in ('ptext', 'vtext'):
27962796
# Encode if tstr contains special characters.
27972797
want_encoding = not SPECIALSNL.isdisjoint(tstr)
27982798
else:

Lib/test/test_email/test__header_value_parser.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2985,6 +2985,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
29852985
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
29862986
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')
29872987

2988+
def test_address_list_with_specials_in_encoded_word(self):
2989+
# An encoded-word parsed from a structured header must remain
2990+
# encoded when it contains specials. Regression for gh-121284.
2991+
policy = self.policy.clone(max_line_length=40)
2992+
cases = [
2993+
# (to, folded)
2994+
('=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
2995+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
2996+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
2997+
('=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
2998+
'This long name does not need\n'
2999+
' encoded-word <to@example.com>\n'),
3000+
('"A véry long name with, comma" <to@example.com>',
3001+
# (This isn't the best fold point, but it's not invalid.)
3002+
'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
3003+
' =?utf-8?q?=2C?= comma <to@example.com>\n'),
3004+
('"A véry long name containing a, comma" <to@example.com>',
3005+
'A =?utf-8?q?v=C3=A9ry?= long name\n'
3006+
' containing =?utf-8?q?a=2C?= comma\n'
3007+
' <to@example.com>\n'),
3008+
]
3009+
for (to, folded) in cases:
3010+
with self.subTest(to=to):
3011+
self._test(parser.get_address_list(to)[0], folded, policy=policy)
3012+
29883013
def test_address_list_with_list_separator_after_fold(self):
29893014
a = 'x' * 66 + '@example.com'
29903015
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Fix bug in the folding of rfc2047 encoded-words when flattening an email message
2+
using a modern email policy. Previously when an encoded-word was too long
3+
for a line, it would be decoded, split across lines, and re-encoded. But commas
4+
and other special characters in the original text could be left unencoded and
5+
unquoted. This could theoretically be used to spoof header lines using
6+
a carefully constructed encoded-word if the resulting rendered email was
7+
transmitted or re-parsed.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy