From 212109443b620cf0f17e81430733a5951f78b964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 30 May 2025 20:06:24 +0200 Subject: [PATCH 1/3] fix quadratic worst-time complexity in `_header_value_parser.py` --- Lib/email/_header_value_parser.py | 95 +++++++++---------- .../test_email/test__header_value_parser.py | 10 ++ ...-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst | 2 + 3 files changed, 59 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index f11fa83d45ed2d..3b01636bc27cbf 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -82,11 +82,13 @@ WSP = set(' \t') CFWS_LEADER = WSP | set('(') +CFWS_LEADER_WITH_DOT = CFWS_LEADER | set('.') SPECIALS = set(r'()<>@,:;.\"[]') ATOM_ENDS = SPECIALS | WSP DOT_ATOM_ENDS = ATOM_ENDS - set('.') # '.', '"', and '(' do not end phrases in order to support obs-phrase PHRASE_ENDS = SPECIALS - set('."(') +PHRASE_ENDS_CHARS = r''.join(PHRASE_ENDS) TSPECIALS = (SPECIALS | set('/?=')) - set('.') TOKEN_ENDS = TSPECIALS | WSP ASPECIALS = TSPECIALS | set("*'%") @@ -1300,6 +1302,12 @@ def get_cfws(value): cfws.append(token) return cfws, value +def get_cfws_digits(value, leader_set): + ind = 0 + while ind < len(value) and value[ind] not in leader_set: + ind += 1 + return value[:ind], value[ind:] + def get_quoted_string(value): """quoted-string = [CFWS] [CFWS] @@ -1443,11 +1451,13 @@ def get_phrase(value): phrase.defects.append(errors.InvalidHeaderDefect( "phrase does not start with word")) while value and value[0] not in PHRASE_ENDS: - if value[0]=='.': - phrase.append(DOT) - phrase.defects.append(errors.ObsoleteHeaderDefect( - "period in 'phrase'")) - value = value[1:] + if value[0] == '.': + tmpvalue = value.lstrip('.') + for _ in range(len(value) - len(tmpvalue)): + 
phrase.append(DOT) + phrase.defects.append(errors.ObsoleteHeaderDefect( + "period in 'phrase'")) + value = tmpvalue else: try: token, value = get_word(value) @@ -1461,6 +1471,20 @@ def get_phrase(value): phrase.append(token) return phrase, value +def _find_phrase(reslist, value, endchars): + # lstrip() should not strip stuff in 'endchars' + phrase_end_chars = ''.join(PHRASE_ENDS - set(endchars)) + while value and value[0] not in endchars: + if value[0] in PHRASE_ENDS: + tmpvalue = value.lstrip(phrase_end_chars) + for i in range(len(value) - len(tmpvalue)): + reslist.append(ValueTerminal(value[i], 'misplaced-special')) + value = tmpvalue + else: + token, value = get_phrase(value) + reslist.append(token) + return value + def get_local_part(value): """ local-part = dot-atom / quoted-string / obs-local-part @@ -1842,14 +1866,7 @@ def get_invalid_mailbox(value, endchars): """ invalid_mailbox = InvalidMailbox() - while value and value[0] not in endchars: - if value[0] in PHRASE_ENDS: - invalid_mailbox.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_mailbox.append(token) + value = _find_phrase(invalid_mailbox, value, endchars) return invalid_mailbox, value def get_mailbox_list(value): @@ -2196,10 +2213,7 @@ def parse_mime_version(value): if not value: mime_version.defects.append(errors.HeaderMissingRequiredValue( "Expected MIME version number but found only CFWS")) - digits = '' - while value and value[0] != '.' 
and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] + digits, value = get_cfws_digits(value, CFWS_LEADER_WITH_DOT) if not digits.isdigit(): mime_version.defects.append(errors.InvalidHeaderDefect( "Expected MIME major version number but found {!r}".format(digits))) @@ -2227,10 +2241,7 @@ def parse_mime_version(value): mime_version.defects.append(errors.InvalidHeaderDefect( "Incomplete MIME version; found only major number")) return mime_version - digits = '' - while value and value[0] not in CFWS_LEADER: - digits += value[0] - value = value[1:] + digits, value = get_cfws_digits(value, CFWS_LEADER) if not digits.isdigit(): mime_version.defects.append(errors.InvalidHeaderDefect( "Expected MIME minor version number but found {!r}".format(digits))) @@ -2255,14 +2266,7 @@ def get_invalid_parameter(value): """ invalid_parameter = InvalidParameter() - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - invalid_parameter.append(ValueTerminal(value[0], - 'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - invalid_parameter.append(token) + value = _find_phrase(invalid_parameter, value, ';') return invalid_parameter, value def get_ttext(value): @@ -2407,10 +2411,8 @@ def get_section(value): if not value or not value[0].isdigit(): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) - digits = '' - while value and value[0].isdigit(): - digits += value[0] - value = value[1:] + ind = next((i for i, ch in enumerate(value) if not ch.isdigit()), len(value)) + digits, value = value[:ind], value[ind:] if digits[0] == '0' and digits != '0': section.defects.append(errors.InvalidHeaderDefect( "section number has an invalid leading 0")) @@ -2638,17 +2640,10 @@ def _find_mime_parameters(tokenlist, value): """Do our best to find the parameters in an invalid MIME header """ - while value and value[0] != ';': - if value[0] in PHRASE_ENDS: - tokenlist.append(ValueTerminal(value[0], 
'misplaced-special')) - value = value[1:] - else: - token, value = get_phrase(value) - tokenlist.append(token) - if not value: - return - tokenlist.append(ValueTerminal(';', 'parameter-separator')) - tokenlist.append(parse_mime_parameters(value[1:])) + value = _find_phrase(tokenlist, value, ';') + if value: + tokenlist.append(ValueTerminal(';', 'parameter-separator')) + tokenlist.append(parse_mime_parameters(value[1:])) def parse_content_type_header(value): """ maintype "/" subtype *( ";" parameter ) @@ -2757,12 +2752,16 @@ def parse_content_transfer_encoding_header(value): if not value: return cte_header while value: - cte_header.defects.append(errors.InvalidHeaderDefect( - "Extra text after content transfer encoding")) if value[0] in PHRASE_ENDS: - cte_header.append(ValueTerminal(value[0], 'misplaced-special')) - value = value[1:] + tmpvalue = value.lstrip(PHRASE_ENDS_CHARS) + for i in range(len(value) - len(tmpvalue)): + cte_header.defects.append(errors.InvalidHeaderDefect( + "Extra text after content transfer encoding")) + cte_header.append(ValueTerminal(value[i], 'misplaced-special')) + value = tmpvalue else: + cte_header.defects.append(errors.InvalidHeaderDefect( + "Extra text after content transfer encoding")) token, value = get_phrase(value) cte_header.append(token) return cte_header diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index fd4ac2c404ce47..3d91537191c69b 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2676,6 +2676,16 @@ def test_invalid_content_transfer_encoding(self): ";foo", ";foo", ";foo", [errors.InvalidHeaderDefect]*3 ) + def test_invalid_content_transfer_encoding_misplaced_special(self): + cte = parser.parse_content_transfer_encoding_header("foo;;;;;") + self.assertEqual(len(cte), 6) + self.assertEqual(cte[0].value, "foo") + self.assertEqual(cte[0].token_type, "token") + self.assertEqual(cte[0].value, 
"foo") + self.assertEqual(cte[0].token_type, "token") + terminal = parser.ValueTerminal(";", "misplaced-special") + self.assertEqual(cte[1:], [terminal] * 5) + # get_msg_id def test_get_msg_id_empty(self): diff --git a/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst b/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst new file mode 100644 index 00000000000000..f4371932aa04cf --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst @@ -0,0 +1,2 @@ +Fix various HTTP header value parsing routines with worst-time +quadratic-complexity. Patch by Bénédikt Tran. From af32b1bf64506b5406a6efd7541007807a270a1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 31 May 2025 11:45:21 +0200 Subject: [PATCH 2/3] fix quadratic worst-time complexity in `_header_value_parser.py` --- Lib/email/_header_value_parser.py | 41 +++++++++++++++++++------------ 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 3b01636bc27cbf..235b180473b3b0 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -89,6 +89,7 @@ # '.', '"', and '(' do not end phrases in order to support obs-phrase PHRASE_ENDS = SPECIALS - set('."(') PHRASE_ENDS_CHARS = r''.join(PHRASE_ENDS) +PHRASE_ENDS_CHARS_NO_SEMICOLON = PHRASE_ENDS_CHARS.replace(';', '') TSPECIALS = (SPECIALS | set('/?=')) - set('.') TOKEN_ENDS = TSPECIALS | WSP ASPECIALS = TSPECIALS | set("*'%") @@ -1453,10 +1454,11 @@ def get_phrase(value): while value and value[0] not in PHRASE_ENDS: if value[0] == '.': tmpvalue = value.lstrip('.') - for _ in range(len(value) - len(tmpvalue)): - phrase.append(DOT) - phrase.defects.append(errors.ObsoleteHeaderDefect( - "period in 'phrase'")) + n = len(value) - len(tmpvalue) + phrase.extend(DOT for _ in range(n)) + phrase.defects.extend( + 
errors.ObsoleteHeaderDefect("period in 'phrase'") + for _ in range(n)) value = tmpvalue else: try: @@ -1471,14 +1473,13 @@ def get_phrase(value): phrase.append(token) return phrase, value -def _find_phrase(reslist, value, endchars): - # lstrip() should not strip stuff in 'endchars' - phrase_end_chars = ''.join(PHRASE_ENDS - set(endchars)) +def _find_phrase(reslist, value, phrase_ends, phrase_end_chars, endchars): while value and value[0] not in endchars: - if value[0] in PHRASE_ENDS: + if value[0] in phrase_ends: tmpvalue = value.lstrip(phrase_end_chars) - for i in range(len(value) - len(tmpvalue)): - reslist.append(ValueTerminal(value[i], 'misplaced-special')) + reslist.extend( + ValueTerminal(value[i], 'misplaced-special') + for i in range(len(value) - len(tmpvalue))) value = tmpvalue else: token, value = get_phrase(value) @@ -1866,7 +1867,10 @@ def get_invalid_mailbox(value, endchars): """ invalid_mailbox = InvalidMailbox() - value = _find_phrase(invalid_mailbox, value, endchars) + # lstrip() should not strip stuff in 'endchars' + phrase_end_chars = ''.join(PHRASE_ENDS - set(endchars)) + value = _find_phrase(invalid_mailbox, value, + PHRASE_ENDS, phrase_end_chars, endchars) return invalid_mailbox, value def get_mailbox_list(value): @@ -2266,7 +2270,8 @@ def get_invalid_parameter(value): """ invalid_parameter = InvalidParameter() - value = _find_phrase(invalid_parameter, value, ';') + value = _find_phrase(invalid_parameter, value, + PHRASE_ENDS, PHRASE_ENDS_CHARS_NO_SEMICOLON, ';') return invalid_parameter, value def get_ttext(value): @@ -2569,12 +2574,15 @@ def get_parameter(value): while value: if value[0] in WSP: token, value = get_fws(value) + v.append(token) elif value[0] == '"': - token = ValueTerminal('"', 'DQUOTE') - value = value[1:] + tmpvalue = value.lstrip('"') + n = len(value) - len(tmpvalue) + v.extend((ValueTerminal('"', 'DQUOTE') for _ in range(n))) + value = tmpvalue else: token, value = get_qcontent(value) - v.append(token) + v.append(token) 
token = v else: token, value = get_value(value) @@ -2640,7 +2648,8 @@ def _find_mime_parameters(tokenlist, value): """Do our best to find the parameters in an invalid MIME header """ - value = _find_phrase(tokenlist, value, ';') + value = _find_phrase(tokenlist, value, + PHRASE_ENDS, PHRASE_ENDS_CHARS_NO_SEMICOLON, ';') if value: tokenlist.append(ValueTerminal(';', 'parameter-separator')) tokenlist.append(parse_mime_parameters(value[1:])) From 7bf0e7a175a097560f2b4265075ffc29e54a11bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Jun 2025 10:27:16 +0200 Subject: [PATCH 3/3] fixup --- .../Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst b/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst index f4371932aa04cf..93389a64ee2ead 100644 --- a/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst +++ b/Misc/NEWS.d/next/Security/2025-05-30-20-08-38.gh-issue-134873.6Z5xUC.rst @@ -1,2 +1,2 @@ -Fix various HTTP header value parsing routines with worst-time -quadratic-complexity. Patch by Bénédikt Tran. +Fix various HTTP header value parsing routines with worst-case +quadratic time complexity. Patch by Bénédikt Tran. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy