From 1fe5d0a592b752f0af5fb2b2cb087a6b8c1f3f6d Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Wed, 16 Aug 2023 18:37:53 +0200 Subject: [PATCH] gh-79986: Add parsing for References/In-Reply-To email headers This is a followup to 46d88a113142b26c01c95c93846a89318ba87ffc (#13397), which added parsing for Message-ID. Similar handling is needed for the other two identification headers. --- Lib/email/_header_value_parser.py | 32 ++++++++++ Lib/email/headerregistry.py | 14 +++++ .../test_email/test__header_value_parser.py | 63 +++++++++++++++++++ Lib/test/test_email/test_headerregistry.py | 13 ++++ ...5-07-29-11-37-22.gh-issue-79986.fnJbE_.rst | 2 + 5 files changed, 124 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 91243378dc0441..a8ea0128c8d750 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -874,6 +874,12 @@ class MessageID(MsgID): class InvalidMessageID(MessageID): token_type = 'invalid-message-id' +class MessageIDList(TokenList): + token_type = 'message-id-list' + + @property + def message_ids(self): + return [x for x in self if x.token_type=='msg-id'] class Header(TokenList): token_type = 'header' @@ -2171,6 +2177,32 @@ def parse_message_id(value): return message_id +def parse_message_ids(value): + """in-reply-to = "In-Reply-To:" 1*msg-id CRLF + references = "References:" 1*msg-id CRLF + """ + message_ids = MessageIDList() + while value: + # message id list separated with commas - this is invalid, + # but happens rather frequently in the wild + if value and value[0] == ',': + message_ids.defects.append( + errors.InvalidHeaderDefect("msg-id separated with comma")) + value = value[1:] + continue + + try: + token, value = get_msg_id(value) + message_ids.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + 
message_ids.append(InvalidMessageID(token)) + message_ids.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + break + + return message_ids + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in diff --git a/Lib/email/headerregistry.py b/Lib/email/headerregistry.py index 543141dc427ebe..4c3f4e929320e9 100644 --- a/Lib/email/headerregistry.py +++ b/Lib/email/headerregistry.py @@ -534,6 +534,18 @@ def parse(cls, value, kwds): kwds['defects'].extend(parse_tree.all_defects) +class ReferencesHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_ids) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = ' '.join((str(i) for i in parse_tree.message_ids)) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -557,6 +569,8 @@ def parse(cls, value, kwds): 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, 'message-id': MessageIDHeader, + 'in-reply-to': ReferencesHeader, + 'references': ReferencesHeader, } class HeaderRegistry: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 179e236ecdfd7f..6d649b0dab74b5 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2867,6 +2867,69 @@ def test_get_msg_id_ws_only_local(self): ) self.assertEqual(msg_id.token_type, 'msg-id') + def test_parse_message_ids_valid(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " ", + " ", + " ", + [], + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_empty(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " ", + " ", + " ", + 
[errors.InvalidHeaderDefect], + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_comment(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " (foo's message from \"bar\")", + " (foo's message from \"bar\")", + " ", + [], + ) + self.assertEqual(message_ids.message_ids[0].value, ' ') + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_comma_sep(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + ",", + "", + "", + [errors.InvalidHeaderDefect], + ) + self.assertEqual(message_ids.message_ids[0].value, '') + self.assertEqual(message_ids.message_ids[1].value, '') + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_invalid_id(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + "", + "", + "", + [errors.InvalidHeaderDefect]*2, + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + + def test_parse_message_ids_broken_ang(self): + message_ids = self._test_parse_x( + parser.parse_message_ids, + " >bar@foo", + " >bar@foo", + " >bar@foo", + [errors.InvalidHeaderDefect]*1, + ) + self.assertEqual(message_ids.token_type, 'message-id-list') + @parameterize diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index ff7a6da644d572..df34ec70504bc5 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1812,5 +1812,18 @@ def test_message_id_header_is_not_folded(self): h.fold(policy=policy.default.clone(max_line_length=20)), 'Message-ID:\n <ईमेलfromMessage@wők.com>\n') + def test_fold_references(self): + h = self.make_header( + 'References', + ' ' + '' + ) + self.assertEqual( + h.fold(policy=policy.default.clone(max_line_length=20)), + 'References: ' + '\n' + ' \n') + + if __name__ == '__main__': unittest.main() diff --git 
a/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst new file mode 100644 index 00000000000000..57e14e9cdcaae3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-29-11-37-22.gh-issue-79986.fnJbE_.rst @@ -0,0 +1,2 @@ +Add parsing for References/In-Reply-To email headers, preventing their +message IDs from being split when the header is folded. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy