From 523f8ca935d5d255a3efb5c1b257ce38b0939942 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 29 Feb 2024 16:57:37 +0200 Subject: [PATCH 1/2] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset --- Lib/email/generator.py | 2 +- Lib/email/message.py | 2 +- Lib/test/test_email/test_email.py | 15 +++++++++++++++ .../2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst | 2 ++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 7ccbe10eb76856..c8056ad47baa0f 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -243,7 +243,7 @@ def _handle_text(self, msg): # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) diff --git a/Lib/email/message.py b/Lib/email/message.py index fe769580fed5d0..a14cca56b3745a 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None): return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 39d4ace8d4a1d8..d9af05c306eb30 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self): msg = email.message_from_bytes(source) self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_with_ascii_charset(self): + m = textwrap.dedent("""\ + MIME-Version: 1.0 + Content-type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 8bit + + Test if non-ascii messages with no Content-Transfer-Encoding set + can be as_string'd: + Föö bär + """) + source = m.encode('iso-8859-1') + expected = source.decode('ascii', 'replace') + msg = email.message_from_bytes(source) + self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_without_content_type_and_cte(self): m = textwrap.dedent("""\ MIME-Version: 1.0 diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst new file mode 100644 index 00000000000000..1303ac527c4545 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -0,0 +1,2 @@ +Fix :meth:`email.Message.as_string` for non-ASCII message with ASCII +charset. From d8f86c81b7899b4388bebc037581a0b407b6f0d5 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 1 Mar 2024 17:09:22 +0200 Subject: [PATCH 2/2] Update the NEWS entry. --- .../Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst index 1303ac527c4545..da62f8a2450711 100644 --- a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -1,2 +1,4 @@ -Fix :meth:`email.Message.as_string` for non-ASCII message with ASCII -charset. +Fix UnicodeEncodeError in :meth:`email.Message.as_string` that results when +a message that claims to be in the ascii character set actually has non-ascii +characters. Non-ascii characters are now replaced with the U+FFFD replacement +character, like in the ``replace`` error handler. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy