diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py index 298177eb8003a7..245d941a8d13ce 100644 --- a/Lib/encodings/__init__.py +++ b/Lib/encodings/__init__.py @@ -58,11 +58,10 @@ def normalize_encoding(encoding): chars = [] punct = False for c in encoding: - if c.isalnum() or c == '.': + if c.isascii() and (c.isalnum() or c == '.'): if punct and chars: chars.append('_') - if c.isascii(): - chars.append(c) + chars.append(c) punct = False else: punct = True diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index d8666f7290e72e..fa777480664449 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -3888,6 +3888,12 @@ def search_function(encoding): self.assertEqual(FOUND, codecs.lookup('AAA---8')) self.assertEqual(FOUND, codecs.lookup('AAA 8')) self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8')) + self.assertEqual(FOUND, codecs.lookup('aaa\xe98')) + self.assertEqual(FOUND, codecs.lookup('aaa\u20ac8')) + self.assertEqual(FOUND, codecs.lookup('aaa-\xe9-8')) + self.assertEqual(FOUND, codecs.lookup('aaa-\u20ac-8')) + self.assertEqual(FOUND, codecs.lookup('aaa-8-\xe9')) + self.assertEqual(FOUND, codecs.lookup('aaa-8-\u20ac')) self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8')) self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8')) self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8')) @@ -3899,6 +3905,12 @@ def test_encodings_normalize_encoding(self): normalize = encodings.normalize_encoding self.assertEqual(normalize('utf_8'), 'utf_8') self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8') + self.assertEqual(normalize('utf\xe98'), 'utf_8') + self.assertEqual(normalize('utf\u20ac8'), 'utf_8') + self.assertEqual(normalize('utf-\xe9-8'), 'utf_8') + self.assertEqual(normalize('utf-\u20ac-8'), 'utf_8') + self.assertEqual(normalize('utf-8-\xe9'), 'utf_8') + self.assertEqual(normalize('utf-8-\u20ac'), 'utf_8') self.assertEqual(normalize('utf 8'), 'utf_8') # encodings.normalize_encoding() doesn't convert # characters to 
lower case. diff --git a/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst b/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst new file mode 100644 index 00000000000000..5596170549e797 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-17-11-59-10.gh-issue-136736.kzQ_dY.rst @@ -0,0 +1,2 @@ +Fix handling of alphanumerical non-ASCII characters in +:func:`encodings.normalize_encoding`.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: