From ade89ced891ed4f97935aaa1983a7e154550362e Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Wed, 2 Apr 2025 10:46:54 +0200 Subject: [PATCH] [3.12] gh-130197: pygettext: Test the --escape option (GH-131902) (cherry picked from commit 87d9983994e9a423e9e0050b1bbee52ebaf84367) Co-authored-by: Tomas R. --- .../test_tools/i18n_data/ascii-escapes.pot | 45 +++++++++++++++++++ Lib/test/test_tools/i18n_data/escapes.pot | 45 +++++++++++++++++++ Lib/test/test_tools/i18n_data/escapes.py | 23 ++++++++++ Lib/test/test_tools/test_i18n.py | 43 ++++++++++++------ Tools/i18n/pygettext.py | 2 +- 5 files changed, 143 insertions(+), 15 deletions(-) create mode 100644 Lib/test/test_tools/i18n_data/ascii-escapes.pot create mode 100644 Lib/test/test_tools/i18n_data/escapes.pot create mode 100644 Lib/test/test_tools/i18n_data/escapes.py diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot new file mode 100644 index 00000000000000..18d868b6a20a46 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: escapes.py:5 +msgid "" +"\"\t\n" +"\r\\" +msgstr "" + +#: escapes.py:8 +msgid "" +"\000\001\002\003\004\005\006\007\010\t\n" +"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +msgstr "" + +#: escapes.py:13 +msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" +msgstr "" + +#: escapes.py:17 +msgid "\177" +msgstr "" + +#: escapes.py:20 +msgid "€   ÿ" +msgstr "" + +#: escapes.py:23 +msgid "α ㄱ ð“‚€" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/escapes.pot b/Lib/test/test_tools/i18n_data/escapes.pot new file mode 100644 index 00000000000000..2c7899d59daba6 --- /dev/null +++ b/Lib/test/test_tools/i18n_data/escapes.pot @@ -0,0 +1,45 @@ +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR ORGANIZATION +# FIRST AUTHOR , YEAR. +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2000-01-01 00:00+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: pygettext.py 1.5\n" + + +#: escapes.py:5 +msgid "" +"\"\t\n" +"\r\\" +msgstr "" + +#: escapes.py:8 +msgid "" +"\000\001\002\003\004\005\006\007\010\t\n" +"\013\014\r\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037" +msgstr "" + +#: escapes.py:13 +msgid " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~" +msgstr "" + +#: escapes.py:17 +msgid "\177" +msgstr "" + +#: escapes.py:20 +msgid "\302\200 \302\240 \303\277" +msgstr "" + +#: escapes.py:23 +msgid "\316\261 \343\204\261 \360\223\202\200" +msgstr "" + diff --git a/Lib/test/test_tools/i18n_data/escapes.py b/Lib/test/test_tools/i18n_data/escapes.py new file mode 100644 index 00000000000000..900bd97a70fdba --- /dev/null +++ b/Lib/test/test_tools/i18n_data/escapes.py @@ -0,0 +1,23 @@ +import gettext as _ + + +# Special characters that are always escaped in the POT file +_('"\t\n\r\\') + +# All ascii characters 0-31 +_('\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n' + '\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15' + '\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f') + +# All ascii characters 32-126 +_(' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~') + +# ascii char 127 +_('\x7f') + +# some characters in the 128-255 range +_('\x80 \xa0 ÿ') + +# some characters >= 256 encoded as 2, 3 and 4 bytes, respectively +_('α ㄱ ð“‚€') diff --git a/Lib/test/test_tools/test_i18n.py b/Lib/test/test_tools/test_i18n.py index 6f71f0976819f1..ffa1b1178eddbc 100644 --- a/Lib/test/test_tools/test_i18n.py +++ b/Lib/test/test_tools/test_i18n.py @@ -369,15 +369,8 @@ class _(object): def test_pygettext_output(self): """Test that the pygettext output exactly matches snapshots.""" - for input_file in DATA_DIR.glob('*.py'): - output_file = input_file.with_suffix('.pot') - with self.subTest(input_file=f'i18n_data/{input_file}'): - contents = input_file.read_text(encoding='utf-8') - with temp_cwd(None): - Path(input_file.name).write_text(contents) - assert_python_ok('-Xutf8', self.script, '--docstrings', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') - + for input_file, output_file, output in extract_from_snapshots(): + with self.subTest(input_file=input_file): expected = output_file.read_text(encoding='utf-8') self.assert_POT_equal(expected, output) @@ -408,15 +401,37 @@ def test_files_list(self): self.assertNotIn(text3, data) -def update_POT_snapshots(): - for input_file in DATA_DIR.glob('*.py'): - output_file = input_file.with_suffix('.pot') +def extract_from_snapshots(): + snapshots = { + 'messages.py': ('--docstrings',), + 'fileloc.py': ('--docstrings',), + 'docstrings.py': ('--docstrings',), + # == Test character escaping + # Escape ascii and unicode: + 'escapes.py': ('--escape',), + # Escape only ascii and let unicode pass through: + ('escapes.py', 'ascii-escapes.pot'): (), + } + + for filename, args in snapshots.items(): + if isinstance(filename, tuple): + filename, output_file = filename + output_file = DATA_DIR / output_file + input_file = DATA_DIR / filename + else: + input_file = DATA_DIR / filename + output_file = input_file.with_suffix('.pot') contents = input_file.read_bytes() with temp_cwd(None): Path(input_file.name).write_bytes(contents) - assert_python_ok('-Xutf8', Test_pygettext.script, '--docstrings', input_file.name) - output = Path('messages.pot').read_text(encoding='utf-8') + assert_python_ok('-Xutf8', Test_pygettext.script, *args, + input_file.name) + yield (input_file, output_file, + Path('messages.pot').read_text(encoding='utf-8')) + +def update_POT_snapshots(): + for _, output_file, output in extract_from_snapshots(): output = normalize_POT_file(output) output_file.write_text(output, encoding='utf-8') diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index 0d16e8f7da0071..8f59982640cbba 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -207,7 +207,7 @@ def make_escapes(pass_nonascii): global escapes, escape if pass_nonascii: # Allow non-ascii characters to pass through so that e.g. 'msgid - # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we + # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. mod = 128 escape = escape_ascii pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy