From 1fd83f134473efe04b390b7038fec9bc01d5940f Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 3 Apr 2025 00:54:04 +0100 Subject: [PATCH 1/5] Optimise import time for ``shlex`` --- Lib/shlex.py | 23 ++++++++++++------- ...-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst | 2 ++ 2 files changed, 17 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst diff --git a/Lib/shlex.py b/Lib/shlex.py index f4821616b62a0f..b299f65be373b4 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -7,25 +7,22 @@ # iterator interface by Gustavo Niemeyer, April 2003. # changes to tokenize more like Posix shells by Vinay Sajip, July 2016. -import os -import re -import sys -from collections import deque - -from io import StringIO - __all__ = ["shlex", "split", "quote", "join"] class shlex: "A lexical analyzer class for simple shell-like syntaxes." def __init__(self, instream=None, infile=None, posix=False, punctuation_chars=False): + from collections import deque # deferred import for performance + if isinstance(instream, str): + from io import StringIO # deferred import for performance instream = StringIO(instream) if instream is not None: self.instream = instream self.infile = infile else: + import sys # deferred import for performance self.instream = sys.stdin self.infile = None self.posix = posix @@ -78,6 +75,7 @@ def push_token(self, tok): def push_source(self, newstream, newfile=None): "Push an input source onto the lexer's input source stack." if isinstance(newstream, str): + from io import StringIO # deferred import for performance newstream = StringIO(newstream) self.filestack.appendleft((self.infile, self.instream, self.lineno)) self.infile = newfile @@ -278,6 +276,7 @@ def read_token(self): def sourcehook(self, newfile): "Hook called on a filename to be sourced." + import os.path if newfile[0] == '"': newfile = newfile[1:-1] # This implements cpp-like semantics for relative-path inclusion. @@ -318,7 +317,14 @@ def join(split_command): return ' '.join(quote(arg) for arg in split_command) -_find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search +def _find_unsafe(s, /): + # this function replaces itself with the compiled pattern on execution, + # to allow as deferred import of re for performance + global _find_unsafe + import re + _find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search + return _find_unsafe(s) + def quote(s): """Return a shell-escaped version of the string *s*.""" @@ -337,6 +343,7 @@ def _print_tokens(lexer): print("Token: " + repr(tt)) if __name__ == '__main__': + import sys # deferred import for performance if len(sys.argv) == 1: _print_tokens(shlex()) else: diff --git a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst new file mode 100644 index 00000000000000..c3b599b041aa64 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst @@ -0,0 +1,2 @@ +Improve import times by up to 33x for the :mod:`shlex` module. Patch by Adam +Turner. From 8811463aed98455641958e4a64cf7ba242ac283e Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Fri, 4 Apr 2025 16:07:58 +0100 Subject: [PATCH 2/5] Revert deferral of sys and io --- Lib/shlex.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/shlex.py b/Lib/shlex.py index b299f65be373b4..29278931a4febd 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -7,6 +7,9 @@ # iterator interface by Gustavo Niemeyer, April 2003. # changes to tokenize more like Posix shells by Vinay Sajip, July 2016. +import sys +from io import StringIO + __all__ = ["shlex", "split", "quote", "join"] class shlex: @@ -16,13 +19,11 @@ def __init__(self, instream=None, infile=None, posix=False, from collections import deque # deferred import for performance if isinstance(instream, str): - from io import StringIO # deferred import for performance instream = StringIO(instream) if instream is not None: self.instream = instream self.infile = infile else: - import sys # deferred import for performance self.instream = sys.stdin self.infile = None self.posix = posix @@ -75,7 +76,6 @@ def push_token(self, tok): def push_source(self, newstream, newfile=None): "Push an input source onto the lexer's input source stack." if isinstance(newstream, str): - from io import StringIO # deferred import for performance newstream = StringIO(newstream) self.filestack.appendleft((self.infile, self.instream, self.lineno)) self.infile = newfile @@ -343,7 +343,6 @@ def _print_tokens(lexer): print("Token: " + repr(tt)) if __name__ == '__main__': - import sys # deferred import for performance if len(sys.argv) == 1: _print_tokens(shlex()) else: From bd6916a9d45e68ce9f5de60f79cb0d9e049eb61d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 6 Apr 2025 05:13:47 +0100 Subject: [PATCH 3/5] Switch to bytes.translate() based approach --- Lib/shlex.py | 16 ++++++---------- ...025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst | 5 +++-- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/Lib/shlex.py b/Lib/shlex.py index 29278931a4febd..9272955cabfd8c 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -317,20 +317,16 @@ def join(split_command): return ' '.join(quote(arg) for arg in split_command) -def _find_unsafe(s, /): - # this function replaces itself with the compiled pattern on execution, - # to allow as deferred import of re for performance - global _find_unsafe - import re - _find_unsafe = re.compile(r'[^\w@%+=:,./-]', re.ASCII).search - return _find_unsafe(s) - - def quote(s): """Return a shell-escaped version of the string *s*.""" if not s: return "''" - if _find_unsafe(s) is None: + + # Use bytes.translate() for performance + safe_chars = (b'%+,-./0123456789:=@' + b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_' + b'abcdefghijklmnopqrstuvwxyz') + if not s.encode().translate(None, delete=safe_chars): return s # use single quotes, and put single quotes into double quotes diff --git a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst index c3b599b041aa64..6b4b3ed7526a8b 100644 --- a/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst +++ b/Misc/NEWS.d/next/Library/2025-04-03-00-56-48.gh-issue-118761.Vb0S1B.rst @@ -1,2 +1,3 @@ -Improve import times by up to 33x for the :mod:`shlex` module. Patch by Adam -Turner. +Improve import times by up to 33x for the :mod:`shlex` module, +and improve the performance of :func:`shlex.quote` by up to 12x. +Patch by Adam Turner. From 192329ea333c1e19e5ef251161afe4f221217a78 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Fri, 18 Apr 2025 02:50:26 +0100 Subject: [PATCH 4/5] Update Lib/shlex.py --- Lib/shlex.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/shlex.py b/Lib/shlex.py index 9272955cabfd8c..5bf6e0d70e0012 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -326,7 +326,8 @@ def quote(s): safe_chars = (b'%+,-./0123456789:=@' b'ABCDEFGHIJKLMNOPQRSTUVWXYZ_' b'abcdefghijklmnopqrstuvwxyz') - if not s.encode().translate(None, delete=safe_chars): + # No quoting is needed if `s` is an ASCII string consisting only of `safe_chars` + if s.isascii() and not s.encode().translate(None, delete=safe_chars): return s # use single quotes, and put single quotes into double quotes From 4a640ce2f34753d8c0c55f3cecdaa1f406f3dd00 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 21 Apr 2025 01:33:49 +0100 Subject: [PATCH 5/5] Add test_lazy_imports --- Lib/test/test_shlex.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py index 797c91ee7effdf..f35571ea88654d 100644 --- a/Lib/test/test_shlex.py +++ b/Lib/test/test_shlex.py @@ -3,6 +3,7 @@ import shlex import string import unittest +from test.support import import_helper # The original test data set was from shellwords, by Hartmut Goebel. @@ -363,6 +364,9 @@ def testPunctuationCharsReadOnly(self): with self.assertRaises(AttributeError): shlex_instance.punctuation_chars = False + def test_lazy_imports(self): + import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'}) + # Allow this test to be used with old shlex.py if not getattr(shlex, "split", None):
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: