From ec78492ef1ae8db70d6b85c03310fad27af6170d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 3 Apr 2025 01:35:47 +0100 Subject: [PATCH 1/6] Optimise import time for ``string`` --- Lib/string.py | 64 +++++++++++-------- ...-04-03-01-35-02.gh-issue-118761.VQcj70.rst | 2 + 2 files changed, 41 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2025-04-03-01-35-02.gh-issue-118761.VQcj70.rst diff --git a/Lib/string.py b/Lib/string.py index c4f05c7223ce8a..e3416135990179 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -49,29 +49,20 @@ def capwords(s, sep=None): #################################################################### -import re as _re -from collections import ChainMap as _ChainMap - +_sentinel_flags = object() _sentinel_dict = {} -class Template: - """A string class for supporting $-substitutions.""" - delimiter = '$' - # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but - # without the ASCII flag. We can't add re.ASCII to flags because of - # backward compatibility. So we use the ?a local flag and [a-z] pattern. 
- # See https://bugs.python.org/issue31672 - idpattern = r'(?a:[_a-z][_a-z0-9]*)' - braceidpattern = None - flags = _re.IGNORECASE - - def __init_subclass__(cls): - super().__init_subclass__() - if 'pattern' in cls.__dict__: +class _TemplatePattern: + def __get__(self, instance, cls=None): + if cls is None: + return self + import re + if ('pattern' in cls.__dict__ + and not isinstance(cls.__dict__['pattern'], _TemplatePattern)): pattern = cls.pattern else: - delim = _re.escape(cls.delimiter) + delim = re.escape(cls.delimiter) id = cls.idpattern bid = cls.braceidpattern or cls.idpattern pattern = fr""" @@ -82,7 +73,32 @@ def __init_subclass__(cls): (?P<invalid>) # Other ill-formed delimiter exprs ) """ - cls.pattern = _re.compile(pattern, cls.flags | _re.VERBOSE) + if cls.flags is _sentinel_flags: + cls.flags = re.IGNORECASE + pattern = re.compile(pattern, cls.flags | re.VERBOSE) + # replace this descriptor with the compiled pattern + setattr(cls, 'pattern', pattern) + return pattern + + +class Template: + """A string class for supporting $-substitutions.""" + + delimiter = '$' + # r'[a-z]' matches to non-ASCII letters when used with IGNORECASE, but + # without the ASCII flag. We can't add re.ASCII to flags because of + # backward compatibility. So we use the ?a local flag and [a-z] pattern. 
+ # See https://bugs.python.org/issue31672 + idpattern = r'(?a:[_a-z][_a-z0-9]*)' + braceidpattern = None + flags = _sentinel_flags # default: re.IGNORECASE + + # use a descriptor to be able to defer the import of `re`, for performance + pattern = _TemplatePattern() + + def __init_subclass__(cls): + super().__init_subclass__() + cls.pattern = _TemplatePattern() def __init__(self, template): self.template = template @@ -105,7 +121,8 @@ def substitute(self, mapping=_sentinel_dict, /, **kws): if mapping is _sentinel_dict: mapping = kws elif kws: - mapping = _ChainMap(kws, mapping) + from collections import ChainMap + mapping = ChainMap(kws, mapping) # Helper function for .sub() def convert(mo): # Check the most common path first. @@ -124,7 +141,8 @@ def safe_substitute(self, mapping=_sentinel_dict, /, **kws): if mapping is _sentinel_dict: mapping = kws elif kws: - mapping = _ChainMap(kws, mapping) + from collections import ChainMap + mapping = ChainMap(kws, mapping) # Helper function for .sub() def convert(mo): named = mo.group('named') or mo.group('braced') @@ -170,10 +188,6 @@ def get_identifiers(self): self.pattern) return ids -# Initialize Template.pattern. __init_subclass__() is automatically called -# only for subclasses, not for the Template class itself. -Template.__init_subclass__() - ######################################################################## # the Formatter class diff --git a/Misc/NEWS.d/next/Library/2025-04-03-01-35-02.gh-issue-118761.VQcj70.rst b/Misc/NEWS.d/next/Library/2025-04-03-01-35-02.gh-issue-118761.VQcj70.rst new file mode 100644 index 00000000000000..257ad7ece7d18a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-03-01-35-02.gh-issue-118761.VQcj70.rst @@ -0,0 +1,2 @@ +Improve import times by up to 27x for the :mod:`string` module. +Patch by Adam Turner. 
From 2b542c76445ec12ffaa4b6b869d6be8192d3e044 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 3 Apr 2025 02:52:55 +0100 Subject: [PATCH 2/6] Refactor to a common classmethod --- Lib/string.py | 55 +++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/Lib/string.py b/Lib/string.py index e3416135990179..1a62b699dc2ac6 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -49,36 +49,15 @@ def capwords(s, sep=None): #################################################################### -_sentinel_flags = object() _sentinel_dict = {} +_sentinel_flags = object() class _TemplatePattern: def __get__(self, instance, cls=None): if cls is None: return self - import re - if ('pattern' in cls.__dict__ - and not isinstance(cls.__dict__['pattern'], _TemplatePattern)): - pattern = cls.pattern - else: - delim = re.escape(cls.delimiter) - id = cls.idpattern - bid = cls.braceidpattern or cls.idpattern - pattern = fr""" - {delim}(?: - (?P<escaped>{delim}) | # Escape sequence of two delimiters - (?P<named>{id}) | # delimiter and a Python identifier - {{(?P<braced>{bid})}} | # delimiter and a braced identifier - (?P<invalid>) # Other ill-formed delimiter exprs - ) - """ - if cls.flags is _sentinel_flags: - cls.flags = re.IGNORECASE - pattern = re.compile(pattern, cls.flags | re.VERBOSE) - # replace this descriptor with the compiled pattern - setattr(cls, 'pattern', pattern) - return pattern + return cls._compile_pattern() class Template: @@ -93,12 +72,36 @@ class Template: braceidpattern = None flags = _sentinel_flags # default: re.IGNORECASE - # use a descriptor to be able to defer the import of `re`, for performance - pattern = _TemplatePattern() + pattern = _TemplatePattern() # use a descriptor to compile the pattern def __init_subclass__(cls): super().__init_subclass__() - cls.pattern = _TemplatePattern() + cls._compile_pattern() + + @classmethod + def _compile_pattern(cls): + import re # deferred import, for 
performance + + cls_pattern = cls.__dict__.get('pattern') + if cls_pattern and not isinstance(cls_pattern, _TemplatePattern): + # Prefer a pattern defined on the class. + pattern = cls_pattern + else: + delim = re.escape(cls.delimiter) + id = cls.idpattern + bid = cls.braceidpattern or cls.idpattern + pattern = fr""" + {delim}(?: + (?P<escaped>{delim}) | # Escape sequence of two delimiters + (?P<named>{id}) | # delimiter and a Python identifier + {{(?P<braced>{bid})}} | # delimiter and a braced identifier + (?P<invalid>) # Other ill-formed delimiter exprs + ) + """ + if cls.flags is _sentinel_flags: + cls.flags = re.IGNORECASE + pat = cls.pattern = re.compile(pattern, cls.flags | re.VERBOSE) + return pat def __init__(self, template): self.template = template From e28e43ff093256eade1fe965e6e72eb352c75477 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Thu, 3 Apr 2025 02:55:36 +0100 Subject: [PATCH 3/6] Add comment --- Lib/string.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/string.py b/Lib/string.py index 1a62b699dc2ac6..af762894667e18 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -57,6 +57,7 @@ class _TemplatePattern: def __get__(self, instance, cls=None): if cls is None: return self + # This descriptor is overwritten in ``_compile_pattern()``. 
return cls._compile_pattern() From 056dd07bb50834ee07601661ff00de6f6bcd72fd Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 6 Apr 2025 18:11:27 +0100 Subject: [PATCH 4/6] Use None as the sentinel --- Lib/string.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/string.py b/Lib/string.py index af762894667e18..c4fb69c4ec7a04 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -50,7 +50,6 @@ def capwords(s, sep=None): #################################################################### _sentinel_dict = {} -_sentinel_flags = object() class _TemplatePattern: @@ -71,7 +70,7 @@ class Template: # See https://bugs.python.org/issue31672 idpattern = r'(?a:[_a-z][_a-z0-9]*)' braceidpattern = None - flags = _sentinel_flags # default: re.IGNORECASE + flags = None # default: re.IGNORECASE pattern = _TemplatePattern() # use a descriptor to compile the pattern @@ -99,7 +98,7 @@ def _compile_pattern(cls): (?P<invalid>) # Other ill-formed delimiter exprs ) """ - if cls.flags is _sentinel_flags: + if cls.flags is None: cls.flags = re.IGNORECASE pat = cls.pattern = re.compile(pattern, cls.flags | re.VERBOSE) return pat From bb3605a7d89acb288cabdf5e29dffc11b2567251 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Mon, 7 Apr 2025 21:52:14 +0100 Subject: [PATCH 5/6] Make _TemplatePattern a singleton --- Lib/string.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/string.py b/Lib/string.py index c4fb69c4ec7a04..9ba0769e93eee3 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -53,11 +53,12 @@ def capwords(s, sep=None): class _TemplatePattern: + # This descriptor is overwritten in ``Template._compile_pattern()``. def __get__(self, instance, cls=None): if cls is None: return self - # This descriptor is overwritten in ``_compile_pattern()``. 
return cls._compile_pattern() +_TemplatePattern = _TemplatePattern() class Template: @@ -72,7 +73,7 @@ class Template: braceidpattern = None flags = None # default: re.IGNORECASE - pattern = _TemplatePattern() # use a descriptor to compile the pattern + pattern = _TemplatePattern # use a descriptor to compile the pattern def __init_subclass__(cls): super().__init_subclass__() @@ -83,7 +84,7 @@ def _compile_pattern(cls): import re # deferred import, for performance cls_pattern = cls.__dict__.get('pattern') - if cls_pattern and not isinstance(cls_pattern, _TemplatePattern): + if cls_pattern is not None and cls_pattern is not _TemplatePattern: # Prefer a pattern defined on the class. pattern = cls_pattern else: From 6fb9b5d65c2822012efe47bf7158227bcfde4ce0 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 8 Apr 2025 10:39:27 +0100 Subject: [PATCH 6/6] Serhiy's suggestion Co-authored-by: Serhiy Storchaka --- Lib/string.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Lib/string.py b/Lib/string.py index 9ba0769e93eee3..eab5067c9b133e 100644 --- a/Lib/string.py +++ b/Lib/string.py @@ -83,11 +83,8 @@ def __init_subclass__(cls): def _compile_pattern(cls): import re # deferred import, for performance - cls_pattern = cls.__dict__.get('pattern') - if cls_pattern is not None and cls_pattern is not _TemplatePattern: - # Prefer a pattern defined on the class. - pattern = cls_pattern - else: + pattern = cls.__dict__.get('pattern', _TemplatePattern) + if pattern is _TemplatePattern: delim = re.escape(cls.delimiter) id = cls.idpattern bid = cls.braceidpattern or cls.idpattern pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy