From b0d836e7f40b983abf64b7dd3d50d5de1d27ab29 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 15 Mar 2023 00:39:14 +0000 Subject: [PATCH 1/8] GH-81079: Add case_sensitive argument to pathlib.Path.glob() This argument allows case-sensitive matching to be enabled on Windows, and case-insensitive matching to be enabled on Posix. --- Doc/library/pathlib.rst | 20 ++++- Lib/pathlib.py | 90 +++++++++---------- Lib/test/test_pathlib.py | 16 +++- ...3-03-15-00-37-43.gh-issue-81079.heTAod.rst | 2 + 4 files changed, 76 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8e91936680fab8..ac793c416e4822 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.5 -.. method:: Path.glob(pattern) +.. method:: Path.glob(pattern, *, case_sensitive=None) Glob the given relative *pattern* in the directory represented by this path, yielding all matching files (of any kind):: @@ -873,6 +873,11 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. The + *case_sensitive* keyword-only argument can be set to true or false to + override this behaviour. + .. note:: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. @@ -883,6 +888,9 @@ call fails (for example because the path doesn't exist). Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.group() Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1268,7 +1276,7 @@ call fails (for example because the path doesn't exist). .. versionadded:: 3.6 The *strict* argument (pre-3.6 behavior is strict). -.. method:: Path.rglob(pattern) +.. method:: Path.rglob(pattern, *, case_sensitive=None) Glob the given relative *pattern* recursively. This is like calling :func:`Path.glob` with "``**/``" added in front of the *pattern*, where @@ -1281,12 +1289,20 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] + By default, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. The + *case_sensitive* keyword-only argument can be set to true or false to + override this behaviour. + .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob .. versionchanged:: 3.11 Return only directories if *pattern* ends with a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + .. versionadded:: 3.12 + The *case_sensitive* argument. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 55c44f12e5a2fb..23f9290861ae57 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,18 +54,16 @@ def _ignore_error(exception): return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) - -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - # # Globbing helpers # +def _is_case_sensitive(flavour): + return flavour.normcase('Aa') == 'Aa' + + @functools.lru_cache() -def _make_selector(pattern_parts, flavour): +def _make_selector(pattern_parts, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -74,21 +72,21 @@ def _make_selector(pattern_parts, flavour): cls = _RecursiveWildcardSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat): - cls = _WildcardSelector + elif pat == '..': + cls = _ParentSelector else: - cls = _PreciseSelector - return cls(pat, child_parts, flavour) + cls = _WildcardSelector + return cls(pat, child_parts, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour): + def __init__(self, child_parts, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour) + self.successor = _make_selector(child_parts, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -98,44 +96,36 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists scandir = path_cls._scandir - normcase = path_cls._flavour.normcase - if not is_dir(parent_path): + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir, normcase) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): yield parent_path -class _PreciseSelector(_Selector): +class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour): - self.name = name - _Selector.__init__(self, child_parts, flavour) + def __init__(self, name, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): - yield p - except PermissionError: - return + def _select_from(self, parent_path, scandir): + path = parent_path._make_child_relpath('..') + return self.successor._select_from(path, scandir) class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_sensitive): + flags = re.NOFLAG if case_sensitive else re.IGNORECASE + self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -154,9 +144,9 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): raise continue name = entry.name - if self.match(normcase(name)): + if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir, normcase): + for p in self.successor._select_from(path, scandir): yield p except PermissionError: return @@ -164,10 +154,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour): - _Selector.__init__(self, child_parts, flavour) + def __init__(self, pat, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -183,18 +173,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): + for p in self._iterate_directories(path, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir, normcase): + def _select_from(self, parent_path, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir, normcase): + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): if p not in yielded: yield p yielded.add(p) @@ -763,7 +753,7 @@ def _scandir(self): # includes scandir(), which is used to implement glob(). return os.scandir(self) - def glob(self, pattern): + def glob(self, pattern, *, case_sensitive=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ @@ -775,11 +765,13 @@ def glob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(tuple(pattern_parts), self._flavour) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + selector = _make_selector(tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p - def rglob(self, pattern): + def rglob(self, pattern, *, case_sensitive=None): """Recursively yield all existing files (of any kind, including directories) matching the given relative pattern, anywhere in this subtree. @@ -790,7 +782,9 @@ def rglob(self, pattern): raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self._flavour) + selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f05dead5886743..cd43d3854dad15 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1777,6 +1777,18 @@ def _check(glob, expected): else: _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) + def test_glob_case_sensitive(self): + P = self.cls + def _check(path, pattern, case_sensitive, expected): + actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)} + expected = {str(P(BASE, q)) for q in expected} + self.assertEqual(actual, expected) + path = P(BASE) + _check(path, "DIRB/FILE*", True, []) + _check(path, "DIRB/FILE*", False, ["dirB/fileB"]) + _check(path, "dirb/file*", True, []) + _check(path, "dirb/file*", False, ["dirB/fileB"]) + def test_rglob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(BASE, q) for q in expected }) @@ -3053,7 +3065,7 @@ def test_glob(self): self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") }) self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") }) self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"}) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_rglob(self): @@ -3061,7 +3073,7 @@ def test_rglob(self): p = P(BASE, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"}) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) def test_expanduser(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst new file mode 100644 index 00000000000000..ef5690533985d5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-15-00-37-43.gh-issue-81079.heTAod.rst @@ -0,0 +1,2 @@ +Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and +:meth:`~pathlib.Path.rglob`. From dc82494fb4b70551da06de47527cf17ccb495b1b Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 2 May 2023 20:07:19 +0100 Subject: [PATCH 2/8] Describe behaviour when case_sensitive is None. --- Doc/library/pathlib.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index ac793c416e4822..779e8741b16528 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -873,10 +873,10 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] - By default, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. The - *case_sensitive* keyword-only argument can be set to true or false to - override this behaviour. + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. Set + *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. note:: Using the "``**``" pattern in large directory trees may consume @@ -1289,10 +1289,10 @@ call fails (for example because the path doesn't exist). PosixPath('setup.py'), PosixPath('test_pathlib.py')] - By default, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. The - *case_sensitive* keyword-only argument can be set to true or false to - override this behaviour. + By default, or when the *case_sensitive* keyword-only argument is set to + ``None``, this method matches paths using platform-specific casing rules: + case-sensitive on POSIX, and case-insensitive on Windows. Set + *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob From d097f8c3b8e228ab75816a95b2d2daf395f09ce5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 00:52:11 +0100 Subject: [PATCH 3/8] Restore `_PreciseSelector` --- Lib/pathlib.py | 70 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f999f8263a0b85..2c567a2587257b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -67,7 +67,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, case_sensitive): +def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -78,19 +78,21 @@ def _make_selector(pattern_parts, case_sensitive): cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - else: + elif _is_wildcard_pattern(pat) or case_sensitive != _is_case_sensitive(flavour): cls = _WildcardSelector - return cls(pat, child_parts, case_sensitive) + else: + cls = _PreciseSelector + return cls(pat, child_parts, flavour, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, case_sensitive): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, case_sensitive) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -100,37 +102,55 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) + is_dir = path_cls.is_dir + exists = path_cls.exists scandir = path_cls._scandir - if not parent_path.is_dir(): + if not is_dir(parent_path): return iter([]) - return self._select_from(parent_path, scandir) + return self._select_from(parent_path, is_dir, exists, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): yield parent_path class _ParentSelector(_Selector): - def __init__(self, name, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p +class _PreciseSelector(_Selector): + + def __init__(self, name, child_parts, flavour, case_sensitive): + self.name = name + _Selector.__init__(self, child_parts, flavour, case_sensitive) + + def _select_from(self, parent_path, is_dir, exists, scandir): + try: + path = parent_path._make_child_relpath(self.name) + if (is_dir if self.dironly else exists)(path): + for p in self.successor._select_from(path, is_dir, exists, scandir): + yield p + except PermissionError: + return + + class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): + def __init__(self, pat, child_parts, flavour, case_sensitive): flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, case_sensitive) + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -151,7 +171,7 @@ def _select_from(self, parent_path, scandir): name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, scandir): + for p in self.successor._select_from(path, is_dir, exists, scandir): yield p except PermissionError: return @@ -159,10 +179,10 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) - def _iterate_directories(self, parent_path, scandir): + def _iterate_directories(self, parent_path, is_dir, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -178,18 +198,18 @@ def _iterate_directories(self, parent_path, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, scandir): + for p in self._iterate_directories(path, is_dir, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, scandir): + def _select_from(self, parent_path, is_dir, exists, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, scandir): - for p in successor_select(starting_point, scandir): + for starting_point in self._iterate_directories(parent_path, is_dir, scandir): + for p in successor_select(starting_point, is_dir, exists, scandir): if p not in yielded: yield p yielded.add(p) @@ -839,7 +859,7 @@ def glob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), case_sensitive) + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p @@ -856,7 +876,7 @@ def rglob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p From 33b435d56bfad4a649729b2aacdcf2c2f3710b31 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 01:46:39 +0100 Subject: [PATCH 4/8] Revert "Restore `_PreciseSelector`" This reverts commit d097f8c3b8e228ab75816a95b2d2daf395f09ce5. --- Lib/pathlib.py | 70 ++++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 2c567a2587257b..f999f8263a0b85 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -67,7 +67,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, flavour, case_sensitive): +def _make_selector(pattern_parts, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -78,21 +78,19 @@ def _make_selector(pattern_parts, flavour, case_sensitive): cls = _ParentSelector elif '**' in pat: raise ValueError("Invalid pattern: '**' can only be an entire path component") - elif _is_wildcard_pattern(pat) or case_sensitive != _is_case_sensitive(flavour): - cls = _WildcardSelector else: - cls = _PreciseSelector - return cls(pat, child_parts, flavour, case_sensitive) + cls = _WildcardSelector + return cls(pat, child_parts, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, flavour, case_sensitive): + def __init__(self, child_parts, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, flavour, case_sensitive) + self.successor = _make_selector(child_parts, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -102,55 +100,37 @@ def select_from(self, parent_path): """Iterate over all child paths of `parent_path` matched by this selector. This can contain parent_path itself.""" path_cls = type(parent_path) - is_dir = path_cls.is_dir - exists = path_cls.exists scandir = path_cls._scandir - if not is_dir(parent_path): + if not parent_path.is_dir(): return iter([]) - return self._select_from(parent_path, is_dir, exists, scandir) + return self._select_from(parent_path, scandir) class _TerminatingSelector: - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): yield parent_path class _ParentSelector(_Selector): - def __init__(self, name, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) + def __init__(self, name, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p -class _PreciseSelector(_Selector): - - def __init__(self, name, child_parts, flavour, case_sensitive): - self.name = name - _Selector.__init__(self, child_parts, flavour, case_sensitive) - - def _select_from(self, parent_path, is_dir, exists, scandir): - try: - path = parent_path._make_child_relpath(self.name) - if (is_dir if self.dironly else exists)(path): - for p in self.successor._select_from(path, is_dir, exists, scandir): - yield p - except PermissionError: - return - - class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour, case_sensitive): + def __init__(self, pat, child_parts, case_sensitive): flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, flavour, case_sensitive) + _Selector.__init__(self, child_parts, case_sensitive) - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: # We must close the scandir() object before proceeding to # avoid exhausting file descriptors when globbing deep trees. @@ -171,7 +151,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir): name = entry.name if self.match(name): path = parent_path._make_child_relpath(name) - for p in self.successor._select_from(path, is_dir, exists, scandir): + for p in self.successor._select_from(path, scandir): yield p except PermissionError: return @@ -179,10 +159,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, flavour, case_sensitive): - _Selector.__init__(self, child_parts, flavour, case_sensitive) + def __init__(self, pat, child_parts, case_sensitive): + _Selector.__init__(self, child_parts, case_sensitive) - def _iterate_directories(self, parent_path, is_dir, scandir): + def _iterate_directories(self, parent_path, scandir): yield parent_path try: # We must close the scandir() object before proceeding to @@ -198,18 +178,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir): raise if entry_is_dir and not entry.is_symlink(): path = parent_path._make_child_relpath(entry.name) - for p in self._iterate_directories(path, is_dir, scandir): + for p in self._iterate_directories(path, scandir): yield p except PermissionError: return - def _select_from(self, parent_path, is_dir, exists, scandir): + def _select_from(self, parent_path, scandir): try: yielded = set() try: successor_select = self.successor._select_from - for starting_point in self._iterate_directories(parent_path, is_dir, scandir): - for p in successor_select(starting_point, is_dir, exists, scandir): + for starting_point in self._iterate_directories(parent_path, scandir): + for p in successor_select(starting_point, scandir): if p not in yielded: yield p yielded.add(p) @@ -859,7 +839,7 @@ def glob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) + selector = _make_selector(tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p @@ -876,7 +856,7 @@ def rglob(self, pattern, *, case_sensitive=None): pattern_parts.append('') if case_sensitive is None: case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) for p in selector.select_from(self): yield p From 203e734027bef0cc60ab4569e7a61d2907878ce4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 3 May 2023 01:48:21 +0100 Subject: [PATCH 5/8] Remove unused `_is_wildcard_pattern()` function --- Lib/pathlib.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f999f8263a0b85..da732dfe7ae554 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -54,11 +54,6 @@ def _ignore_error(exception): getattr(exception, 'winerror', None) in _IGNORED_WINERRORS) -def _is_wildcard_pattern(pat): - # Whether this pattern needs actual matching using fnmatch, or can - # be looked up directly as a file. - return "*" in pat or "?" in pat or "[" in pat - def _is_case_sensitive(flavour): return flavour.normcase('Aa') == 'Aa' From 2146b2b80e374a1ce911388a71fad41b9dca2feb Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 3 May 2023 18:56:14 +0100 Subject: [PATCH 6/8] Update Doc/library/pathlib.rst Co-authored-by: Steve Dower --- Doc/library/pathlib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 6d84ef214b685f..3ffe57b437c6c5 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -878,8 +878,8 @@ call fails (for example because the path doesn't exist). By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. Set - *case_sensitive* to ``True`` or ``False`` to override this behaviour. + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. note:: Using the "``**``" pattern in large directory trees may consume From b2c4b47b5e746c2254e28580ffdb5866dabee4d7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 May 2023 10:22:10 +0100 Subject: [PATCH 7/8] Pass flavour to selector initialiser --- Lib/pathlib.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c990daef28c42f..f32e1e2d822834 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -62,7 +62,7 @@ def _is_case_sensitive(flavour): # @functools.lru_cache() -def _make_selector(pattern_parts, case_sensitive): +def _make_selector(pattern_parts, flavour, case_sensitive): pat = pattern_parts[0] child_parts = pattern_parts[1:] if not pat: @@ -75,17 +75,17 @@ def _make_selector(pattern_parts, case_sensitive): raise ValueError("Invalid pattern: '**' can only be an entire path component") else: cls = _WildcardSelector - return cls(pat, child_parts, case_sensitive) + return cls(pat, child_parts, flavour, case_sensitive) class _Selector: """A selector matches a specific glob pattern part against the children of a given path.""" - def __init__(self, child_parts, case_sensitive): + def __init__(self, child_parts, flavour, case_sensitive): self.child_parts = child_parts if child_parts: - self.successor = _make_selector(child_parts, case_sensitive) + self.successor = _make_selector(child_parts, flavour, case_sensitive) self.dironly = True else: self.successor = _TerminatingSelector() @@ -109,8 +109,8 @@ def _select_from(self, parent_path, scandir): class _ParentSelector(_Selector): - def __init__(self, name, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, name, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _select_from(self, parent_path, scandir): path = parent_path._make_child_relpath('..') @@ -120,10 +120,13 @@ def _select_from(self, parent_path, scandir): class _WildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) + if case_sensitive is None: + # TODO: evaluate case-sensitivity of each directory in _select_from() + case_sensitive = _is_case_sensitive(flavour) flags = re.NOFLAG if case_sensitive else re.IGNORECASE self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch - _Selector.__init__(self, child_parts, case_sensitive) def _select_from(self, parent_path, scandir): try: @@ -154,8 +157,8 @@ def _select_from(self, parent_path, scandir): class _RecursiveWildcardSelector(_Selector): - def __init__(self, pat, child_parts, case_sensitive): - _Selector.__init__(self, child_parts, case_sensitive) + def __init__(self, pat, child_parts, flavour, case_sensitive): + _Selector.__init__(self, child_parts, flavour, case_sensitive) def _iterate_directories(self, parent_path, scandir): yield parent_path @@ -832,9 +835,7 @@ def glob(self, pattern, *, case_sensitive=None): raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(tuple(pattern_parts), case_sensitive) + selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p @@ -849,9 +850,7 @@ def rglob(self, pattern, *, case_sensitive=None): raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): pattern_parts.append('') - if case_sensitive is None: - case_sensitive = _is_case_sensitive(self._flavour) - selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive) + selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive) for p in selector.select_from(self): yield p From 0d697adf74980a14a3ffa2f0370b15e60e228e09 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 May 2023 17:00:47 +0100 Subject: [PATCH 8/8] Improve consistency between glob() and rglob() docs --- Doc/library/pathlib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 3ffe57b437c6c5..14118127835bbe 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1294,8 +1294,8 @@ call fails (for example because the path doesn't exist). By default, or when the *case_sensitive* keyword-only argument is set to ``None``, this method matches paths using platform-specific casing rules: - case-sensitive on POSIX, and case-insensitive on Windows. Set - *case_sensitive* to ``True`` or ``False`` to override this behaviour. + typically, case-sensitive on POSIX, and case-insensitive on Windows. + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy