From 42b0cdd98a0e8d1c45002c4f64107607ec43a554 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 11 May 2020 20:06:10 -0500 Subject: [PATCH 1/3] In translate(), generate unique group names across calls. This restores the undocumented ability to get a valid regexp by joining multiple translate() results via `|`. --- Lib/fnmatch.py | 17 +++++++++++++---- Lib/test/test_fnmatch.py | 12 ++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index d7d915d51314da..b0ee389160f4c4 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,6 +16,12 @@ __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] +# Build a thread-safe incrementing counter to help create unique regexp group +# names across calls. +from itertools import count +_nextgroupnum = count().__next__ +del count + def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -148,9 +154,12 @@ def translate(pat): # in a lookahead assertion, save the matched part in a group, then # consume that group via a backreference. If the overall match fails, # the lookahead assertion won't try alternatives. So the translation is: - # (?=(P<name>.*?fixed))(?P=name) - # Group names are created as needed: g1, g2, g3, ... - groupnum = 0 + # (P?=(P<name>.*?fixed))(?P=name) + # Group names are created as needed: g0, g1, g2, ... + # The numbers are obtained from _nextgroupnum() to ensure they're unique + # across calls and across threads. This is because people rely on the + # undocumented ability to join multiple translate() results together via + # "|" to build large regexps matching "one of many" shell patterns. 
while i < n: assert inp[i] is STAR i += 1 @@ -167,7 +176,7 @@ def translate(pat): add(".*") add(fixed) else: - groupnum += 1 + groupnum = _nextgroupnum() add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") assert i == n res = "".join(res) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 4c173069503cc6..54b6a478a1ae24 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -106,6 +106,7 @@ def test_warnings(self): class TranslateTestCase(unittest.TestCase): def test_translate(self): + import re self.assertEqual(translate('*'), r'(?s:.*)\Z') self.assertEqual(translate('?'), r'(?s:.)\Z') self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z') @@ -122,8 +123,15 @@ def test_translate(self): self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') # fancy translation to prevent exponential-time match failure - self.assertEqual(translate('**a*a****a'), - r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') + t = translate('**a*a****a') + digits = re.findall(r'\d+', t) + self.assertEqual(len(digits), 4) + self.assertEqual(digits[0], digits[1]) + self.assertEqual(digits[2], digits[3]) + g1 = f"g{digits[0]}" # e.g., group name "g4" + g2 = f"g{digits[2]}" # e.g., group name "g5" + self.assertEqual(t, + fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z') class FilterTestCase(unittest.TestCase): From 002baa68580d88bce4873ac3ab6cbe6d78ce789b Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 11 May 2020 20:08:47 -0500 Subject: [PATCH 2/3] Not sure why I have to do this again :-( --- Lib/test/test_fnmatch.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 54b6a478a1ae24..10668e4f6103aa 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -132,7 +132,17 @@ def test_translate(self): g2 = f"g{digits[2]}" # e.g., group name "g5" self.assertEqual(t, 
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z') - + # and try pasting multiple translate results - it's an undocumented + # feature that this works; all the pain of generating unique group + # names across calls exists to support this + r1 = translate('**a**a**a*') + r2 = translate('**b**b**b*') + r3 = translate('*c*c*c*') + fatre = "|".join([r1, r2, r3]) + self.assertTrue(re.match(fatre, 'abaccad')) + self.assertTrue(re.match(fatre, 'abxbcab')) + self.assertTrue(re.match(fatre, 'cbabcaxc')) + self.assertFalse(re.match(fatre, 'dabccbad')) class FilterTestCase(unittest.TestCase): From 770ec51cd5edeb31f7eb48981f37ddb5c35a03f5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 11 May 2020 20:46:48 -0500 Subject: [PATCH 3/3] Repair comment. --- Lib/fnmatch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index b0ee389160f4c4..0eb1802bdb53c5 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -154,7 +154,7 @@ def translate(pat): # in a lookahead assertion, save the matched part in a group, then # consume that group via a backreference. If the overall match fails, # the lookahead assertion won't try alternatives. So the translation is: - # (P?=(P<name>.*?fixed))(?P=name) + # (?=(?P<name>.*?fixed))(?P=name) # Group names are created as needed: g0, g1, g2, ... # The numbers are obtained from _nextgroupnum() to ensure they're unique # across calls and across threads. This is because people rely on the pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy