diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index b98e6413295e1c..d7d915d51314da 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -77,15 +77,19 @@ def translate(pat): There is no way to quote meta-characters. """ + STAR = object() + res = [] + add = res.append i, n = 0, len(pat) - res = '' while i < n: c = pat[i] i = i+1 if c == '*': - res = res + '.*' + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) elif c == '?': - res = res + '.' + add('.') elif c == '[': j = i if j < n and pat[j] == '!': @@ -95,7 +99,7 @@ def translate(pat): while j < n and pat[j] != ']': j = j+1 if j >= n: - res = res + '\\[' + add('\\[') else: stuff = pat[i:j] if '--' not in stuff: @@ -122,7 +126,49 @@ def translate(pat): stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) + add(f'[{stuff}]') else: - res = res + re.escape(c) - return r'(?s:%s)\Z' % res + add(re.escape(c)) + assert i == n + + # Deal with STARs. + inp = res + res = [] + add = res.append + i, n = 0, len(inp) + # Fixed pieces at the start? + while i < n and inp[i] is not STAR: + add(inp[i]) + i += 1 + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # We can't spell that directly, but can trick it into working by matching + # .*?fixed + # in a lookahead assertion, save the matched part in a group, then + # consume that group via a backreference. If the overall match fails, + # the lookahead assertion won't try alternatives. So the translation is: + # (?=(P.*?fixed))(?P=name) + # Group names are created as needed: g1, g2, g3, ... + groupnum = 0 + while i < n: + assert inp[i] is STAR + i += 1 + if i == n: + add(".*") + break + assert inp[i] is not STAR + fixed = [] + while i < n and inp[i] is not STAR: + fixed.append(inp[i]) + i += 1 + fixed = "".join(fixed) + if i == n: + add(".*") + add(fixed) + else: + groupnum += 1 + add(f"(?=(?P.*?{fixed}))(?P=g{groupnum})") + assert i == n + res = "".join(res) + return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 55f9f0d3a5425a..4c173069503cc6 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -45,6 +45,13 @@ def test_fnmatch(self): check('\nfoo', 'foo*', False) check('\n', '*') + def test_slow_fnmatch(self): + check = self.check_match + check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') + # The next "takes forever" if the regexp translation is + # straightforward. See bpo-40480. + check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) + def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') @@ -107,6 +114,16 @@ def test_translate(self): self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') self.assertEqual(translate('[x'), r'(?s:\[x)\Z') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P.*?a))(?P=g1)(?=(?P.*?a))(?P=g2).*a)\Z') class FilterTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst new file mode 100644 index 00000000000000..d046b1422419d7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst @@ -0,0 +1 @@ +``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking. \ No newline at end of file pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy