From b7c0e5ea530e2c07cd4d106505f02ca2cb7799c6 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 13:41:51 -0500 Subject: [PATCH 1/5] bpo-40480 "fnmatch" exponential execution time --- Lib/fnmatch.py | 64 +++++++++++++++++++++++++++++++++++----- Lib/test/test_fnmatch.py | 17 +++++++++++ 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index b98e6413295e1c..547695377fc828 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -77,15 +77,19 @@ def translate(pat): There is no way to quote meta-characters. """ + STAR = object() + res = [] + add = res.append i, n = 0, len(pat) - res = '' while i < n: c = pat[i] i = i+1 if c == '*': - res = res + '.*' + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) elif c == '?': - res = res + '.' + add('.') elif c == '[': j = i if j < n and pat[j] == '!': @@ -95,7 +99,7 @@ def translate(pat): while j < n and pat[j] != ']': j = j+1 if j >= n: - res = res + '\\[' + add('\\[') else: stuff = pat[i:j] if '--' not in stuff: @@ -122,7 +126,53 @@ def translate(pat): stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) + add(f'[{stuff}]') else: - res = res + re.escape(c) - return r'(?s:%s)\Z' % res + add(re.escape(c)) + assert i == n + + # Deal with STARs. + inp = res + res = [] + add = res.append + i, n = 0, len(inp) + # Fixed piece at the start? + fixed = [] + while i < n and inp[i] is not STAR: + add(inp[i]) + i += 1 + if fixed: + add("".join(fixed)) + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # We can't spell that directly, but can trick it into working by + # by matching + # .*?fixed + # in a lookahead assertion, save the matched part in a group, then + # consume that group via a backreference. 
If the overall match fails, + # the lookahead assertion won't try alternatives. So the translation is: + # (?=(?P<name>.*?fixed))(?P=name) + # Group names are created as needed: g1, g2, g3, ... + groupnum = 0 + while i < n: + assert inp[i] is STAR + i += 1 + if i == n: + add(".*") + break + assert inp[i] is not STAR + fixed = [] + while i < n and inp[i] is not STAR: + fixed.append(inp[i]) + i += 1 + fixed = "".join(fixed) + if i == n: + add(".*") + add(fixed) + else: + groupnum += 1 + add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})") + assert i == n + res = "".join(res) + return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 55f9f0d3a5425a..4cec840d4cde17 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -44,6 +44,23 @@ def test_fnmatch(self): check('foo\nbar\n', 'foo*') check('\nfoo', 'foo*', False) check('\n', '*') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') + + def test_slow_fnmatch(self): + check = self.check_match + check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') + # The next "takes forever" if the regexp translation is + # straightforward. 
+ check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') From a158f625dc51cdff1f7bdbc4146e8540572e78d5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 13:48:28 -0500 Subject: [PATCH 2/5] trivial edits --- Lib/fnmatch.py | 3 +-- Lib/test/test_fnmatch.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 547695377fc828..03d369778271fe 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -146,8 +146,7 @@ def translate(pat): # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. - # We can't spell that directly, but can trick it into working by - # by matching + # We can't spell that directly, but can trick it into working by matching # .*?fixed # in a lookahead assertion, save the matched part in a group, then # consume that group via a backreference. If the overall match fails, diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 4cec840d4cde17..1e0f9c0fdb0bf2 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -59,7 +59,7 @@ def test_slow_fnmatch(self): check = self.check_match check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') # The next "takes forever" if the regexp translation is - # straightforward. + # straightforward. See bpo-40480. 
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) def test_mix_bytes_str(self): From 1d61b7fa9120e59b3b03637b00219b4b643eee08 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 4 May 2020 21:21:46 +0000 Subject: [PATCH 3/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst diff --git a/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst new file mode 100644 index 00000000000000..d046b1422419d7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst @@ -0,0 +1 @@ +``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking. \ No newline at end of file From fbb7048393041e4a691cb87572212ea8257dd745 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 16:27:27 -0500 Subject: [PATCH 4/5] Moved the block of new translate() tests into the right test function. 
--- Lib/test/test_fnmatch.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 1e0f9c0fdb0bf2..4c173069503cc6 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -44,16 +44,6 @@ def test_fnmatch(self): check('foo\nbar\n', 'foo*') check('\nfoo', 'foo*', False) check('\n', '*') - # from the docs - self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') - # squash consecutive stars - self.assertEqual(translate('*********'), r'(?s:.*)\Z') - self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') - self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') - self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') - # fancy translation to prevent exponential-time match failure - self.assertEqual(translate('**a*a****a'), - r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') def test_slow_fnmatch(self): check = self.check_match @@ -124,6 +114,16 @@ def test_translate(self): self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') self.assertEqual(translate('[x'), r'(?s:\[x)\Z') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z') class FilterTestCase(unittest.TestCase): From 86d1d0d817168d54ecec08cffec3b9b5eb092982 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 4 May 2020 16:35:39 -0500 Subject: [PATCH 5/5] Removed useless code from an earlier version. 
--- Lib/fnmatch.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 03d369778271fe..d7d915d51314da 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -136,13 +136,10 @@ def translate(pat): res = [] add = res.append i, n = 0, len(inp) - # Fixed piece at the start? - fixed = [] + # Fixed pieces at the start? while i < n and inp[i] is not STAR: add(inp[i]) i += 1 - if fixed: - add("".join(fixed)) # Now deal with STAR fixed STAR fixed ... # For an interior `STAR fixed` pairing, we want to do a minimal # .*? match followed by `fixed`, with no possibility of backtracking. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy