Skip to content

Commit b9c46a2

Browse files
authored
bpo-40480 "fnmatch" exponential execution time (GH-19908)
bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time.
1 parent 96074de commit b9c46a2

File tree

3 files changed

+71
-7
lines changed

3 files changed

+71
-7
lines changed

Lib/fnmatch.py

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,19 @@ def translate(pat):
7777
There is no way to quote meta-characters.
7878
"""
7979

80+
STAR = object()
81+
res = []
82+
add = res.append
8083
i, n = 0, len(pat)
81-
res = ''
8284
while i < n:
8385
c = pat[i]
8486
i = i+1
8587
if c == '*':
86-
res = res + '.*'
88+
# compress consecutive `*` into one
89+
if (not res) or res[-1] is not STAR:
90+
add(STAR)
8791
elif c == '?':
88-
res = res + '.'
92+
add('.')
8993
elif c == '[':
9094
j = i
9195
if j < n and pat[j] == '!':
@@ -95,7 +99,7 @@ def translate(pat):
9599
while j < n and pat[j] != ']':
96100
j = j+1
97101
if j >= n:
98-
res = res + '\\['
102+
add('\\[')
99103
else:
100104
stuff = pat[i:j]
101105
if '--' not in stuff:
@@ -122,7 +126,49 @@ def translate(pat):
122126
stuff = '^' + stuff[1:]
123127
elif stuff[0] in ('^', '['):
124128
stuff = '\\' + stuff
125-
res = '%s[%s]' % (res, stuff)
129+
add(f'[{stuff}]')
126130
else:
127-
res = res + re.escape(c)
128-
return r'(?s:%s)\Z' % res
131+
add(re.escape(c))
132+
assert i == n
133+
134+
# Deal with STARs.
135+
inp = res
136+
res = []
137+
add = res.append
138+
i, n = 0, len(inp)
139+
# Fixed pieces at the start?
140+
while i < n and inp[i] is not STAR:
141+
add(inp[i])
142+
i += 1
143+
# Now deal with STAR fixed STAR fixed ...
144+
# For an interior `STAR fixed` pairing, we want to do a minimal
145+
# .*? match followed by `fixed`, with no possibility of backtracking.
146+
# We can't spell that directly, but can trick it into working by matching
147+
# .*?fixed
148+
# in a lookahead assertion, save the matched part in a group, then
149+
# consume that group via a backreference. If the overall match fails,
150+
# the lookahead assertion won't try alternatives. So the translation is:
151+
# (?=(?P<name>.*?fixed))(?P=name)
152+
# Group names are created as needed: g1, g2, g3, ...
153+
groupnum = 0
154+
while i < n:
155+
assert inp[i] is STAR
156+
i += 1
157+
if i == n:
158+
add(".*")
159+
break
160+
assert inp[i] is not STAR
161+
fixed = []
162+
while i < n and inp[i] is not STAR:
163+
fixed.append(inp[i])
164+
i += 1
165+
fixed = "".join(fixed)
166+
if i == n:
167+
add(".*")
168+
add(fixed)
169+
else:
170+
groupnum += 1
171+
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
172+
assert i == n
173+
res = "".join(res)
174+
return fr'(?s:{res})\Z'

Lib/test/test_fnmatch.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ def test_fnmatch(self):
4545
check('\nfoo', 'foo*', False)
4646
check('\n', '*')
4747

48+
def test_slow_fnmatch(self):
49+
check = self.check_match
50+
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
51+
# The next "takes forever" if the regexp translation is
52+
# straightforward. See bpo-40480.
53+
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
54+
4855
def test_mix_bytes_str(self):
4956
self.assertRaises(TypeError, fnmatch, 'test', b'*')
5057
self.assertRaises(TypeError, fnmatch, b'test', '*')
@@ -107,6 +114,16 @@ def test_translate(self):
107114
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
108115
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
109116
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
117+
# from the docs
118+
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
119+
# squash consecutive stars
120+
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
121+
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
122+
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
123+
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
124+
# fancy translation to prevent exponential-time match failure
125+
self.assertEqual(translate('**a*a****a'),
126+
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
110127

111128

112129
class FilterTestCase(unittest.TestCase):
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy