Skip to content

Commit d6018f2

Browse files
Greg Guthe (g-k)
authored and committed
fix bug 1623633
expand and comment on sanitize_css gauntlet regex per https://github.com/mozilla/bleach/pull/61/files#r677453
1 parent fc77027 commit d6018f2

File tree

2 files changed

+35
-8
lines changed

2 files changed

+35
-8
lines changed

bleach/sanitizer.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,14 @@ def sanitize_css(self, style):
593593
# the whole thing.
594594
parts = style.split(';')
595595
gauntlet = re.compile(
596-
r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""",
597-
flags=re.U
596+
r"""^( # consider a style attribute value as composed of:
597+
[/:,#%!.\s\w] # a non-newline character
598+
|\w-\w # 3 characters in the form \w-\w
599+
|'[\s\w]+'\s* # a single quoted string of [\s\w]+ with trailing space
600+
|"[\s\w]+" # a double quoted string of [\s\w]+
601+
|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)''
602+
)*$""",
603+
flags=re.U | re.VERBOSE
598604
)
599605

600606
for part in parts:

tests/test_css.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import unicode_literals
22

33
from functools import partial
4+
from timeit import timeit
45

56
import pytest
67

@@ -37,10 +38,12 @@
3738
'<p style="color: red;">bar</p>'
3839
),
3940
# Handle leading - in attributes
40-
(
41+
# regressed with the fix for bug 1623633
42+
pytest.param(
4143
'<p style="cursor: -moz-grab;">bar</p>',
4244
['cursor'],
43-
'<p style="cursor: -moz-grab;">bar</p>'
45+
'<p style="cursor: -moz-grab;">bar</p>',
46+
marks=pytest.mark.xfail,
4447
),
4548
# Handle () in attributes
4649
(
@@ -54,16 +57,20 @@
5457
'<p style="color: rgba(255,0,0,0.4);">bar</p>',
5558
),
5659
# Handle ' in attributes
57-
(
60+
# regressed with the fix for bug 1623633
61+
pytest.param(
5862
'<p style="text-overflow: \',\' ellipsis;">bar</p>',
5963
['text-overflow'],
60-
'<p style="text-overflow: \',\' ellipsis;">bar</p>'
64+
'<p style="text-overflow: \',\' ellipsis;">bar</p>',
65+
marks=pytest.mark.xfail,
6166
),
6267
# Handle " in attributes
63-
(
68+
# regressed with the fix for bug 1623633
69+
pytest.param(
6470
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
6571
['text-overflow'],
66-
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>'
72+
'<p style=\'text-overflow: "," ellipsis;\'>bar</p>',
73+
marks=pytest.mark.xfail,
6774
),
6875
(
6976
'<p style=\'font-family: "Arial";\'>bar</p>',
@@ -223,3 +230,17 @@ def test_style_hang():
223230
def test_css_parsing_with_entities(data, styles, expected):
224231
"""The sanitizer should be ok with character entities"""
225232
assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected
233+
234+
235+
@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"])
236+
def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char):
237+
"""The sanitizer gauntlet regex should not catastrophically backtrack"""
238+
# refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633
239+
240+
def time_clean(test_char, size):
241+
style_attr_value = (test_char + 'a' + test_char) * size + '^'
242+
stmt = """clean('''<a style='%s'></a>''', attributes={'a': ['style']})""" % style_attr_value
243+
return timeit(stmt=stmt, setup='from bleach import clean', number=1)
244+
245+
# should complete in less than one second
246+
assert time_clean(overlap_test_char, 22) < 1.0

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy