Skip to content

Commit f50cb08

Browse files
committed
Unbreak sanitizer tests and convert to nose
1 parent 99f4bf1 commit f50cb08

File tree

2 files changed

+63
-85
lines changed

2 files changed

+63
-85
lines changed

html5lib/sanitizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,11 +245,11 @@ def sanitize_css(self, style):
245245

246246
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
247247
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
248-
lowercaseElementName=False, lowercaseAttrName=False):
248+
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
249249
#Change case matching defaults as we only output lowercase html anyway
250250
#This solution doesn't seem ideal...
251251
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
252-
lowercaseElementName, lowercaseAttrName)
252+
lowercaseElementName, lowercaseAttrName, parser=parser)
253253

254254
def __iter__(self):
255255
for token in HTMLTokenizer.__iter__(self):

html5lib/tests/test_sanitizer.py

Lines changed: 61 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -7,92 +7,70 @@
77
except ImportError:
88
import simplejson as json
99

10-
from support import html5lib_test_files
1110
from html5lib import html5parser, sanitizer, constants
1211

13-
class SanitizeTest(unittest.TestCase):
14-
def addTest(cls, name, expected, input):
15-
def test(self, expected=expected, input=input):
16-
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
17-
parseFragment(expected).childNodes])
18-
expected = json.loads(json.dumps(expected))
19-
self.assertEqual(expected, self.sanitize_html(input))
20-
setattr(cls, name, test)
21-
addTest = classmethod(addTest)
12+
def runSanitizerTest(name, expected, input):
13+
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
14+
parseFragment(expected).childNodes])
15+
expected = json.loads(json.dumps(expected))
16+
assert expected == sanitize_html(input)
2217

23-
def sanitize_html(self,stream):
18+
def sanitize_html(stream):
2419
return ''.join([token.toxml() for token in
25-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
26-
parseFragment(stream).childNodes])
27-
28-
def test_should_handle_astral_plane_characters(self):
29-
self.assertEqual(u"<p>\U0001d4b5 \U0001d538</p>",
30-
self.sanitize_html("<p>&#x1d4b5; &#x1d538;</p>"))
31-
32-
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
33-
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']: continue ### TODO
34-
if tag_name != tag_name.lower(): continue ### TODO
35-
if tag_name == 'image':
36-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
37-
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
38-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
39-
elif tag_name == 'br':
40-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
41-
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
20+
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
21+
parseFragment(stream).childNodes])
22+
23+
def test_should_handle_astral_plane_characters():
24+
assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
25+
26+
def test_sanitizer():
27+
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
28+
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
29+
continue ### TODO
30+
if tag_name != tag_name.lower():
31+
continue ### TODO
32+
if tag_name == 'image':
33+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
34+
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
35+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
36+
elif tag_name == 'br':
37+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
38+
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
39+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
40+
elif tag_name in constants.voidElements:
41+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
42+
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
43+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
44+
else:
45+
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
46+
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
47+
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
48+
49+
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
50+
tag_name = tag_name.upper()
51+
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
52+
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
4253
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
43-
elif tag_name in constants.voidElements:
44-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
45-
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
46-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
47-
else:
48-
SanitizeTest.addTest("test_should_allow_%s_tag" % tag_name,
49-
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
50-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
51-
52-
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
53-
tag_name = tag_name.upper()
54-
SanitizeTest.addTest("test_should_forbid_%s_tag" % tag_name,
55-
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
56-
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
57-
58-
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
59-
if attribute_name != attribute_name.lower(): continue ### TODO
60-
if attribute_name == 'style': continue
61-
SanitizeTest.addTest("test_should_allow_%s_attribute" % attribute_name,
62-
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
63-
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
64-
65-
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
66-
attribute_name = attribute_name.upper()
67-
SanitizeTest.addTest("test_should_forbid_%s_attribute" % attribute_name,
68-
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
69-
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
70-
71-
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
72-
SanitizeTest.addTest("test_should_allow_%s_uris" % protocol,
73-
"<a href=\"%s\">foo</a>" % protocol,
74-
"""<a href="%s">foo</a>""" % protocol)
75-
76-
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
77-
SanitizeTest.addTest("test_should_allow_uppercase_%s_uris" % protocol,
78-
"<a href=\"%s\">foo</a>" % protocol,
79-
"""<a href="%s">foo</a>""" % protocol)
80-
81-
def buildTestSuite():
82-
for filename in html5lib_test_files("sanitizer"):
83-
for test in json.load(file(filename)):
84-
SanitizeTest.addTest('test_' + test['name'], test['output'], test['input'])
85-
86-
return unittest.TestLoader().loadTestsFromTestCase(SanitizeTest)
87-
88-
def sanitize_html(stream):
89-
return ''.join([token.toxml() for token in
90-
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
91-
parseFragment(stream).childNodes])
92-
93-
def main():
94-
buildTestSuite()
95-
unittest.main()
9654

97-
if __name__ == "__main__":
98-
main()
55+
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
56+
if attribute_name != attribute_name.lower(): continue ### TODO
57+
if attribute_name == 'style': continue
58+
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
59+
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
60+
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
61+
62+
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
63+
attribute_name = attribute_name.upper()
64+
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
65+
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
66+
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
67+
68+
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
69+
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
70+
"<a href=\"%s\">foo</a>" % protocol,
71+
"""<a href="%s">foo</a>""" % protocol)
72+
73+
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
74+
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
75+
"<a href=\"%s\">foo</a>" % protocol,
76+
"""<a href="%s">foo</a>""" % protocol)

0 commit comments

Comments (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy