Skip to content

Commit 9dc49f6

Browse files
committed
Merge pull request html5lib#240 from gsnedders/tree_types
Sort out the tokenizer and tree-construction tests; r=nobody!
2 parents b5b91cc + 68d6f34 commit 9dc49f6

File tree

6 files changed

+711
-924
lines changed

6 files changed

+711
-924
lines changed

.pytest.expect

Lines changed: 459 additions & 729 deletions
Large diffs are not rendered by default.

html5lib/tests/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
import os.path
22

33
from .tree_construction import TreeConstructionFile
4+
from .tokenizer import TokenizerFile
45

56
_dir = os.path.abspath(os.path.dirname(__file__))
67
_testdata = os.path.join(_dir, "testdata")
78
_tree_construction = os.path.join(_testdata, "tree-construction")
9+
_tokenizer = os.path.join(_testdata, "tokenizer")
810

911

1012
def pytest_collectstart():
@@ -19,3 +21,6 @@ def pytest_collect_file(path, parent):
1921
return
2022
if path.ext == ".dat":
2123
return TreeConstructionFile(path, parent)
24+
elif dir == _tokenizer:
25+
if path.ext == ".test":
26+
return TokenizerFile(path, parent)

html5lib/tests/support.py

Lines changed: 33 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,16 +13,24 @@
1313
os.path.pardir,
1414
os.path.pardir)))
1515

16-
from html5lib import treebuilders
16+
from html5lib import treebuilders, treewalkers, treeadapters
1717
del base_path
1818

1919
# Build a dict of available trees
20-
treeTypes = {"DOM": treebuilders.getTreeBuilder("dom")}
20+
treeTypes = {}
2121

22-
# Try whatever etree implementations are available from a list that are
23-
#"supposed" to work
22+
# DOM impls
23+
treeTypes["DOM"] = {
24+
"builder": treebuilders.getTreeBuilder("dom"),
25+
"walker": treewalkers.getTreeWalker("dom")
26+
}
27+
28+
# ElementTree impls
2429
import xml.etree.ElementTree as ElementTree
25-
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
30+
treeTypes['ElementTree'] = {
31+
"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
32+
"walker": treewalkers.getTreeWalker("etree", ElementTree)
33+
}
2634

2735
try:
2836
import xml.etree.cElementTree as cElementTree
@@ -33,14 +41,32 @@
3341
if cElementTree.Element is ElementTree.Element:
3442
treeTypes['cElementTree'] = None
3543
else:
36-
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
44+
treeTypes['cElementTree'] = {
45+
"builder": treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True),
46+
"walker": treewalkers.getTreeWalker("etree", cElementTree)
47+
}
3748

3849
try:
3950
import lxml.etree as lxml # flake8: noqa
4051
except ImportError:
4152
treeTypes['lxml'] = None
4253
else:
43-
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml")
54+
treeTypes['lxml'] = {
55+
"builder": treebuilders.getTreeBuilder("lxml"),
56+
"walker": treewalkers.getTreeWalker("lxml")
57+
}
58+
59+
# Genshi impls
60+
try:
61+
import genshi # flake8: noqa
62+
except ImportError:
63+
pass
64+
else:
65+
treeTypes["genshi"] = {
66+
"builder": treebuilders.getTreeBuilder("dom"),
67+
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
68+
"walker": treewalkers.getTreeWalker("genshi")
69+
}
4470

4571

4672
def get_data_files(subdirectory, files='*.dat'):

html5lib/tests/test_treewalkers.py

Lines changed: 28 additions & 133 deletions
Original file line number | Diff line number | Diff line change
@@ -1,68 +1,12 @@
11
from __future__ import absolute_import, division, unicode_literals
22

3-
import os
4-
import sys
5-
import unittest
6-
import warnings
7-
from difflib import unified_diff
3+
import pytest
84

9-
try:
10-
unittest.TestCase.assertEqual
11-
except AttributeError:
12-
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
5+
from .support import treeTypes
136

14-
from .support import get_data_files, TestData, convertExpected
15-
16-
from html5lib import html5parser, treewalkers, treebuilders, treeadapters, constants
7+
from html5lib import html5parser, treewalkers
178
from html5lib.filters.lint import Filter as Lint
189

19-
20-
treeTypes = {
21-
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
22-
"walker": treewalkers.getTreeWalker("dom")},
23-
}
24-
25-
# Try whatever etree implementations are available from a list that are
26-
#"supposed" to work
27-
try:
28-
import xml.etree.ElementTree as ElementTree
29-
except ImportError:
30-
pass
31-
else:
32-
treeTypes['ElementTree'] = \
33-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
34-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
35-
36-
try:
37-
import xml.etree.cElementTree as ElementTree
38-
except ImportError:
39-
pass
40-
else:
41-
treeTypes['cElementTree'] = \
42-
{"builder": treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True),
43-
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
44-
45-
46-
try:
47-
import lxml.etree as ElementTree # flake8: noqa
48-
except ImportError:
49-
pass
50-
else:
51-
treeTypes['lxml_native'] = \
52-
{"builder": treebuilders.getTreeBuilder("lxml"),
53-
"walker": treewalkers.getTreeWalker("lxml")}
54-
55-
56-
try:
57-
import genshi # flake8: noqa
58-
except ImportError:
59-
pass
60-
else:
61-
treeTypes["genshi"] = \
62-
{"builder": treebuilders.getTreeBuilder("dom"),
63-
"adapter": lambda tree: treeadapters.genshi.to_genshi(treewalkers.getTreeWalker("dom")(tree)),
64-
"walker": treewalkers.getTreeWalker("genshi")}
65-
6610
import re
6711
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
6812

@@ -73,80 +17,29 @@ def sortattrs(x):
7317
return "\n".join(lines)
7418

7519

76-
class TokenTestCase(unittest.TestCase):
77-
def test_all_tokens(self):
78-
expected = [
79-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
80-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
81-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
82-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
83-
{'data': 'a', 'type': 'Characters'},
84-
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
85-
{'data': 'b', 'type': 'Characters'},
86-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
87-
{'data': 'c', 'type': 'Characters'},
88-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
89-
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
90-
]
91-
for treeName, treeCls in sorted(treeTypes.items()):
92-
p = html5parser.HTMLParser(tree=treeCls["builder"])
93-
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
94-
document = treeCls.get("adapter", lambda x: x)(document)
95-
output = Lint(treeCls["walker"](document))
96-
for expectedToken, outputToken in zip(expected, output):
97-
self.assertEqual(expectedToken, outputToken)
98-
99-
100-
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
101-
warnings.resetwarnings()
102-
warnings.simplefilter("error")
103-
try:
104-
p = html5parser.HTMLParser(tree=treeClass["builder"])
105-
if innerHTML:
106-
document = p.parseFragment(input, innerHTML)
107-
else:
108-
document = p.parse(input)
109-
except constants.DataLossWarning:
110-
# Ignore testcases we know we don't pass
111-
return
112-
113-
document = treeClass.get("adapter", lambda x: x)(document)
114-
try:
115-
output = treewalkers.pprint(Lint(treeClass["walker"](document)))
116-
output = attrlist.sub(sortattrs, output)
117-
expected = attrlist.sub(sortattrs, convertExpected(expected))
118-
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
119-
[line + "\n" for line in output.splitlines()],
120-
"Expected", "Received"))
121-
assert expected == output, "\n".join([
122-
"", "Input:", input,
123-
"", "Expected:", expected,
124-
"", "Received:", output,
125-
"", "Diff:", diff,
126-
])
127-
except NotImplementedError:
128-
pass # Amnesty for those that confess...
129-
130-
131-
def test_treewalker():
132-
sys.stdout.write('Testing tree walkers ' + " ".join(list(treeTypes.keys())) + "\n")
133-
20+
def test_all_tokens():
21+
expected = [
22+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
23+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
24+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
25+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
26+
{'data': 'a', 'type': 'Characters'},
27+
{'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
28+
{'data': 'b', 'type': 'Characters'},
29+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
30+
{'data': 'c', 'type': 'Characters'},
31+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
32+
{'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
33+
]
13434
for treeName, treeCls in sorted(treeTypes.items()):
135-
files = get_data_files('tree-construction')
136-
for filename in files:
137-
testName = os.path.basename(filename).replace(".dat", "")
138-
if testName in ("template",):
139-
continue
140-
141-
tests = TestData(filename, "data")
142-
143-
for index, test in enumerate(tests):
144-
(input, errors,
145-
innerHTML, expected) = [test[key] for key in ("data", "errors",
146-
"document-fragment",
147-
"document")]
148-
errors = errors.split("\n")
149-
yield runTreewalkerTest, innerHTML, input, expected, errors, treeCls
35+
if treeCls is None:
36+
continue
37+
p = html5parser.HTMLParser(tree=treeCls["builder"])
38+
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
39+
document = treeCls.get("adapter", lambda x: x)(document)
40+
output = Lint(treeCls["walker"](document))
41+
for expectedToken, outputToken in zip(expected, output):
42+
assert expectedToken == outputToken
15043

15144

15245
def set_attribute_on_first_child(docfrag, name, value, treeName):
@@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
16457
def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
16558
"""tests what happens when we add attributes to the intext"""
16659
treeName, treeClass = tree
60+
if treeClass is None:
61+
pytest.skip("Treebuilder not loaded")
16762
parser = html5parser.HTMLParser(tree=treeClass["builder"])
16863
document = parser.parseFragment(intext)
16964
for nom, val in attrs_to_add:
@@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
17267
document = treeClass.get("adapter", lambda x: x)(document)
17368
output = treewalkers.pprint(treeClass["walker"](document))
17469
output = attrlist.sub(sortattrs, output)
175-
if not output in expected:
70+
if output not in expected:
17671
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
17772

17873

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy