Skip to content

Commit 8f7f9f0

Browse files
committed
Move the treewalker printer to the treewalker module
1 parent 9695fc8 commit 8f7f9f0

File tree

2 files changed

+83
-79
lines changed

2 files changed

+83
-79
lines changed

html5lib/tests/test_treewalkers.py

Lines changed: 2 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -139,83 +139,6 @@ def GenshiAdapter(tree):
139139
"adapter": GenshiAdapter,
140140
"walker": treewalkers.getTreeWalker("genshi")}
141141

142-
143-
def concatenateCharacterTokens(tokens):
    """Merge runs of adjacent character tokens into single tokens.

    Consecutive tokens whose "type" is "Characters" or "SpaceCharacters"
    are collapsed into one freshly created
    ``{"type": "Characters", "data": ...}`` token whose data is the
    concatenation of the run. Every other token is yielded unchanged and in
    its original position. The input tokens themselves are never modified.

    :arg tokens: iterable of token dicts; each has a "type" key, and
        character tokens also have a "data" key

    :returns: a generator of token dicts
    """
    # Data of the character run currently being collected. A list is used so
    # the text is joined once per run instead of growing a string with "+=".
    pendingData = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            pendingData.append(token["data"])
            continue
        # A non-character token ends the current run (if any): flush it first
        # so the relative order of tokens is preserved.
        if pendingData:
            yield {"type": "Characters", "data": "".join(pendingData)}
            pendingData = []
        yield token
    # Flush a run that extends to the very end of the stream.
    if pendingData:
        yield {"type": "Characters", "data": "".join(pendingData)}
159-
160-
161-
def _prefixedName(namespace, name):
    """Return "<prefix> <name>", using the raw namespace when no prefix is registered."""
    if namespace in constants.prefixes:
        namespace = constants.prefixes[namespace]
    return namespace + " " + name


def convertTokens(tokens):
    """Render a tree walker token stream as an indented, line-based tree dump.

    Adjacent character tokens are merged first. Then, per token type:

    * "StartTag" / "EmptyTag": emits ``<name>`` at the current indentation,
      followed by one ``name="value"`` line per attribute (sorted by their
      ``(namespace, name)`` key) two spaces deeper. Names in a non-HTML
      namespace are prefixed with the namespace prefix (or the namespace
      itself when no prefix is known) and a space. A "StartTag" leaves the
      indentation two spaces deeper; an "EmptyTag" restores it.
    * "EndTag": emits nothing, only reduces the indentation by two.
    * "Comment": emits ``<!-- data -->``.
    * "Doctype": emits ``<!DOCTYPE ...>`` including the public and system
      identifiers when present.
    * character tokens: emit the text wrapped in double quotes.

    Tokens of any other type are silently ignored.

    :arg tokens: iterable of token dicts as produced by a tree walker

    :returns: the dump as a single string with lines joined by "\\n"
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(tokens):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                tagName = _prefixedName(namespace, token["name"])
            else:
                tagName = token["name"]
            output.append("%s<%s>" % (" " * indent, tagName))
            indent += 2
            attrs = token["data"]
            if attrs:
                # TODO: Remove this if statement, attrs should always exist
                padding = " " * indent
                for (attrNamespace, attrName), value in sorted(attrs.items()):
                    if attrNamespace:
                        attrName = _prefixedName(attrNamespace, attrName)
                    output.append('%s%s="%s"' % (padding, attrName, value))
            if tokenType == "EmptyTag":
                # Empty elements have no children, so undo the indent at once.
                indent -= 2
        elif tokenType == "EndTag":
            indent -= 2
        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
        elif tokenType == "Doctype":
            padding = " " * indent
            if not token["name"]:
                output.append("%s<!DOCTYPE >" % (padding,))
            elif token["publicId"]:
                output.append('%s<!DOCTYPE %s "%s" "%s">' %
                              (padding, token["name"], token["publicId"],
                               token["systemId"] or ""))
            elif token["systemId"]:
                output.append('%s<!DOCTYPE %s "" "%s">' %
                              (padding, token["name"], token["systemId"]))
            else:
                output.append("%s<!DOCTYPE %s>" % (padding, token["name"]))
        elif tokenType in ("Characters", "SpaceCharacters"):
            output.append('%s"%s"' % (" " * indent, token["data"]))
        else:
            pass  # TODO: what to do with errors?
    return "\n".join(output)
218-
219142
import re
220143
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+", re.M)
221144

@@ -265,7 +188,7 @@ def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
265188

266189
document = treeClass.get("adapter", lambda x: x)(document)
267190
try:
268-
output = convertTokens(treeClass["walker"](document))
191+
output = treewalkers.pprint(treeClass["walker"](document))
269192
output = attrlist.sub(sortattrs, output)
270193
expected = attrlist.sub(sortattrs, convertExpected(expected))
271194
diff = "".join(unified_diff([line + "\n" for line in expected.splitlines()],
@@ -323,7 +246,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
323246
set_attribute_on_first_child(document, nom, val, treeName)
324247

325248
document = treeClass.get("adapter", lambda x: x)(document)
326-
output = convertTokens(treeClass["walker"](document))
249+
output = treewalkers.pprint(treeClass["walker"](document))
327250
output = attrlist.sub(sortattrs, output)
328251
if not output in expected:
329252
raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))

html5lib/treewalkers/__init__.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,12 @@
1010

1111
from __future__ import absolute_import, division, unicode_literals
1212

13+
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshistream", "lxmletree",
14+
"pulldom"]
15+
1316
import sys
1417

18+
from .. import constants
1519
from ..utils import default_etree
1620

1721
treeWalkerCache = {}
@@ -55,3 +59,80 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
5559
# XXX: NEVER cache here, caching is done in the etree submodule
5660
return etree.getETreeModule(implementation, **kwargs).TreeWalker
5761
return treeWalkerCache.get(treeType)
62+
63+
64+
def concatenateCharacterTokens(tokens):
    """Merge runs of adjacent character tokens into single tokens.

    Consecutive tokens whose "type" is "Characters" or "SpaceCharacters"
    are collapsed into one freshly created
    ``{"type": "Characters", "data": ...}`` token whose data is the
    concatenation of the run. Every other token is yielded unchanged and in
    its original position. The input tokens themselves are never modified.

    :arg tokens: iterable of token dicts; each has a "type" key, and
        character tokens also have a "data" key

    :returns: a generator of token dicts
    """
    # Pieces of the character run currently being collected; joined once per
    # run rather than repeatedly extending a string with "+=".
    runParts = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            runParts.append(token["data"])
        else:
            # Any other token terminates the run; emit the merged text before
            # the token so stream order is kept.
            if runParts:
                yield {"type": "Characters", "data": "".join(runParts)}
                runParts = []
            yield token
    # The stream may end in the middle of a run.
    if runParts:
        yield {"type": "Characters", "data": "".join(runParts)}
80+
81+
82+
def _qualifiedName(namespace, name):
    """Return "<prefix> <name>", using the raw namespace when no prefix is registered."""
    if namespace in constants.prefixes:
        namespace = constants.prefixes[namespace]
    return namespace + " " + name


def pprint(tokens):
    """Pretty-print a tree walker token stream as an indented tree dump.

    Adjacent character tokens are merged first. Then, per token type:

    * "StartTag" / "EmptyTag": emits ``<name>`` at the current indentation,
      followed by one ``name="value"`` line per attribute (sorted by their
      ``(namespace, name)`` key) two spaces deeper. Names in a non-HTML
      namespace are prefixed with the namespace prefix (or the namespace
      itself when no prefix is known) and a space. A "StartTag" leaves the
      indentation two spaces deeper; an "EmptyTag" restores it.
    * "EndTag": emits nothing, only reduces the indentation by two.
    * "Comment": emits ``<!-- data -->``.
    * "Doctype": emits ``<!DOCTYPE ...>`` including the public and system
      identifiers when present.
    * character tokens: emit the text wrapped in double quotes.

    Tokens of any other type are silently ignored.

    :arg tokens: iterable of token dicts as produced by a tree walker

    :returns: the dump as a single string with lines joined by "\\n"
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(tokens):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                tagName = _qualifiedName(namespace, token["name"])
            else:
                tagName = token["name"]
            output.append("%s<%s>" % (" " * indent, tagName))
            indent += 2
            attrs = token["data"]
            if attrs:
                # TODO: Remove this if statement, attrs should always exist
                padding = " " * indent
                for (attrNamespace, attrName), value in sorted(attrs.items()):
                    if attrNamespace:
                        attrName = _qualifiedName(attrNamespace, attrName)
                    output.append('%s%s="%s"' % (padding, attrName, value))
            if tokenType == "EmptyTag":
                # Empty elements have no children, so undo the indent at once.
                indent -= 2
        elif tokenType == "EndTag":
            indent -= 2
        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
        elif tokenType == "Doctype":
            padding = " " * indent
            if not token["name"]:
                output.append("%s<!DOCTYPE >" % (padding,))
            elif token["publicId"]:
                output.append('%s<!DOCTYPE %s "%s" "%s">' %
                              (padding, token["name"], token["publicId"],
                               token["systemId"] or ""))
            elif token["systemId"]:
                output.append('%s<!DOCTYPE %s "" "%s">' %
                              (padding, token["name"], token["systemId"]))
            else:
                output.append("%s<!DOCTYPE %s>" % (padding, token["name"]))
        elif tokenType in ("Characters", "SpaceCharacters"):
            output.append('%s"%s"' % (" " * indent, token["data"]))
        else:
            pass  # TODO: what to do with errors?
    return "\n".join(output)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy