Skip to content

Commit 0f1994b

Browse files
willkg authored and gsnedders committed
Document html5lib.treewalkers (html5lib#386)
1 parent 4ed8b8b commit 0f1994b

File tree

2 files changed

+128
-15
lines changed

2 files changed

+128
-15
lines changed

html5lib/treewalkers/__init__.py

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,25 @@
2121
def getTreeWalker(treeType, implementation=None, **kwargs):
2222
"""Get a TreeWalker class for various types of tree with built-in support
2323
24-
Args:
25-
treeType (str): the name of the tree type required (case-insensitive).
26-
Supported values are:
27-
28-
- "dom": The xml.dom.minidom DOM implementation
29-
- "etree": A generic walker for tree implementations exposing an
30-
elementtree-like interface (known to work with
31-
ElementTree, cElementTree and lxml.etree).
32-
- "lxml": Optimized walker for lxml.etree
33-
- "genshi": a Genshi stream
34-
35-
Implementation: A module implementing the tree type e.g.
36-
xml.etree.ElementTree or cElementTree (Currently applies to the
37-
"etree" tree type only).
24+
:arg str treeType: the name of the tree type required (case-insensitive).
25+
Supported values are:
26+
27+
* "dom": The xml.dom.minidom DOM implementation
28+
* "etree": A generic walker for tree implementations exposing an
29+
elementtree-like interface (known to work with ElementTree,
30+
cElementTree and lxml.etree).
31+
* "lxml": Optimized walker for lxml.etree
32+
* "genshi": a Genshi stream
33+
34+
:arg implementation: A module implementing the tree type e.g.
35+
xml.etree.ElementTree or cElementTree (Currently applies to the "etree"
36+
tree type only).
37+
38+
:arg kwargs: keyword arguments passed to the etree walker--for other
39+
walkers, this has no effect
40+
41+
:returns: a TreeWalker class
42+
3843
"""
3944

4045
treeType = treeType.lower()
@@ -73,7 +78,13 @@ def concatenateCharacterTokens(tokens):
7378

7479

7580
def pprint(walker):
76-
"""Pretty printer for tree walkers"""
81+
"""Pretty printer for tree walkers
82+
83+
Takes a TreeWalker instance and pretty prints the output of walking the tree.
84+
85+
:arg walker: a TreeWalker instance
86+
87+
"""
7788
output = []
7889
indent = 0
7990
for token in concatenateCharacterTokens(walker):

html5lib/treewalkers/base.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,34 +18,110 @@
1818

1919

2020
class TreeWalker(object):
21+
"""Walks a tree yielding tokens
22+
23+
Tokens are dicts that all have a ``type`` field specifying the type of the
24+
token.
25+
26+
"""
2127
def __init__(self, tree):
28+
"""Creates a TreeWalker
29+
30+
:arg tree: the tree to walk
31+
32+
"""
2233
self.tree = tree
2334

2435
def __iter__(self):
2536
raise NotImplementedError
2637

2738
def error(self, msg):
39+
"""Generates an error token with the given message
40+
41+
:arg msg: the error message
42+
43+
:returns: SerializeError token
44+
45+
"""
2846
return {"type": "SerializeError", "data": msg}
2947

3048
def emptyTag(self, namespace, name, attrs, hasChildren=False):
49+
"""Generates an EmptyTag token
50+
51+
:arg namespace: the namespace of the token--can be ``None``
52+
53+
:arg name: the name of the element
54+
55+
:arg attrs: the attributes of the element as a dict
56+
57+
:arg hasChildren: whether or not to yield a SerializeError because
58+
this tag shouldn't have children
59+
60+
:returns: EmptyTag token
61+
62+
"""
3163
yield {"type": "EmptyTag", "name": name,
3264
"namespace": namespace,
3365
"data": attrs}
3466
if hasChildren:
3567
yield self.error("Void element has children")
3668

3769
def startTag(self, namespace, name, attrs):
70+
"""Generates a StartTag token
71+
72+
:arg namespace: the namespace of the token--can be ``None``
73+
74+
:arg name: the name of the element
75+
76+
:arg attrs: the attributes of the element as a dict
77+
78+
:returns: StartTag token
79+
80+
"""
3881
return {"type": "StartTag",
3982
"name": name,
4083
"namespace": namespace,
4184
"data": attrs}
4285

4386
def endTag(self, namespace, name):
87+
"""Generates an EndTag token
88+
89+
:arg namespace: the namespace of the token--can be ``None``
90+
91+
:arg name: the name of the element
92+
93+
:returns: EndTag token
94+
95+
"""
4496
return {"type": "EndTag",
4597
"name": name,
4698
"namespace": namespace}
4799

48100
def text(self, data):
101+
"""Generates SpaceCharacters and Characters tokens
102+
103+
Depending on what's in the data, this generates one or more
104+
``SpaceCharacters`` and ``Characters`` tokens.
105+
106+
For example:
107+
108+
>>> from html5lib.treewalkers.base import TreeWalker
109+
>>> # Give it an empty tree just so it instantiates
110+
>>> walker = TreeWalker([])
111+
>>> list(walker.text(''))
112+
[]
113+
>>> list(walker.text(' '))
114+
[{u'data': ' ', u'type': u'SpaceCharacters'}]
115+
>>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE
116+
[{u'data': ' ', u'type': u'SpaceCharacters'},
117+
{u'data': u'abc', u'type': u'Characters'},
118+
{u'data': u' ', u'type': u'SpaceCharacters'}]
119+
120+
:arg data: the text data
121+
122+
:returns: one or more ``SpaceCharacters`` and ``Characters`` tokens
123+
124+
"""
49125
data = data
50126
middle = data.lstrip(spaceCharacters)
51127
left = data[:len(data) - len(middle)]
@@ -60,18 +136,44 @@ def text(self, data):
60136
yield {"type": "SpaceCharacters", "data": right}
61137

62138
def comment(self, data):
139+
"""Generates a Comment token
140+
141+
:arg data: the comment
142+
143+
:returns: Comment token
144+
145+
"""
63146
return {"type": "Comment", "data": data}
64147

65148
def doctype(self, name, publicId=None, systemId=None):
149+
"""Generates a Doctype token
150+
151+
:arg name:
152+
153+
:arg publicId:
154+
155+
:arg systemId:
156+
157+
:returns: the Doctype token
158+
159+
"""
66160
return {"type": "Doctype",
67161
"name": name,
68162
"publicId": publicId,
69163
"systemId": systemId}
70164

71165
def entity(self, name):
166+
"""Generates an Entity token
167+
168+
:arg name: the entity name
169+
170+
:returns: an Entity token
171+
172+
"""
72173
return {"type": "Entity", "name": name}
73174

74175
def unknown(self, nodeType):
176+
"""Handles unknown node types"""
75177
return self.error("Unknown node type: " + nodeType)
76178

77179

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy