diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index 402b722e..9bec2076 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -21,20 +21,25 @@ def getTreeWalker(treeType, implementation=None, **kwargs): """Get a TreeWalker class for various types of tree with built-in support - Args: - treeType (str): the name of the tree type required (case-insensitive). - Supported values are: - - - "dom": The xml.dom.minidom DOM implementation - - "etree": A generic walker for tree implementations exposing an - elementtree-like interface (known to work with - ElementTree, cElementTree and lxml.etree). - - "lxml": Optimized walker for lxml.etree - - "genshi": a Genshi stream - - Implementation: A module implementing the tree type e.g. - xml.etree.ElementTree or cElementTree (Currently applies to the - "etree" tree type only). + :arg str treeType: the name of the tree type required (case-insensitive). + Supported values are: + + * "dom": The xml.dom.minidom DOM implementation + * "etree": A generic walker for tree implementations exposing an + elementtree-like interface (known to work with ElementTree, + cElementTree and lxml.etree). + * "lxml": Optimized walker for lxml.etree + * "genshi": a Genshi stream + + :arg implementation: A module implementing the tree type e.g. + xml.etree.ElementTree or cElementTree (Currently applies to the "etree" + tree type only). + + :arg kwargs: keyword arguments passed to the etree walker--for other + walkers, this has no effect + + :returns: a TreeWalker class + """ treeType = treeType.lower() @@ -73,7 +78,13 @@ def concatenateCharacterTokens(tokens): def pprint(walker): - """Pretty printer for tree walkers""" + """Pretty printer for tree walkers + + Takes a TreeWalker instance and pretty prints the output of walking the tree. + + :arg walker: a TreeWalker instance + + """ output = [] indent = 0 for token in concatenateCharacterTokens(walker): diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py index 36e1ba24..80c474c4 100644 --- a/html5lib/treewalkers/base.py +++ b/html5lib/treewalkers/base.py @@ -18,16 +18,48 @@ class TreeWalker(object): + """Walks a tree yielding tokens + + Tokens are dicts that all have a ``type`` field specifying the type of the + token. + + """ def __init__(self, tree): + """Creates a TreeWalker + + :arg tree: the tree to walk + + """ self.tree = tree def __iter__(self): raise NotImplementedError def error(self, msg): + """Generates an error token with the given message + + :arg msg: the error message + + :returns: SerializeError token + + """ return {"type": "SerializeError", "data": msg} def emptyTag(self, namespace, name, attrs, hasChildren=False): + """Generates an EmptyTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :arg hasChildren: whether or not to yield a SerializationError because + this tag shouldn't have children + + :returns: EmptyTag token + + """ yield {"type": "EmptyTag", "name": name, "namespace": namespace, "data": attrs} @@ -35,17 +67,61 @@ def emptyTag(self, namespace, name, attrs, hasChildren=False): yield self.error("Void element has children") def startTag(self, namespace, name, attrs): + """Generates a StartTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :arg attrs: the attributes of the element as a dict + + :returns: StartTag token + + """ return {"type": "StartTag", "name": name, "namespace": namespace, "data": attrs} def endTag(self, namespace, name): + """Generates an EndTag token + + :arg namespace: the namespace of the token--can be ``None`` + + :arg name: the name of the element + + :returns: EndTag token + + """ return {"type": "EndTag", "name": name, "namespace": namespace} def text(self, data): + """Generates SpaceCharacters and Characters tokens + + Depending on what's in the data, this generates one or more + ``SpaceCharacters`` and ``Characters`` tokens. + + For example: + + >>> from html5lib.treewalkers.base import TreeWalker + >>> # Give it an empty tree just so it instantiates + >>> walker = TreeWalker([]) + >>> list(walker.text('')) + [] + >>> list(walker.text(' ')) + [{u'data': ' ', u'type': u'SpaceCharacters'}] + >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE + [{u'data': ' ', u'type': u'SpaceCharacters'}, + {u'data': u'abc', u'type': u'Characters'}, + {u'data': u' ', u'type': u'SpaceCharacters'}] + + :arg data: the text data + + :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens + + """ data = data middle = data.lstrip(spaceCharacters) left = data[:len(data) - len(middle)] @@ -60,18 +136,44 @@ def text(self, data): yield {"type": "SpaceCharacters", "data": right} def comment(self, data): + """Generates a Comment token + + :arg data: the comment + + :returns: Comment token + + """ return {"type": "Comment", "data": data} def doctype(self, name, publicId=None, systemId=None): + """Generates a Doctype token + + :arg name: + + :arg publicId: + + :arg systemId: + + :returns: the Doctype token + + """ return {"type": "Doctype", "name": name, "publicId": publicId, "systemId": systemId} def entity(self, name): + """Generates an Entity token + + :arg name: the entity name + + :returns: an Entity token + + """ return {"type": "Entity", "name": name} def unknown(self, nodeType): + """Handles unknown node types""" return self.error("Unknown node type: " + nodeType)
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: