diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index e2328847..d44447ea 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -1,29 +1,32 @@ -"""A collection of modules for building different kinds of tree from -HTML documents. +"""A collection of modules for building different kinds of trees from HTML +documents. To create a treebuilder for a new type of tree, you need to do implement several things: -1) A set of classes for various types of elements: Document, Doctype, -Comment, Element. These must implement the interface of -_base.treebuilders.Node (although comment nodes have a different -signature for their constructor, see treebuilders.etree.Comment) -Textual content may also be implemented as another node type, or not, as -your tree implementation requires. - -2) A treebuilder object (called TreeBuilder by convention) that -inherits from treebuilders._base.TreeBuilder. This has 4 required attributes: -documentClass - the class to use for the bottommost node of a document -elementClass - the class to use for HTML Elements -commentClass - the class to use for comments -doctypeClass - the class to use for doctypes -It also has one required method: -getDocument - Returns the root node of the complete document tree - -3) If you wish to run the unit tests, you must also create a -testSerializer method on your treebuilder which accepts a node and -returns a string containing Node and its children serialized according -to the format used in the unittests +1. A set of classes for various types of elements: Document, Doctype, Comment, + Element. These must implement the interface of ``base.treebuilders.Node`` + (although comment nodes have a different signature for their constructor, + see ``treebuilders.etree.Comment``) Textual content may also be implemented + as another node type, or not, as your tree implementation requires. + +2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits + from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: + + * ``documentClass`` - the class to use for the bottommost node of a document + * ``elementClass`` - the class to use for HTML Elements + * ``commentClass`` - the class to use for comments + * ``doctypeClass`` - the class to use for doctypes + + It also has one required method: + + * ``getDocument`` - Returns the root node of the complete document tree + +3. If you wish to run the unit tests, you must also create a ``testSerializer`` + method on your treebuilder which accepts a node and returns a string + containing Node and its children serialized according to the format used in + the unittests + """ from __future__ import absolute_import, division, unicode_literals @@ -34,23 +37,32 @@ def getTreeBuilder(treeType, implementation=None, **kwargs): - """Get a TreeBuilder class for various types of tree with built-in support - - treeType - the name of the tree type required (case-insensitive). Supported - values are: - - "dom" - A generic builder for DOM implementations, defaulting to - a xml.dom.minidom based implementation. - "etree" - A generic builder for tree implementations exposing an - ElementTree-like interface, defaulting to - xml.etree.cElementTree if available and - xml.etree.ElementTree if not. - "lxml" - A etree-based builder for lxml.etree, handling - limitations of lxml's implementation. - - implementation - (Currently applies to the "etree" and "dom" tree types). A - module implementing the tree type e.g. - xml.etree.ElementTree or xml.etree.cElementTree.""" + """Get a TreeBuilder class for various types of trees with built-in support + + :arg treeType: the name of the tree type required (case-insensitive). Supported + values are: + + * "dom" - A generic builder for DOM implementations, defaulting to a + xml.dom.minidom based implementation. + * "etree" - A generic builder for tree implementations exposing an + ElementTree-like interface, defaulting to xml.etree.cElementTree if + available and xml.etree.ElementTree if not. + * "lxml" - A etree-based builder for lxml.etree, handling limitations + of lxml's implementation. + + :arg implementation: (Currently applies to the "etree" and "dom" tree + types). A module implementing the tree type e.g. xml.etree.ElementTree + or xml.etree.cElementTree. + + :arg kwargs: Any additional options to pass to the TreeBuilder when + creating it. + + Example: + + >>> from html5lib.treebuilders import getTreeBuilder + >>> builder = getTreeBuilder('etree') + + """ treeType = treeType.lower() if treeType not in treeBuilderCache: diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index a4b2792a..05d97ecc 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -21,22 +21,25 @@ class Node(object): + """Represents an item in the tree""" def __init__(self, name): - """Node representing an item in the tree. - name - The tag name associated with the node - parent - The parent of the current node (or None for the document node) - value - The value of the current node (applies to text nodes and - comments - attributes - a dict holding name, value pairs for attributes of the node - childNodes - a list of child nodes of the current node. This must - include all elements but not necessarily other node types - _flags - A list of miscellaneous flags that can be set on the node + """Creates a Node + + :arg name: The tag name associated with the node + """ + # The tag name assocaited with the node self.name = name + # The parent of the current node (or None for the document node) self.parent = None + # The value of the current node (applies to text nodes and comments) self.value = None + # A dict holding name -> value pairs for attributes of the node self.attributes = {} + # A list of child nodes of the current node. This must include all + # elements but not necessarily other node types. self.childNodes = [] + # A list of miscellaneous flags that can be set on the node. self._flags = [] def __str__(self): @@ -53,23 +56,41 @@ def __repr__(self): def appendChild(self, node): """Insert node as a child of the current node + + :arg node: the node to insert + """ raise NotImplementedError def insertText(self, data, insertBefore=None): """Insert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. + + :arg data: the data to insert + + :arg insertBefore: True if you want to insert the text before the node + and False if you want to insert it after the node + """ raise NotImplementedError def insertBefore(self, node, refNode): """Insert node as a child of the current node, before refNode in the list of child nodes. Raises ValueError if refNode is not a child of - the current node""" + the current node + + :arg node: the node to insert + + :arg refNode: the child node to insert the node before + + """ raise NotImplementedError def removeChild(self, node): """Remove node from the children of the current node + + :arg node: the child node to remove + """ raise NotImplementedError @@ -77,6 +98,9 @@ def reparentChildren(self, newParent): """Move all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way + + :arg newParent: the node to move all this node's children to + """ # XXX - should this method be made more general? for child in self.childNodes: @@ -121,10 +145,12 @@ def nodesEqual(self, node1, node2): class TreeBuilder(object): """Base treebuilder implementation - documentClass - the class to use for the bottommost node of a document - elementClass - the class to use for HTML Elements - commentClass - the class to use for comments - doctypeClass - the class to use for doctypes + + * documentClass - the class to use for the bottommost node of a document + * elementClass - the class to use for HTML Elements + * commentClass - the class to use for comments + * doctypeClass - the class to use for doctypes + """ # pylint:disable=not-callable @@ -144,6 +170,11 @@ class TreeBuilder(object): fragmentClass = None def __init__(self, namespaceHTMLElements): + """Create a TreeBuilder + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + """ if namespaceHTMLElements: self.defaultNamespace = "http://www.w3.org/1999/xhtml" else: @@ -367,11 +398,11 @@ def generateImpliedEndTags(self, exclude=None): self.generateImpliedEndTags(exclude) def getDocument(self): - "Return the final tree" + """Return the final tree""" return self.document def getFragment(self): - "Return the final fragment" + """Return the final fragment""" # assert self.innerHTML fragment = self.fragmentClass() self.openElements[0].reparentChildren(fragment) @@ -379,5 +410,8 @@ def getFragment(self): def testSerializer(self, node): """Serialize the subtree of node in the format required by unit tests - node - the node from which to start serializing""" + + :arg node: the node from which to start serializing + + """ raise NotImplementedError diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 908820c0..ca12a99c 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -309,7 +309,6 @@ def insertCommentMain(self, data, parent=None): super(TreeBuilder, self).insertComment(data, parent) def insertRoot(self, token): - """Create the document root""" # Because of the way libxml2 works, it doesn't seem to be possible to # alter information like the doctype after the tree has been parsed. # Therefore we need to use the built-in parser to create our initial pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy