diff --git a/doc/html5lib.filters.rst b/doc/html5lib.filters.rst
index 38d4a956..d70e4552 100644
--- a/doc/html5lib.filters.rst
+++ b/doc/html5lib.filters.rst
@@ -6,54 +6,53 @@ filters Package
.. automodule:: html5lib.filters.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`alphabeticalattributes` Module
------------------------------------
.. automodule:: html5lib.filters.alphabeticalattributes
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`inject_meta_charset` Module
---------------------------------
.. automodule:: html5lib.filters.inject_meta_charset
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`lint` Module
------------------
.. automodule:: html5lib.filters.lint
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`optionaltags` Module
--------------------------
.. automodule:: html5lib.filters.optionaltags
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`sanitizer` Module
-----------------------
.. automodule:: html5lib.filters.sanitizer
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`whitespace` Module
------------------------
.. automodule:: html5lib.filters.whitespace
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
diff --git a/doc/html5lib.rst b/doc/html5lib.rst
index 2a0b150f..d7c75c58 100644
--- a/doc/html5lib.rst
+++ b/doc/html5lib.rst
@@ -9,7 +9,6 @@ html5lib Package
.. automodule:: html5lib.constants
:members:
- :undoc-members:
:show-inheritance:
:mod:`html5parser` Module
@@ -17,16 +16,16 @@ html5lib Package
.. automodule:: html5lib.html5parser
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`serializer` Module
------------------------
.. automodule:: html5lib.serializer
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
Subpackages
-----------
@@ -37,4 +36,3 @@ Subpackages
html5lib.treebuilders
html5lib.treewalkers
html5lib.treeadapters
-
diff --git a/doc/html5lib.treeadapters.rst b/doc/html5lib.treeadapters.rst
index 6b2dc78d..1d3a9fba 100644
--- a/doc/html5lib.treeadapters.rst
+++ b/doc/html5lib.treeadapters.rst
@@ -1,4 +1,4 @@
-treebuilders Package
+treeadapters Package
====================
:mod:`~html5lib.treeadapters` Package
@@ -6,15 +6,15 @@ treebuilders Package
.. automodule:: html5lib.treeadapters
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
.. automodule:: html5lib.treeadapters.genshi
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
.. automodule:: html5lib.treeadapters.sax
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
diff --git a/doc/html5lib.treebuilders.rst b/doc/html5lib.treebuilders.rst
index aee82142..1a051e50 100644
--- a/doc/html5lib.treebuilders.rst
+++ b/doc/html5lib.treebuilders.rst
@@ -6,38 +6,37 @@ treebuilders Package
.. automodule:: html5lib.treebuilders
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`base` Module
-------------------
.. automodule:: html5lib.treebuilders.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treebuilders.dom
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treebuilders.etree
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treebuilders.etree_lxml
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
diff --git a/doc/html5lib.treewalkers.rst b/doc/html5lib.treewalkers.rst
index 085d8a98..4afef476 100644
--- a/doc/html5lib.treewalkers.rst
+++ b/doc/html5lib.treewalkers.rst
@@ -6,46 +6,45 @@ treewalkers Package
.. automodule:: html5lib.treewalkers
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`base` Module
------------------
.. automodule:: html5lib.treewalkers.base
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`dom` Module
-----------------
.. automodule:: html5lib.treewalkers.dom
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree` Module
-------------------
.. automodule:: html5lib.treewalkers.etree
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
:mod:`etree_lxml` Module
------------------------
.. automodule:: html5lib.treewalkers.etree_lxml
:members:
- :undoc-members:
:show-inheritance:
-
+ :special-members: __init__
:mod:`genshi` Module
--------------------
.. automodule:: html5lib.treewalkers.genshi
:members:
- :undoc-members:
:show-inheritance:
+ :special-members: __init__
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 75765924..9d39b9d4 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -25,13 +25,48 @@
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
- """Parse a string or file-like object into a tree"""
+ """Parse an HTML document as a string or file-like object into a tree
+
+ :arg doc: the document to parse as a string or file-like object
+
+ :arg treebuilder: the treebuilder to use when parsing
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import parse
+ >>> parse('<html><body><p>This is a doc</p></body></html>')
+
+
+ """
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
return p.parse(doc, **kwargs)
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
+ """Parse an HTML fragment as a string or file-like object into a tree
+
+ :arg doc: the fragment to parse as a string or file-like object
+
+ :arg container: the container context to parse the fragment in
+
+ :arg treebuilder: the treebuilder to use when parsing
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import parseFragment
+ >>> parseFragment('this is a fragment')
+
+
+ """
tb = treebuilders.getTreeBuilder(treebuilder)
p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
return p.parseFragment(doc, container=container, **kwargs)
@@ -50,16 +85,30 @@ def __new__(meta, classname, bases, classDict):
class HTMLParser(object):
- """HTML parser. Generates a tree structure from a stream of (possibly
- malformed) HTML"""
+ """HTML parser
+
+ Generates a tree structure from a stream of (possibly malformed) HTML.
+
+ """
def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
"""
- strict - raise an exception when a parse error is encountered
+ :arg tree: a treebuilder class controlling the type of tree that will be
+ returned. Built in treebuilders can be accessed through
+ html5lib.treebuilders.getTreeBuilder(treeType)
+
+ :arg strict: raise an exception when a parse error is encountered
+
+ :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+ :arg debug: whether or not to enable debug mode which logs things
+
+ Example:
+
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser() # generates parser with etree builder
+ >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict
- tree - a treebuilder class controlling the type of tree that will be
- returned. Built in treebuilders can be accessed through
- html5lib.treebuilders.getTreeBuilder(treeType)
"""
# Raise an exception on the first error encountered
@@ -123,9 +172,8 @@ def reset(self):
@property
def documentEncoding(self):
- """The name of the character encoding
- that was used to decode the input stream,
- or :obj:`None` if that is not determined yet.
+ """Name of the character encoding that was used to decode the input stream, or
+ :obj:`None` if that is not determined yet
"""
if not hasattr(self, 'tokenizer'):
@@ -219,14 +267,24 @@ def normalizedTokens(self):
def parse(self, stream, *args, **kwargs):
"""Parse a HTML document into a well-formed tree
- stream - a filelike object or string containing the HTML to be parsed
+ :arg stream: a file-like object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element).
+
+ :arg scripting: treat noscript elements as if JavaScript was turned on
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser()
+ >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
+
- scripting - treat noscript elements as if javascript was turned on
"""
self._parse(stream, False, None, *args, **kwargs)
return self.tree.getDocument()
@@ -234,17 +292,27 @@ def parse(self, stream, *args, **kwargs):
def parseFragment(self, stream, *args, **kwargs):
"""Parse a HTML fragment into a well-formed tree fragment
- container - name of the element we're setting the innerHTML property
- if set to None, default to 'div'
+ :arg container: name of the element we're setting the innerHTML
+ property on; if set to None, defaults to 'div'
+
+ :arg stream: a file-like object or string containing the HTML to be parsed
+
+ The optional encoding parameter must be a string that indicates
+ the encoding. If specified, that encoding will be used,
+ regardless of any BOM or later declaration (such as in a meta
+ element)
- stream - a filelike object or string containing the HTML to be parsed
+ :arg scripting: treat noscript elements as if JavaScript was turned on
- The optional encoding parameter must be a string that indicates
- the encoding. If specified, that encoding will be used,
- regardless of any BOM or later declaration (such as in a meta
- element)
+ :returns: parsed tree
+
+ Example:
+
+ >>> from html5lib.html5parser import HTMLParser
+ >>> parser = HTMLParser()
+ >>> parser.parseFragment('this is a fragment')
+
- scripting - treat noscript elements as if javascript was turned on
"""
self._parse(stream, True, *args, **kwargs)
return self.tree.getFragment()
@@ -258,8 +326,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
raise ParseError(E[errorcode] % datavars)
def normalizeToken(self, token):
- """ HTML5 specific normalizations to the token stream """
-
+ # HTML5 specific normalizations to the token stream
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
token["data"] = OrderedDict(raw)
@@ -327,9 +394,7 @@ def resetInsertionMode(self):
self.phase = new_phase
def parseRCDataRawtext(self, token, contentType):
- """Generic RCDATA/RAWTEXT Parsing algorithm
- contentType - RCDATA or RAWTEXT
- """
+ # Generic RCDATA/RAWTEXT Parsing algorithm
assert contentType in ("RAWTEXT", "RCDATA")
self.tree.insertElement(token)
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index 9e19a559..402b722e 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -13,7 +13,7 @@
from .. import constants
from .._utils import default_etree
-__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
+__all__ = ["getTreeWalker", "pprint"]
treeWalkerCache = {}
pFad - Phonifier reborn
Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies:
Alternative Proxy
pFad Proxy
pFad v3 Proxy
pFad v4 Proxy