From 21a6820957e677f24684ec84cddfecf1d541b11d Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Wed, 29 Nov 2017 14:39:01 -0500 Subject: [PATCH 1/3] Remove undoc-members and add documentation for __init__ The public API should all be documented. Anything that isn't documented with a docstring or starts with a _ isn't part of the public API. Given that, we shouldn't be autodoc'ing undocumented members. We do want to document __init__ since that's how the classes get built. So we explicitly add that to autodoc. I think this is a good base to build on. If it isn't, we can adjust things and maybe explicitly specify what should and shouldn't be documented. --- doc/html5lib.filters.rst | 15 +++++++-------- doc/html5lib.rst | 6 ++---- doc/html5lib.treeadapters.rst | 8 ++++---- doc/html5lib.treebuilders.rst | 11 +++++------ doc/html5lib.treewalkers.rst | 13 ++++++------- 5 files changed, 24 insertions(+), 29 deletions(-) diff --git a/doc/html5lib.filters.rst b/doc/html5lib.filters.rst index 38d4a956..d70e4552 100644 --- a/doc/html5lib.filters.rst +++ b/doc/html5lib.filters.rst @@ -6,54 +6,53 @@ filters Package .. automodule:: html5lib.filters.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`alphabeticalattributes` Module ------------------------------------ .. automodule:: html5lib.filters.alphabeticalattributes :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`inject_meta_charset` Module --------------------------------- .. automodule:: html5lib.filters.inject_meta_charset :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`lint` Module ------------------ .. automodule:: html5lib.filters.lint :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`optionaltags` Module -------------------------- .. 
automodule:: html5lib.filters.optionaltags :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`sanitizer` Module ----------------------- .. automodule:: html5lib.filters.sanitizer :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`whitespace` Module ------------------------ .. automodule:: html5lib.filters.whitespace :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ diff --git a/doc/html5lib.rst b/doc/html5lib.rst index 2a0b150f..d7c75c58 100644 --- a/doc/html5lib.rst +++ b/doc/html5lib.rst @@ -9,7 +9,6 @@ html5lib Package .. automodule:: html5lib.constants :members: - :undoc-members: :show-inheritance: :mod:`html5parser` Module @@ -17,16 +16,16 @@ html5lib Package .. automodule:: html5lib.html5parser :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`serializer` Module ------------------------ .. automodule:: html5lib.serializer :members: - :undoc-members: :show-inheritance: + :special-members: __init__ Subpackages ----------- @@ -37,4 +36,3 @@ Subpackages html5lib.treebuilders html5lib.treewalkers html5lib.treeadapters - diff --git a/doc/html5lib.treeadapters.rst b/doc/html5lib.treeadapters.rst index 6b2dc78d..1d3a9fba 100644 --- a/doc/html5lib.treeadapters.rst +++ b/doc/html5lib.treeadapters.rst @@ -1,4 +1,4 @@ -treebuilders Package +treeadapters Package ==================== :mod:`~html5lib.treeadapters` Package @@ -6,15 +6,15 @@ treebuilders Package .. automodule:: html5lib.treeadapters :members: - :undoc-members: :show-inheritance: + :special-members: __init__ .. automodule:: html5lib.treeadapters.genshi :members: - :undoc-members: :show-inheritance: + :special-members: __init__ .. 
automodule:: html5lib.treeadapters.sax :members: - :undoc-members: :show-inheritance: + :special-members: __init__ diff --git a/doc/html5lib.treebuilders.rst b/doc/html5lib.treebuilders.rst index aee82142..1a051e50 100644 --- a/doc/html5lib.treebuilders.rst +++ b/doc/html5lib.treebuilders.rst @@ -6,38 +6,37 @@ treebuilders Package .. automodule:: html5lib.treebuilders :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`base` Module ------------------- .. automodule:: html5lib.treebuilders.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`dom` Module ----------------- .. automodule:: html5lib.treebuilders.dom :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree` Module ------------------- .. automodule:: html5lib.treebuilders.etree :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree_lxml` Module ------------------------ .. automodule:: html5lib.treebuilders.etree_lxml :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ diff --git a/doc/html5lib.treewalkers.rst b/doc/html5lib.treewalkers.rst index 085d8a98..4afef476 100644 --- a/doc/html5lib.treewalkers.rst +++ b/doc/html5lib.treewalkers.rst @@ -6,46 +6,45 @@ treewalkers Package .. automodule:: html5lib.treewalkers :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`base` Module ------------------ .. automodule:: html5lib.treewalkers.base :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`dom` Module ----------------- .. automodule:: html5lib.treewalkers.dom :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree` Module ------------------- .. automodule:: html5lib.treewalkers.etree :members: - :undoc-members: :show-inheritance: + :special-members: __init__ :mod:`etree_lxml` Module ------------------------ .. 
automodule:: html5lib.treewalkers.etree_lxml :members: - :undoc-members: :show-inheritance: - + :special-members: __init__ :mod:`genshi` Module -------------------- .. automodule:: html5lib.treewalkers.genshi :members: - :undoc-members: :show-inheritance: + :special-members: __init__ From dc9443d11809db88003ee3b360d078817bf61845 Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Wed, 29 Nov 2017 14:51:04 -0500 Subject: [PATCH 2/3] Remove items from __all__ that aren't in the namespace --- html5lib/treewalkers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index 9e19a559..402b722e 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -13,7 +13,7 @@ from .. import constants from .._utils import default_etree -__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"] +__all__ = ["getTreeWalker", "pprint"] treeWalkerCache = {} From 805f272868365ed336a9d707d24120348f68e536 Mon Sep 17 00:00:00 2001 From: Will Kahn-Greene Date: Wed, 29 Nov 2017 15:08:19 -0500 Subject: [PATCH 3/3] Document html5parser module --- html5lib/html5parser.py | 123 ++++++++++++++++++++++++++++++---------- 1 file changed, 94 insertions(+), 29 deletions(-) diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 75765924..9d39b9d4 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -25,13 +25,48 @@ def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse a string or file-like object into a tree""" + """Parse an HTML document as a string or file-like object into a tree + + :arg doc: the document to parse as a string or file-like object + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parse + >>> parse('

<html><body><p>This is a doc</p></body></html>

') + + + """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parse(doc, **kwargs) def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): + """Parse an HTML fragment as a string or file-like object into a tree + + :arg doc: the fragment to parse as a string or file-like object + + :arg container: the container context to parse the fragment in + + :arg treebuilder: the treebuilder to use when parsing + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import parseFragment + >>> parseFragment('this is a fragment') + + + """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parseFragment(doc, container=container, **kwargs) @@ -50,16 +85,30 @@ def __new__(meta, classname, bases, classDict): class HTMLParser(object): - """HTML parser. Generates a tree structure from a stream of (possibly - malformed) HTML""" + """HTML parser + + Generates a tree structure from a stream of (possibly malformed) HTML. + + """ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): """ - strict - raise an exception when a parse error is encountered + :arg tree: a treebuilder class controlling the type of tree that will be + returned. 
Built in treebuilders can be accessed through + html5lib.treebuilders.getTreeBuilder(treeType) + + :arg strict: raise an exception when a parse error is encountered + + :arg namespaceHTMLElements: whether or not to namespace HTML elements + + :arg debug: whether or not to enable debug mode which logs things + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() # generates parser with etree builder + >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict - tree - a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) """ # Raise an exception on the first error encountered @@ -123,9 +172,8 @@ def reset(self): @property def documentEncoding(self): - """The name of the character encoding - that was used to decode the input stream, - or :obj:`None` if that is not determined yet. + """Name of the character encoding that was used to decode the input stream, or + :obj:`None` if that is not determined yet """ if not hasattr(self, 'tokenizer'): @@ -219,14 +267,24 @@ def normalizedTokens(self): def parse(self, stream, *args, **kwargs): """Parse a HTML document into a well-formed tree - stream - a filelike object or string containing the HTML to be parsed + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element). + + :arg scripting: treat noscript elements as if JavaScript was turned on - The optional encoding parameter must be a string that indicates - the encoding. 
If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parse('

<html><body><p>This is a doc</p></body></html>

') + - scripting - treat noscript elements as if javascript was turned on """ self._parse(stream, False, None, *args, **kwargs) return self.tree.getDocument() @@ -234,17 +292,27 @@ def parse(self, stream, *args, **kwargs): def parseFragment(self, stream, *args, **kwargs): """Parse a HTML fragment into a well-formed tree fragment - container - name of the element we're setting the innerHTML property - if set to None, default to 'div' + :arg container: name of the element we're setting the innerHTML + property if set to None, default to 'div' + + :arg stream: a file-like object or string containing the HTML to be parsed + + The optional encoding parameter must be a string that indicates + the encoding. If specified, that encoding will be used, + regardless of any BOM or later declaration (such as in a meta + element) - stream - a filelike object or string containing the HTML to be parsed + :arg scripting: treat noscript elements as if JavaScript was turned on - The optional encoding parameter must be a string that indicates - the encoding. 
If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) + :returns: parsed tree + + Example: + + >>> from html5lib.html5parser import HTMLParser + >>> parser = HTMLParser() + >>> parser.parseFragment('this is a fragment') + + - scripting - treat noscript elements as if javascript was turned on """ self._parse(stream, True, *args, **kwargs) return self.tree.getFragment() @@ -258,8 +326,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None): raise ParseError(E[errorcode] % datavars) def normalizeToken(self, token): - """ HTML5 specific normalizations to the token stream """ - + # HTML5 specific normalizations to the token stream if token["type"] == tokenTypes["StartTag"]: raw = token["data"] token["data"] = OrderedDict(raw) @@ -327,9 +394,7 @@ def resetInsertionMode(self): self.phase = new_phase def parseRCDataRawtext(self, token, contentType): - """Generic RCDATA/RAWTEXT Parsing algorithm - contentType - RCDATA or RAWTEXT - """ + # Generic RCDATA/RAWTEXT Parsing algorithm assert contentType in ("RAWTEXT", "RCDATA") self.tree.insertElement(token) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy