From e0dc25f335d3df610f752df29d5c4301717eb452 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon
Date: Fri, 15 Jul 2016 02:23:19 +0100
Subject: [PATCH 1/3] Fix attribute order to the treebuilder to be document order

Somehow I managed to screw this up so it became reverse document order!
---
 CHANGES.rst                    |  5 +++--
 html5lib/html5parser.py        |  6 +++++-
 html5lib/tests/test_parser2.py | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 483bdedb..570c9605 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,9 +4,10 @@ Change Log
 0.999999999/1.0b10
 ~~~~~~~~~~~~~~~~~~
 
-Released on XXX
+Released on July 15, 2016
 
-* XXX
+* Fix attribute order going to the tree builder to be document order
+  instead of reverse document order(!).
 
 
 0.99999999/1.0b9
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 470c8a7d..2abd63e4 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -265,7 +265,11 @@ def normalizeToken(self, token):
         """ HTML5 specific normalizations to the token stream """
 
         if token["type"] == tokenTypes["StartTag"]:
-            token["data"] = OrderedDict(token['data'][::-1])
+            raw = token["data"]
+            token["data"] = OrderedDict(raw)
+            if len(raw) > len(token["data"]):
+                # we had some duplicated attribute, fix so first wins
+                token["data"].update(raw[::-1])
 
         return token
 
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 21dc59d9..bcc0bf48 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -1,12 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from six import PY2, text_type
+from six import PY2, text_type, unichr
 
 import io
 
 from . import support  # noqa
 
-from html5lib.constants import namespaces
+from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser
 
 
@@ -53,6 +53,21 @@ def test_unicode_file():
     assert parse(io.StringIO("a")) is not None
 
 
+def test_maintain_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_duplicate_attribute():
     # This is here because we impl it in parser and not tokenizer
     doc = parse('<p class=a class=b>')
@@ -60,6 +75,20 @@ def test_duplicate_attribute():
     assert el.get("class") == "a"
 
 
+def test_maintain_duplicate_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs + [('a', len(attrs))]}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_debug_log():
     parser = HTMLParser(debug=True)
     parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")

From 6a73efa01754253605284b5a5688de3961b120fa Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon
Date: Fri, 15 Jul 2016 02:24:18 +0100
Subject: [PATCH 2/3] Yes, another release, already. :(

---
 html5lib/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 473c265f..8ee9b53e 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -22,4 +22,4 @@
            "getTreeWalker", "serialize"]
 
 # this has to be at the top level, see how setup.py parses this
-__version__ = "0.999999999-dev"
+__version__ = "0.999999999"

From 983a9355ea66a8c1626a42fd0682b48e246685bd Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon
Date: Fri, 15 Jul 2016 02:24:33 +0100
Subject: [PATCH 3/3] And back to dev.

---
 html5lib/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 8ee9b53e..f3cd9455 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -22,4 +22,4 @@
            "getTreeWalker", "serialize"]
 
 # this has to be at the top level, see how setup.py parses this
-__version__ = "0.999999999"
+__version__ = "0.9999999999-dev"

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy