diff --git a/CHANGES.rst b/CHANGES.rst
index 483bdedb..570c9605 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,9 +4,10 @@ Change Log
 0.999999999/1.0b10
 ~~~~~~~~~~~~~~~~~~
 
-Released on XXX
+Released on July 15, 2016
 
-* XXX
+* Fix attribute order going to the tree builder to be document order
+  instead of reverse document order(!).
 
 
 0.99999999/1.0b9
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 473c265f..f3cd9455 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -22,4 +22,4 @@
            "getTreeWalker", "serialize"]
 
 # this has to be at the top level, see how setup.py parses this
-__version__ = "0.999999999-dev"
+__version__ = "0.9999999999-dev"
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 470c8a7d..2abd63e4 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -265,7 +265,11 @@ def normalizeToken(self, token):
         """ HTML5 specific normalizations to the token stream """
 
         if token["type"] == tokenTypes["StartTag"]:
-            token["data"] = OrderedDict(token['data'][::-1])
+            raw = token["data"]
+            token["data"] = OrderedDict(raw)
+            if len(raw) > len(token["data"]):
+                # we had some duplicated attribute, fix so first wins
+                token["data"].update(raw[::-1])
 
         return token
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 21dc59d9..bcc0bf48 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -1,12 +1,12 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from six import PY2, text_type
+from six import PY2, text_type, unichr
 
 import io
 
 from . import support  # noqa
 
-from html5lib.constants import namespaces
+from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser
 
 
@@ -53,6 +53,21 @@ def test_unicode_file():
     assert parse(io.StringIO("a")) is not None
 
 
+def test_maintain_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    # generate loads to maximize the chance a hash-based mutation will occur
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_duplicate_attribute():
     # This is here because we impl it in parser and not tokenizer
     doc = parse('<p class=a class=b>')
@@ -60,6 +75,20 @@ def test_duplicate_attribute():
     assert el.get("class") == "a"
 
 
+def test_maintain_duplicate_attribute_order():
+    # This is here because we impl it in parser and not tokenizer
+    p = HTMLParser()
+    attrs = [(unichr(x), i) for i, x in enumerate(range(ord('a'), ord('z')))]
+    token = {'name': 'html',
+             'selfClosing': False,
+             'selfClosingAcknowledged': False,
+             'type': tokenTypes["StartTag"],
+             'data': attrs + [('a', len(attrs))]}
+    out = p.normalizeToken(token)
+    attr_order = list(out["data"].keys())
+    assert attr_order == [x for x, i in attrs]
+
+
 def test_debug_log():
     parser = HTMLParser(debug=True)
     parser.parse("<!doctype html><title>a</title><p>b<script>c</script>d</p>e")
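For reference, a minimal standalone sketch of the behaviour change above. It is
not part of the patch: normalize() is a hypothetical stand-in for the parser's
normalizeToken, applied to a bare list of (name, value) pairs instead of the
whole token dict.

from collections import OrderedDict

def normalize(attrs):
    # same logic as the patched normalizeToken body, for a list of
    # (name, value) pairs given in document order
    data = OrderedDict(attrs)
    if len(attrs) > len(data):
        # a duplicated attribute collapsed some keys; re-applying the
        # pairs in reverse makes the first occurrence's value win, and
        # update() leaves the already-established key order untouched
        data.update(attrs[::-1])
    return data

# The old code built OrderedDict(token['data'][::-1]), handing the tree
# builder the attributes in reverse document order:
attrs = [('a', '1'), ('b', '2'), ('c', '3')]
assert list(OrderedDict(attrs[::-1])) == ['c', 'b', 'a']  # old: reversed
assert list(normalize(attrs)) == ['a', 'b', 'c']          # new: document order

# With a duplicate, document order is kept and the first value still wins:
dup = [('class', 'a'), ('id', 'x'), ('class', 'b')]
out = normalize(dup)
assert list(out) == ['class', 'id']
assert out['class'] == 'a'

The reverse-then-update step only runs when a duplicate actually collapsed the
dict, so the common no-duplicate case stays a single OrderedDict construction.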