Skip to content

Commit 05fcd62

Browse files
willkg authored and gsnedders committed
Fix alphabeticalattributes filter namespace problem (html5lib#324)
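If a tag has one attribute with a None namespace and another with a str namespace, this filter would fail with a TypeError in Python 3, because None and str cannot be compared when sorting. This commit fixes that by treating a None namespace as an empty string in the sort key. Fixes html5lib#322.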
1 parent 984f934 commit 05fcd62

File tree

2 files changed

+93
-1
lines changed

2 files changed

+93
-1
lines changed

html5lib/filters/alphabeticalattributes.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,24 @@
88
from ordereddict import OrderedDict
99

1010

11+
def _attr_key(attr):
12+
"""Return an appropriate key for an attribute for sorting
13+
14+
Attributes have a namespace that can be either ``None`` or a string. We
15+
can't compare the two because they're different types, so we convert
16+
``None`` to an empty string first.
17+
18+
"""
19+
return (attr[0][0] or ''), attr[0][1]
20+
21+
1122
class Filter(base.Filter):
1223
def __iter__(self):
1324
for token in base.Filter.__iter__(self):
1425
if token["type"] in ("StartTag", "EmptyTag"):
1526
attrs = OrderedDict()
1627
for name, value in sorted(token["data"].items(),
17-
key=lambda x: x[0]):
28+
key=_attr_key):
1829
attrs[name] = value
1930
token["data"] = attrs
2031
yield token
Lines changed: 81 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,81 @@
1+
from __future__ import absolute_import, division, unicode_literals
2+
3+
try:
4+
from collections import OrderedDict
5+
except ImportError:
6+
from ordereddict import OrderedDict
7+
8+
import pytest
9+
10+
import html5lib
11+
from html5lib.filters.alphabeticalattributes import Filter
12+
from html5lib.serializer import HTMLSerializer
13+
14+
15+
@pytest.mark.parametrize('msg, attrs, expected_attrs', [
16+
(
17+
'no attrs',
18+
{},
19+
{}
20+
),
21+
(
22+
'one attr',
23+
{(None, 'alt'): 'image'},
24+
OrderedDict([((None, 'alt'), 'image')])
25+
),
26+
(
27+
'multiple attrs',
28+
{
29+
(None, 'src'): 'foo',
30+
(None, 'alt'): 'image',
31+
(None, 'style'): 'border: 1px solid black;'
32+
},
33+
OrderedDict([
34+
((None, 'alt'), 'image'),
35+
((None, 'src'), 'foo'),
36+
((None, 'style'), 'border: 1px solid black;')
37+
])
38+
),
39+
])
40+
def test_alphabetizing(msg, attrs, expected_attrs):
41+
tokens = [{'type': 'StartTag', 'name': 'img', 'data': attrs}]
42+
output_tokens = list(Filter(tokens))
43+
44+
attrs = output_tokens[0]['data']
45+
assert attrs == expected_attrs
46+
47+
48+
def test_with_different_namespaces():
49+
tokens = [{
50+
'type': 'StartTag',
51+
'name': 'pattern',
52+
'data': {
53+
(None, 'id'): 'patt1',
54+
('http://www.w3.org/1999/xlink', 'href'): '#patt2'
55+
}
56+
}]
57+
output_tokens = list(Filter(tokens))
58+
59+
attrs = output_tokens[0]['data']
60+
assert attrs == OrderedDict([
61+
((None, 'id'), 'patt1'),
62+
(('http://www.w3.org/1999/xlink', 'href'), '#patt2')
63+
])
64+
65+
66+
def test_with_serializer():
67+
"""Verify filter works in the context of everything else"""
68+
parser = html5lib.HTMLParser()
69+
dom = parser.parseFragment('<svg><pattern xlink:href="#patt2" id="patt1"></svg>')
70+
walker = html5lib.getTreeWalker('etree')
71+
ser = HTMLSerializer(
72+
alphabetical_attributes=True,
73+
quote_attr_values='always'
74+
)
75+
76+
# FIXME(willkg): The "xlink" namespace gets dropped by the serializer. When
77+
# that gets fixed, we can fix this expected result.
78+
assert (
79+
ser.render(walker(dom)) ==
80+
'<svg><pattern id="patt1" href="#patt2"></pattern></svg>'
81+
)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy