Skip to content

Commit 9b8d8eb

Browse files
committed
Fix #11, #12: quote attributes that need escaping in legacy browsers
These are mostly out of the market now, so this isn't massively needed any more; nevertheless, avoiding XSS as much as possible is inevitably desirable. This alters the API so that quote_attr_values is now a ternary setting, choosing between legacy-safe behaviour, spec behaviour, and always quoting.
1 parent 4768c64 commit 9b8d8eb

File tree

5 files changed

+132
-11
lines changed

5 files changed

+132
-11
lines changed

CHANGES.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ Released on XXX
3333
* **Use scripting disabled by default (as we don't implement
3434
scripting).**
3535

36+
* **Fix #11, avoiding the XSS bug potentially caused by serializer
37+
allowing attribute values to be escaped out of in old browser versions,
38+
changing the quote_attr_values option on serializer to take one of
39+
three values, "always" (the old True value), "legacy" (the new option,
40+
and the new default), and "spec" (the old False value, and the old
41+
default).**
42+
3643

3744
0.9999999/1.0b8
3845
~~~~~~~~~~~~~~~

html5lib/serializer/htmlserializer.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,16 @@
1010

1111
spaceCharacters = "".join(spaceCharacters)
1212

13-
quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]")
13+
quoteAttributeSpecChars = spaceCharacters + "\"'=<>`"
14+
quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]")
15+
quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars +
16+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
17+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
18+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
19+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
20+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
21+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
22+
"\u3000]")
1423

1524
try:
1625
from codecs import register_error, xmlcharrefreplace_errors
@@ -72,7 +81,7 @@ def htmlentityreplace_errors(exc):
7281
class HTMLSerializer(object):
7382

7483
# attribute quoting options
75-
quote_attr_values = False
84+
quote_attr_values = "legacy" # be secure by default
7685
quote_char = '"'
7786
use_best_quote_char = True
7887

@@ -108,9 +117,9 @@ def __init__(self, **kwargs):
108117
inject_meta_charset=True|False
109118
Whether it insert a meta element to define the character set of the
110119
document.
111-
quote_attr_values=True|False
120+
quote_attr_values="legacy"|"spec"|"always"
112121
Whether to quote attribute values that don't require quoting
113-
per HTML5 parsing rules.
122+
per legacy browser behaviour, when required by the standard, or always.
114123
quote_char=u'"'|u"'"
115124
Use given quote character for attribute quoting. Default is to
116125
use double quote unless attribute value contains a double quote,
@@ -239,10 +248,15 @@ def serialize(self, treewalker, encoding=None):
239248
(k not in booleanAttributes.get(name, tuple()) and
240249
k not in booleanAttributes.get("", tuple())):
241250
yield self.encodeStrict("=")
242-
if self.quote_attr_values:
251+
if self.quote_attr_values == "always" or len(v) == 0:
243252
quote_attr = True
253+
elif self.quote_attr_values == "spec":
254+
quote_attr = quoteAttributeSpec.search(v) is not None
255+
elif self.quote_attr_values == "legacy":
256+
quote_attr = quoteAttributeLegacy.search(v) is not None
244257
else:
245-
quote_attr = len(v) == 0 or quoteAttributeSpec.search(v)
258+
raise ValueError("quote_attr_values must be one of: "
259+
"'always', 'spec', or 'legacy'")
246260
v = v.replace("&", "&amp;")
247261
if self.escape_lt_in_attrs:
248262
v = v.replace("<", "&lt;")

html5lib/tests/serializer-testdata/core.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@
242242
},
243243
{
244244
"expected": [
245-
"<span title=foo\u000bbar>"
245+
"<span title=\"foo\u000bbar\">"
246246
],
247247
"input": [
248248
[

html5lib/tests/serializer-testdata/options.test

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
]
4242
]
4343
],
44-
"description": "quote_attr_values=true",
44+
"description": "quote_attr_values='always'",
4545
"options": {
46-
"quote_attr_values": true
46+
"quote_attr_values": "always"
4747
}
4848
},
4949
{
@@ -64,9 +64,78 @@
6464
]
6565
]
6666
],
67-
"description": "quote_attr_values=true with irrelevant",
67+
"description": "quote_attr_values='always' with irrelevant",
6868
"options": {
69-
"quote_attr_values": true
69+
"quote_attr_values": "always"
70+
}
71+
},
72+
{
73+
"expected": [
74+
"<div class=\"foo\">"
75+
],
76+
"input": [
77+
[
78+
"StartTag",
79+
"http://www.w3.org/1999/xhtml",
80+
"div",
81+
[
82+
{
83+
"namespace": null,
84+
"name": "class",
85+
"value": "foo"
86+
}
87+
]
88+
]
89+
],
90+
"description": "non-minimized quote_attr_values='always'",
91+
"options": {
92+
"quote_attr_values": "always"
93+
}
94+
},
95+
{
96+
"expected": [
97+
"<div class=foo>"
98+
],
99+
"input": [
100+
[
101+
"StartTag",
102+
"http://www.w3.org/1999/xhtml",
103+
"div",
104+
[
105+
{
106+
"namespace": null,
107+
"name": "class",
108+
"value": "foo"
109+
}
110+
]
111+
]
112+
],
113+
"description": "non-minimized quote_attr_values='legacy'",
114+
"options": {
115+
"quote_attr_values": "legacy"
116+
}
117+
},
118+
{
119+
"expected": [
120+
"<div class=foo>"
121+
],
122+
"input": [
123+
[
124+
"StartTag",
125+
"http://www.w3.org/1999/xhtml",
126+
"div",
127+
[
128+
{
129+
"namespace": null,
130+
"name": "class",
131+
"value": "foo"
132+
}
133+
]
134+
]
135+
],
136+
"description": "non-minimized quote_attr_values='spec'",
137+
"options": {
138+
"quote_attr_values": "spec"
70139
}
71140
},
72141
{

html5lib/tests/test_serializer.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,37 @@ def testComment():
146146
throwsWithLatin1([["Comment", "\u0101"]])
147147

148148

149+
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"))
150+
def testSpecQuoteAttribute(c):
151+
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
152+
[{"namespace": None, "name": "foo", "value": c}]]]
153+
if c == '"':
154+
output_ = ["<span foo='%s'>" % c]
155+
else:
156+
output_ = ['<span foo="%s">' % c]
157+
options_ = {"quote_attr_values": "spec"}
158+
runSerializerTest(input_, output_, options_)
159+
160+
161+
@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`"
162+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
163+
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
164+
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
165+
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
166+
"\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
167+
"\u2008\u2009\u200a\u2028\u2029\u202f\u205f"
168+
"\u3000"))
169+
def testLegacyQuoteAttribute(c):
170+
input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span",
171+
[{"namespace": None, "name": "foo", "value": c}]]]
172+
if c == '"':
173+
output_ = ["<span foo='%s'>" % c]
174+
else:
175+
output_ = ['<span foo="%s">' % c]
176+
options_ = {"quote_attr_values": "legacy"}
177+
runSerializerTest(input_, output_, options_)
178+
179+
149180
@pytest.fixture
150181
def lxml_parser():
151182
return etree.XMLParser(resolve_entities=False)

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy