Skip to content

Commit a025d87

Browse files
committed
Escape < in XML attributes
Fixes jhy#528
1 parent 1e09df6 commit a025d87

File tree

3 files changed

+21
-2
lines changed

3 files changed

+21
-2
lines changed

CHANGES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ jsoup changelog
2828
rules as a URL read from Nodes.absUrl(String).
2929
<https://github.com/jhy/jsoup/issues/585>
3030

31+
* When serialising XML, ensure that '<' characters in attributes are escaped, per spec. Not required in HTML.
32+
<https://github.com/jhy/jsoup/issues/528>
33+
3134
*** Release 1.8.2 [2015-Apr-13]
3235
* Performance improvements for parsing HTML on Android, of 1.5x to 1.9x, with larger parses getting a bigger
3336
speed increase. For non-Android JREs, around 1.1x to 1.2x.

src/main/java/org/jsoup/nodes/Entities.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ static void escape(StringBuilder accum, String string, Document.OutputSettings o
118118
accum.append("&#xa0;");
119119
break;
120120
case '<':
121-
if (!inAttribute)
121+
// escape when in character data or when in an XML attribute value; not needed in an HTML attribute value
122+
if (!inAttribute || escapeMode == EscapeMode.xhtml)
122123
accum.append("&lt;");
123124
else
124125
accum.append(c);

src/test/java/org/jsoup/nodes/EntitiesTest.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ public class EntitiesTest {
1414
String escapedAscii = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(base));
1515
String escapedAsciiFull = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(extended));
1616
String escapedAsciiXhtml = Entities.escape(text, new OutputSettings().charset("ascii").escapeMode(xhtml));
17-
String escapedUtfFull = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(base));
17+
String escapedUtfFull = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(extended));
1818
String escapedUtfMin = Entities.escape(text, new OutputSettings().charset("UTF-8").escapeMode(xhtml));
1919

2020
assertEquals("Hello &amp;&lt;&gt; &Aring; &aring; &#x3c0; &#x65b0; there &frac34; &copy; &raquo;", escapedAscii);
@@ -86,4 +86,19 @@ public class EntitiesTest {
8686
String string = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2";
8787
assertEquals(string, Entities.unescape(string));
8888
}
89+
90+
@Test public void escapesGtInXmlAttributesButNotInHtml() {
91+
// https://github.com/jhy/jsoup/issues/528 - < is OK in HTML attribute values, but not in XML
92+
93+
94+
String docHtml = "<a title='<p>One</p>'>One</a>";
95+
Document doc = Jsoup.parse(docHtml);
96+
Element element = doc.select("a").first();
97+
98+
doc.outputSettings().escapeMode(base);
99+
assertEquals("<a title=\"<p>One</p>\">One</a>", element.outerHtml());
100+
101+
doc.outputSettings().escapeMode(xhtml);
102+
assertEquals("<a title=\"&lt;p>One&lt;/p>\">One</a>", element.outerHtml());
103+
}
89104
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy