Skip to content

Commit 3a7f39c

Browse files
committed
Fixed an issue where unexpected elements in a badly nested table could be moved to the wrong location in the document.
Fixes jhy#552 Closes jhy#591
1 parent ec60867 commit 3a7f39c

File tree

4 files changed

+54
-1
lines changed

4 files changed

+54
-1
lines changed

CHANGES

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
jsoup changelog
22

33
*** Release 1.8.3 [PENDING]
4-
* Fix an issue in Element.getElementSiblingIndex (and related methods) where sibling elements with the same content
4+
* Fixed an issue in Element.getElementSiblingIndex (and related methods) where sibling elements with the same content
55
would incorrectly have the same sibling index.
66
<https://github.com/jhy/jsoup/issues/554>
77

8+
* Fixed an issue where unexpected elements in a badly nested table could be moved to the wrong location in the
9+
document.
10+
<https://github.com/jhy/jsoup/issues/552>
11+
812
*** Release 1.8.2 [2015-Apr-13]
913
* Performance improvements for parsing HTML on Android, of 1.5x to 1.9x, with larger parses getting a bigger
1014
speed increase. For non-Android JREs, around 1.1x to 1.2x.

src/main/java/org/jsoup/parser/TreeBuilder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,25 @@ protected void runParser() {
5858
protected abstract boolean process(Token token);
5959

6060
protected boolean processStartTag(String name) {
61+
if (currentToken == start) { // don't recycle an in-use token
62+
return process(new Token.StartTag().name(name));
63+
}
6164
return process(start.reset().name(name));
6265
}
6366

6467
public boolean processStartTag(String name, Attributes attrs) {
68+
if (currentToken == start) { // don't recycle an in-use token
69+
return process(new Token.StartTag().nameAttr(name, attrs));
70+
}
6571
start.reset();
6672
start.nameAttr(name, attrs);
6773
return process(start);
6874
}
6975

7076
protected boolean processEndTag(String name) {
77+
if (currentToken == end) { // don't recycle an in-use token
78+
return process(new Token.EndTag().name(name));
79+
}
7180
return process(end.reset().name(name));
7281
}
7382

src/test/java/org/jsoup/parser/HtmlParserTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
import org.jsoup.Jsoup;
44
import org.jsoup.TextUtil;
55
import org.jsoup.helper.StringUtil;
6+
import org.jsoup.integration.ParseTest;
67
import org.jsoup.nodes.*;
78
import org.jsoup.select.Elements;
89
import org.junit.Test;
910

11+
import java.io.File;
12+
import java.io.IOException;
1013
import java.util.List;
1114

1215
import static org.junit.Assert.assertEquals;
@@ -847,4 +850,24 @@ public class HtmlParserTest {
847850
assertEquals(50000, doc.body().childNodeSize());
848851
assertTrue(System.currentTimeMillis() - start < 1000);
849852
}
853+
854+
@Test
855+
public void testInvalidTableContents() throws IOException {
856+
File in = ParseTest.getFile("/htmltests/table-invalid-elements.html");
857+
Document doc = Jsoup.parse(in, "UTF-8");
858+
doc.outputSettings().prettyPrint(true);
859+
String rendered = doc.toString();
860+
int endOfEmail = rendered.indexOf("Comment");
861+
int guarantee = rendered.indexOf("Why am I here?");
862+
assertTrue("Comment not found", endOfEmail > -1);
863+
assertTrue("Search text not found", guarantee > -1);
864+
assertTrue("Search text did not come after comment", guarantee > endOfEmail);
865+
}
866+
867+
@Test public void testNormalisesIsIndex() {
868+
Document doc = Jsoup.parse("<body><isindex action='/submit'></body>");
869+
String html = doc.outerHtml();
870+
assertEquals("<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>",
871+
StringUtil.normaliseWhitespace(doc.body().html()));
872+
}
850873
}
src/test/resources/htmltests/table-invalid-elements.html

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<html>
2+
<body>
3+
<table>
4+
<tr>
5+
<td>
6+
<table>
7+
<tr>
8+
<!--Comment-->
9+
<table>
10+
<p>Why am I here?</p>
11+
</tr>
12+
</table>
13+
</td>
14+
</tr>
15+
</table>
16+
</body>
17+
</html>

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy