Skip to content

Commit 8e12e67

Browse files
author
Edward Z. Yang
committed
Implement revised table foster parenting algo from r3382
1 parent e130ac0 commit 8e12e67

File tree

2 files changed

+90
-32
lines changed

2 files changed

+90
-32
lines changed

SPEC

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1-
3354
1+
3382
2+
3+
This is the last revision of the spec this library has been audited against.
4+
5+
Excluding: 3374
26

3-
(this is the last revision of the spec this library has been audited against)

library/HTML5/TreeBuilder.php

Lines changed: 85 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class HTML5_TreeBuilder {
7070
'p','param','plaintext','pre','script','select','spacer','style',
7171
'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
7272

73+
private $pendingTableCharacters;
74+
private $pendingTableCharactersDirty;
75+
7376
// Tree construction modes
7477
const INITIAL = 0;
7578
const BEFORE_HTML = 1;
@@ -80,19 +83,20 @@ class HTML5_TreeBuilder {
8083
const IN_BODY = 6;
8184
const IN_CDATA_RCDATA = 7;
8285
const IN_TABLE = 8;
83-
const IN_CAPTION = 9;
84-
const IN_COLUMN_GROUP = 10;
85-
const IN_TABLE_BODY = 11;
86-
const IN_ROW = 12;
87-
const IN_CELL = 13;
88-
const IN_SELECT = 14;
89-
const IN_SELECT_IN_TABLE= 15;
90-
const IN_FOREIGN_CONTENT= 16;
91-
const AFTER_BODY = 17;
92-
const IN_FRAMESET = 18;
93-
const AFTER_FRAMESET = 19;
94-
const AFTER_AFTER_BODY = 20;
95-
const AFTER_AFTER_FRAMESET = 21;
86+
const IN_TABLE_TEXT = 9;
87+
const IN_CAPTION = 10;
88+
const IN_COLUMN_GROUP = 11;
89+
const IN_TABLE_BODY = 12;
90+
const IN_ROW = 13;
91+
const IN_CELL = 14;
92+
const IN_SELECT = 15;
93+
const IN_SELECT_IN_TABLE= 16;
94+
const IN_FOREIGN_CONTENT= 17;
95+
const AFTER_BODY = 18;
96+
const IN_FRAMESET = 19;
97+
const AFTER_FRAMESET = 20;
98+
const AFTER_AFTER_BODY = 21;
99+
const AFTER_AFTER_FRAMESET = 22;
96100

97101
/**
98102
* Converts a magic number to a readable name. Use for debugging.
@@ -1940,17 +1944,21 @@ public function emitToken($token, $mode = null) {
19401944
case self::IN_TABLE:
19411945
$clear = array('html', 'table');
19421946

1943-
/* A character token that is one of one of U+0009 CHARACTER TABULATION,
1944-
U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
1945-
or U+0020 SPACE */
1946-
if($token['type'] === HTML5_Tokenizer::SPACECHARACTER &&
1947-
/* If the current table is tainted, then act as described in
1948-
* the "anything else" entry below. */
1949-
// Note: hsivonen has a test that fails due to this line
1950-
// because he wants to convince Hixie not to do taint
1951-
!$this->currentTableIsTainted()) {
1952-
/* Append the character to the current node. */
1953-
$this->insertText($token['data']);
1947+
/* A character token */
1948+
if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
1949+
$token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
1950+
/* Let the pending table character tokens
1951+
* be an empty list of tokens. */
1952+
$this->pendingTableCharacters = "";
1953+
$this->pendingTableCharactersDirty = false;
1954+
/* Let the original insertion mode be the current
1955+
* insertion mode. */
1956+
$this->original_mode = $this->mode;
1957+
/* Switch the insertion mode to
1958+
* "in table text" and
1959+
* reprocess the token. */
1960+
$this->mode = self::IN_TABLE_TEXT;
1961+
$this->emitToken($token);
19541962

19551963
/* A comment token */
19561964
} elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
@@ -2096,6 +2104,57 @@ public function emitToken($token, $mode = null) {
20962104
}
20972105
break;
20982106

2107+
case self::IN_TABLE_TEXT:
2108+
/* A character token */
2109+
if($token['type'] === HTML5_Tokenizer::CHARACTER) {
2110+
/* Append the character token to the pending table
2111+
* character tokens list. */
2112+
$this->pendingTableCharacters .= $token['data'];
2113+
$this->pendingTableCharactersDirty = true;
2114+
} elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2115+
$this->pendingTableCharacters .= $token['data'];
2116+
/* Anything else */
2117+
} else {
2118+
if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
2119+
/* If any of the tokens in the pending table character tokens list
2120+
* are character tokens that are not one of U+0009 CHARACTER
2121+
* TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2122+
* U+0020 SPACE, then reprocess those character tokens using the
2123+
* rules given in the "anything else" entry in the "in table"
2124+
* insertion mode.*/
2125+
if ($this->pendingTableCharactersDirty) {
2126+
/* Parse error. Process the token using the rules for the
2127+
* "in body" insertion mode, except that if the current
2128+
* node is a table, tbody, tfoot, thead, or tr element,
2129+
* then, whenever a node would be inserted into the current
2130+
* node, it must instead be foster parented. */
2131+
// XERROR
2132+
$old = $this->foster_parent;
2133+
$this->foster_parent = true;
2134+
$text_token = array(
2135+
'type' => HTML5_Tokenizer::CHARACTER,
2136+
'data' => $this->pendingTableCharacters,
2137+
);
2138+
$this->processWithRulesFor($text_token, self::IN_BODY);
2139+
$this->foster_parent = $old;
2140+
2141+
/* Otherwise, insert the characters given by the pending table
2142+
* character tokens list into the current node. */
2143+
} else {
2144+
$this->insertText($this->pendingTableCharacters);
2145+
}
2146+
$this->pendingTableCharacters = null;
2147+
$this->pendingTableCharactersNull = null;
2148+
}
2149+
2150+
/* Switch the insertion mode to the original insertion mode and
2151+
* reprocess the token.
2152+
*/
2153+
$this->mode = $this->original_mode;
2154+
$this->emitToken($token);
2155+
}
2156+
break;
2157+
20992158
case self::IN_CAPTION:
21002159
/* An end tag whose tag name is "caption" */
21012160
if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
@@ -3458,12 +3517,8 @@ private function getFosterParent() {
34583517
public function fosterParent($node) {
34593518
$foster_parent = $this->getFosterParent();
34603519
$table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
3461-
/* When a node node is to be foster parented, the node node must be
3462-
* inserted into the foster parent element, and the current table must
3463-
* be marked as tainted. (Once the current table has been tainted,
3464-
* whitespace characters are inserted into the foster parent element
3465-
* instead of the current node.) */
3466-
$table->tainted = true;
3520+
/* When a node node is to be foster parented, the node node must be
3521+
* inserted into the foster parent element. */
34673522
/* If the foster parent element is the parent element of the last table
34683523
* element in the stack of open elements, then node must be inserted
34693524
* immediately before the last table element in the stack of open

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy