diff --git a/library/HTML5/TreeBuilder.php b/library/HTML5/TreeBuilder.php index c6e4b1f..359e225 100644 --- a/library/HTML5/TreeBuilder.php +++ b/library/HTML5/TreeBuilder.php @@ -31,6 +31,7 @@ // XERROR - with regards to parse errors // XSCRIPT - with regards to scripting mode // XENCODING - with regards to encoding (for reparsing tests) +// XSKETCHY - godawful workarounds class HTML5_TreeBuilder { public $stack = array(); @@ -128,6 +129,15 @@ private function strConst($number) { const NS_XML = 'http://www.w3.org/XML/1998/namespace'; const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; + public $nsToPrefix = array( + self::NS_HTML => '', + self::NS_MATHML => 'math:', + self::NS_SVG => 'svg:', + self::NS_XLINK => 'xlink:', + self::NS_XML => 'xml:', + self::NS_XMLNS => 'xmlns:', + ); + public function __construct() { $this->mode = self::INITIAL; $this->dom = new DOMDocument; @@ -140,7 +150,6 @@ public function __construct() { // Process tag tokens public function emitToken($token, $mode = null) { - // XXX: ignore parse errors... why are we emitting them, again? if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return; if ($mode === null) $mode = $this->mode; @@ -195,9 +204,6 @@ public function emitToken($token, $mode = null) { * doctype attribute of the Document object. */ if (!isset($token['public'])) $token['public'] = null; if (!isset($token['system'])) $token['system'] = null; - // Yes this is hacky. I'm kind of annoyed that I can't appendChild - // a doctype to DOMDocument. Maybe I haven't chanted the right - // syllables. $impl = new DOMImplementation(); // This call can fail for particularly pathological cases (namely, // the qualifiedName parameter ($token['name']) could be missing. @@ -1753,7 +1759,7 @@ public function emitToken($token, $mode = null) { * elements with an entry for the new element, and * let node be the new element. */ // we don't know what the token is anymore - $clone = $node->cloneNode(); + $clone = $this->cloneNode($node); $a_pos = array_search($node, $this->a_formatting, true); $s_pos = array_search($node, $this->stack, true); $this->a_formatting[$a_pos] = $clone; @@ -1794,7 +1800,7 @@ public function emitToken($token, $mode = null) { /* 8. Create an element for the token for which the * formatting element was created. */ - $clone = $formatting_element->cloneNode(); + $clone = $this->cloneNode($formatting_element); /* 9. Take all of the child nodes of the furthest block and append them to the element created in the @@ -3177,7 +3183,7 @@ private function reconstructActiveFormattingElements() { } /* 8. Perform a shallow clone of the element entry to obtain clone. */ - $clone = $entry->cloneNode(); + $clone = $this->cloneNode($entry); /* 9. Append clone to the current node and push it onto the stack of open elements so that it is the new current node. */ @@ -3672,22 +3678,25 @@ public function insertForeignElement($token, $namespaceURI) { if (!empty($token['attr'])) { foreach ($token['attr'] as $kp) { $attr = $kp['name']; + // XSKETCHY: this entire thing is a hack to get around + // DOM's really bad XML implementation if (is_array($attr)) { $ns = $attr[2]; $attr = $attr[1]; } else { $ns = self::NS_HTML; } - if (!$el->hasAttributeNS($ns, $attr)) { - // XSKETCHY: work around godawful libxml bug - if ($ns === self::NS_XLINK) { - $el->setAttribute('xlink:'.$attr, $kp['value']); - } elseif ($ns === self::NS_HTML) { - // Another godawful libxml bug - $el->setAttribute($attr, $kp['value']); - } else { - $el->setAttributeNS($ns, $attr, $kp['value']); + if ($ns === self::NS_XML) { + // this is special cased since DOM converts xml:lang + // into lang + $el->setAttributeNS($ns, $attr, $kp['value']); + } else { + $prefix = $this->nsToPrefix[$ns]; + $el->setAttribute($prefix.$attr, $kp['value']); + if (!isset($el->html5_namespaced)) { + $el->html5_namespaced = array(); } + $el->html5_namespaced[$prefix.$attr] = true; } } } @@ -3701,6 +3710,14 @@ public function insertForeignElement($token, $namespaceURI) { * value is not the XLink Namespace, that is a parse error. */ } + private function cloneNode($node) { + $clone = $node->cloneNode(); + if (isset($node->html5_namespaced)) { + $clone->html5_namespaced = $node->html5_namespaced; + } + return $clone; + } + public function save() { $this->dom->normalize(); if (!$this->fragment) { diff --git a/tests/HTML5/TestData.php b/tests/HTML5/TestData.php index 2f72142..5cea72f 100644 --- a/tests/HTML5/TestData.php +++ b/tests/HTML5/TestData.php @@ -117,22 +117,16 @@ public static function strDom($node, $prefix = '| ') { } $text = "<{$ns}{$next->tagName}>"; foreach ($next->attributes as $attr) { - $ans = ''; - switch ($attr->namespaceURI) { - case HTML5_TreeBuilder::NS_MATHML: - $ans = 'math '; break; - case HTML5_TreeBuilder::NS_SVG: - $ans = 'svg '; break; - case HTML5_TreeBuilder::NS_XLINK: - $ans = 'xlink '; break; - case HTML5_TreeBuilder::NS_XML: - $ans = 'xml '; break; - case HTML5_TreeBuilder::NS_XMLNS: - $ans = 'xmlns '; break; + // XSKETCHY + $name = $attr->name; + if ($attr->namespaceURI === HTML5_TreeBuilder::NS_XML) { + $name = "xml $name"; + } else { + if (isset($next->html5_namespaced[$name])) { + $name = str_replace(':', ' ', $name); + } } - // XSKETCHY: needed for our horrible xlink hack - $name = str_replace(':', ' ', $attr->localName); - $subnodes[] = "{$ans}{$name}=\"{$attr->value}\""; + $subnodes[] = "{$name}=\"{$attr->value}\""; } sort($subnodes); break; pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy