diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php
index 4efbbf9b2d880..70a4b607dcdca 100644
--- a/src/Symfony/Component/DomCrawler/Crawler.php
+++ b/src/Symfony/Component/DomCrawler/Crawler.php
@@ -69,6 +69,7 @@ public function __construct($node = null, string $uri = null, string $baseHref =
     {
         $this->uri = $uri;
         $this->baseHref = $baseHref ?: $uri;
+        $this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
 
         $this->add($node);
     }
@@ -190,13 +191,7 @@ public function addContent($content, $type = null)
     public function addHtmlContent($content, $charset = 'UTF-8')
     {
         // Use HTML5 parser if the content is HTML5 and the library is available
-        if (!$this->html5Parser
-            && class_exists(HTML5::class)
-            && '<!doctype html>' === strtolower(substr(ltrim($content), 0, 15))) {
-            $this->html5Parser = new HTML5(['disable_html_ns' => true]);
-        }
-
-        $dom = null !== $this->html5Parser ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
+        $dom = null !== $this->html5Parser && strspn($content, " \t\r\n") === stripos($content, '<!doctype html>') ? $this->parseHtml5($content, $charset) : $this->parseXhtml($content, $charset);
         $this->addDocument($dom);
 
         $base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
@@ -599,18 +594,16 @@ public function html(/* $default = null */)
             throw new \InvalidArgumentException('The current node list is empty.');
         }
 
-        if (null !== $this->html5Parser) {
-            $html = '';
-            foreach ($this->getNode(0)->childNodes as $child) {
-                $html .= $this->html5Parser->saveHTML($child);
-            }
+        $node = $this->getNode(0);
+        $owner = $node->ownerDocument;
 
-            return $html;
+        if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
+            $owner = $this->html5Parser;
         }
 
         $html = '';
-        foreach ($this->getNode(0)->childNodes as $child) {
-            $html .= $child->ownerDocument->saveHTML($child);
+        foreach ($node->childNodes as $child) {
+            $html .= $owner->saveHTML($child);
         }
 
         return $html;
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: