diff --git a/src/Symfony/Component/BrowserKit/AbstractBrowser.php b/src/Symfony/Component/BrowserKit/AbstractBrowser.php index f69beb5da91aa..bf5ff90e5dd60 100644 --- a/src/Symfony/Component/BrowserKit/AbstractBrowser.php +++ b/src/Symfony/Component/BrowserKit/AbstractBrowser.php @@ -37,6 +37,7 @@ abstract class AbstractBrowser protected $internalResponse; protected $response; protected $crawler; + protected bool $useHtml5Parser = true; protected $insulated = false; protected $redirect; protected $followRedirects = true; @@ -207,6 +208,18 @@ public function getCrawler(): Crawler return $this->crawler; } + /** + * Sets whether parsing should be done using "masterminds/html5". + * + * @return $this + */ + public function useHtml5Parser(bool $useHtml5Parser): static + { + $this->useHtml5Parser = $useHtml5Parser; + + return $this; + } + /** * Returns the current BrowserKit Response instance. */ @@ -497,7 +510,7 @@ protected function createCrawlerFromContent(string $uri, string $content, string return null; } - $crawler = new Crawler(null, $uri); + $crawler = new Crawler(null, $uri, null, $this->useHtml5Parser); $crawler->addContent($content, $type); return $crawler; diff --git a/src/Symfony/Component/BrowserKit/CHANGELOG.md b/src/Symfony/Component/BrowserKit/CHANGELOG.md index a730a86bf4e70..2d2ea9a75c2c8 100644 --- a/src/Symfony/Component/BrowserKit/CHANGELOG.md +++ b/src/Symfony/Component/BrowserKit/CHANGELOG.md @@ -1,6 +1,11 @@ CHANGELOG ========= +6.3 +--- + + * Add `AbstractBrowser::useHtml5Parser()` + 6.1 --- diff --git a/src/Symfony/Component/DomCrawler/CHANGELOG.md b/src/Symfony/Component/DomCrawler/CHANGELOG.md index 1cd98759a3270..be1c0ba143f92 100644 --- a/src/Symfony/Component/DomCrawler/CHANGELOG.md +++ b/src/Symfony/Component/DomCrawler/CHANGELOG.md @@ -4,6 +4,7 @@ CHANGELOG 6.3 --- + * Add `$useHtml5Parser` argument to `Crawler` * Add `CrawlerSelectorCount` test constraint * Add argument `$normalizeWhitespace` to `Crawler::innerText()` * Make 
`Crawler::innerText()` return the first non-empty text diff --git a/src/Symfony/Component/DomCrawler/Crawler.php b/src/Symfony/Component/DomCrawler/Crawler.php index 8176fee4e6d4d..59eec3068c9e7 100644 --- a/src/Symfony/Component/DomCrawler/Crawler.php +++ b/src/Symfony/Component/DomCrawler/Crawler.php @@ -58,16 +58,17 @@ class Crawler implements \Countable, \IteratorAggregate */ private bool $isHtml = true; - private HTML5 $html5Parser; + + private ?HTML5 $html5Parser = null; /** * @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A Node to use as the base for the crawling */ - public function __construct(\DOMNodeList|\DOMNode|array|string $node = null, string $uri = null, string $baseHref = null) + public function __construct(\DOMNodeList|\DOMNode|array|string $node = null, string $uri = null, string $baseHref = null, bool $useHtml5Parser = true) { $this->uri = $uri; $this->baseHref = $baseHref ?: $uri; - $this->html5Parser = new HTML5(['disable_html_ns' => true]); + $this->html5Parser = $useHtml5Parser ? 
new HTML5(['disable_html_ns' => true]) : null; $this->cachedNamespaces = new \ArrayObject(); $this->add($node); @@ -621,7 +622,7 @@ public function html(string $default = null): string $node = $this->getNode(0); $owner = $node->ownerDocument; - if ('' === $owner->saveXML($owner->childNodes[0])) { + if ($this->html5Parser && '' === $owner->saveXML($owner->childNodes[0])) { $owner = $this->html5Parser; } @@ -642,7 +643,7 @@ public function outerHtml(): string $node = $this->getNode(0); $owner = $node->ownerDocument; - if ('' === $owner->saveXML($owner->childNodes[0])) { + if ($this->html5Parser && '' === $owner->saveXML($owner->childNodes[0])) { $owner = $this->html5Parser; } @@ -1215,6 +1216,10 @@ private function parseHtmlString(string $content, string $charset): \DOMDocument private function canParseHtml5String(string $content): bool { + if (!$this->html5Parser) { + return false; + } + if (false === ($pos = stripos($content, ''))) { return false; } diff --git a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php index 840a1c8263dad..e682ff405a349 100644 --- a/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php +++ b/src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTestCase.php @@ -21,9 +21,9 @@ abstract class AbstractCrawlerTestCase extends TestCase { abstract public static function getDoctype(): string; - protected function createCrawler($node = null, string $uri = null, string $baseHref = null) + protected function createCrawler($node = null, string $uri = null, string $baseHref = null, bool $useHtml5Parser = true) { - return new Crawler($node, $uri, $baseHref); + return new Crawler($node, $uri, $baseHref, $useHtml5Parser); } public function testConstructor() diff --git a/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php b/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php index 82558a7dd1d80..ceba8e7c06e90 100644 --- 
a/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php +++ b/src/Symfony/Component/DomCrawler/Tests/Html5ParserCrawlerTest.php @@ -46,6 +46,20 @@ public function testHtml5ParserWithInvalidHeadedContent(string $content) self::assertEmpty($crawler->filterXPath('//h1')->text(), '->addHtmlContent failed as expected'); } + public function testHtml5ParserNotSameAsNativeParserForSpecificHtml() + { + // HTML that triggers a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596) + $html = $this->getDoctype().'
Foo
'; + + $html5Crawler = $this->createCrawler(null, null, null, true); + $html5Crawler->add($html); + + $nativeCrawler = $this->createCrawler(null, null, null, false); + $nativeCrawler->add($html); + + $this->assertNotEquals($nativeCrawler->filterXPath('//h1')->text(), $html5Crawler->filterXPath('//h1')->text(), 'Native parser and Html5 parser must be different'); + } + public static function validHtml5Provider(): iterable { $html = self::getDoctype().'Foo
';Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: