Skip to content

Commit ba2ae30

Browse files
committed
[HtmlSanitizer] Introduce HtmlSanitizer component
1 parent b2e7fcd commit ba2ae30

31 files changed

+3800
-0
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
/Tests export-ignore
2+
/phpunit.xml.dist export-ignore
3+
/.gitattributes export-ignore
4+
/.gitignore export-ignore
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
composer.lock
2+
phpunit.xml
3+
vendor/
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
CHANGELOG
2+
=========
3+
4+
6.1.0
5+
-----
6+
7+
* added the component as experimental
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <fabien@symfony.com>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
namespace Symfony\Component\HtmlSanitizer;
13+
14+
use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
15+
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
16+
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
17+
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
18+
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;
19+
use function Symfony\Component\String\u;
20+
21+
/**
22+
* @author Titouan Galopin <galopintitouan@gmail.com>
23+
*/
24+
class HtmlSanitizer
{
    /**
     * Private copy of the configuration given to the constructor.
     */
    private HtmlSanitizerConfig $config;

    /**
     * Maximum input length, as measured by the String component's u() helper,
     * beyond which the input is truncated before being parsed.
     */
    private int $maxInputLength;

    private ParserInterface $parser;

    /**
     * Lazily built visitors, indexed by the W3CReference context they were created for.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config         Cloned on construction, so later changes to the
     *                                             caller's instance do not affect this sanitizer
     * @param int                  $maxInputLength Inputs longer than this are truncated before parsing
     * @param ParserInterface|null $parser         Parser to use; a MastermindsParser is created when null
     */
    public function __construct(HtmlSanitizerConfig $config, int $maxInputLength = 20000, ?ParserInterface $parser = null)
    {
        $this->config = clone $config;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the given input using the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input using the context mapped to $element in
     * W3CReference::CONTEXTS_MAP, falling back to the body context for
     * elements that are not in the map.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXTS_MAP[$element] ?? W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes $input for the given context.
     *
     * Returns an empty string when the input is not valid UTF-8 or when the
     * parser throws an exception.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build a DOM visitor
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         *
         * This check must run before u() is called: the String component rejects
         * malformed UTF-8 with an exception, which would otherwise escape from
         * this method instead of producing an empty result.
         */
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Prevent DOS attack induced by extremely long HTML strings
        $uInput = u($input);
        if ($uInput->length() > $this->maxInputLength) {
            $input = $uInput->slice(0, $this->maxInputLength)->toString();
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        try {
            $parsed = $this->parser->parse($input);
        } catch (\Exception) {
            // A parser failure deliberately results in an empty output
            return '';
        }

        return $this->domVisitors[$context]->visit($parsed)->render();
    }

    /**
     * Tells whether $html is an empty string or a valid UTF-8 string.
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('/^./us', $html);
    }

    /**
     * Builds the DOM visitor for a context from the configured allowed and blocked elements.
     *
     * In the head context only the elements listed in W3CReference::HEAD_ELEMENTS
     * are kept; in any other context only the elements NOT in that list are kept.
     * Allowed elements map to their configured attributes, blocked elements map to false.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        // Head keeps head elements, every other context keeps the non-head ones
        $keepHeadElements = W3CReference::CONTEXT_HEAD === $context;
        $allowedNodes = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if (\in_array($allowedElement, W3CReference::HEAD_ELEMENTS, true) === $keepHeadElements) {
                $allowedNodes[$allowedElement] = $allowedAttributes;
            }
        }

        // Blocked elements are processed after allowed ones, so they win on conflict
        foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
            if (\in_array($blockedElement, W3CReference::HEAD_ELEMENTS, true) === $keepHeadElements) {
                $allowedNodes[$blockedElement] = false;
            }
        }

        return new DomVisitor($allowedNodes, $this->config);
    }
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy