|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file is part of the Symfony package. |
| 5 | + * |
| 6 | + * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | + * |
| 8 | + * For the full copyright and license information, please view the LICENSE |
| 9 | + * file that was distributed with this source code. |
| 10 | + */ |
| 11 | + |
| 12 | +namespace Symfony\Component\HtmlSanitizer; |
| 13 | + |
| 14 | +use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser; |
| 15 | +use Symfony\Component\HtmlSanitizer\Parser\ParserInterface; |
| 16 | +use Symfony\Component\HtmlSanitizer\Reference\W3CReference; |
| 17 | +use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer; |
| 18 | +use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor; |
| 19 | +use function Symfony\Component\String\u; |
| 20 | + |
/**
 * Sanitizes HTML strings using the element rules of an HtmlSanitizerConfig.
 *
 * The input is validated, truncated and parsed, then the resulting DOM is
 * walked by a DomVisitor built for the requested W3C context.
 *
 * @author Titouan Galopin <galopintitouan@gmail.com>
 */
class HtmlSanitizer
{
    private HtmlSanitizerConfig $config;
    private int $maxInputLength;
    private ParserInterface $parser;

    /**
     * DOM visitors created on first use, indexed by W3CReference context.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param HtmlSanitizerConfig  $config         Sanitization rules; the instance is cloned on construction
     * @param int                  $maxInputLength Inputs longer than this are truncated before being parsed
     * @param ParserInterface|null $parser         HTML parser to use; a MastermindsParser is created when null
     */
    public function __construct(HtmlSanitizerConfig $config, int $maxInputLength = 20000, ?ParserInterface $parser = null)
    {
        $this->config = clone $config;
        $this->maxInputLength = $maxInputLength;
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the given input using the body context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the given input for insertion inside the given element.
     *
     * The context is looked up in W3CReference::CONTEXTS_MAP; elements that
     * are not listed there fall back to the body context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXTS_MAP[$element] ?? W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Runs the sanitization pipeline for a given context.
     *
     * Returns an empty string when the input is not valid UTF-8 or when the
     * parser throws an exception.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build the DOM visitor once and reuse it afterwards
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        /*
         * Only operate on valid UTF-8 strings. This is necessary to prevent cross
         * site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
         *
         * This check must run before u() is called below: building a unicode
         * string from malformed UTF-8 throws instead of letting us return ''.
         */
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Prevent DOS attack induced by extremely long HTML strings.
        // A string never has more graphemes than bytes, so the cheap byte-length
        // test avoids normalizing/measuring inputs that cannot exceed the limit.
        if (\strlen($input) > $this->maxInputLength) {
            $uInput = u($input);
            if ($uInput->length() > $this->maxInputLength) {
                $input = $uInput->slice(0, $this->maxInputLength)->toString();
            }
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        try {
            $parsed = $this->parser->parse($input);
        } catch (\Exception) {
            // Unparsable input is treated as empty rather than propagated
            return '';
        }

        return $this->domVisitors[$context]->visit($parsed)->render();
    }

    /**
     * Tells whether the given string is valid UTF-8 (an empty string is valid).
     */
    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8.
        return '' === $html || 1 === preg_match('/^./us', $html);
    }

    /**
     * Builds the DOM visitor for a context from the configured elements.
     *
     * In the head context only elements listed in W3CReference::HEAD_ELEMENTS
     * are kept; in any other context only elements NOT listed there are kept.
     * Blocked elements are registered with the value false and override an
     * allowed entry of the same name.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        $isHeadContext = W3CReference::CONTEXT_HEAD === $context;
        $allowedNodes = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if (\in_array($allowedElement, W3CReference::HEAD_ELEMENTS, true) === $isHeadContext) {
                $allowedNodes[$allowedElement] = $allowedAttributes;
            }
        }

        foreach ($this->config->getBlockedElements() as $blockedElement => $v) {
            if (\in_array($blockedElement, W3CReference::HEAD_ELEMENTS, true) === $isHeadContext) {
                $allowedNodes[$blockedElement] = false;
            }
        }

        return new DomVisitor($allowedNodes, $this->config);
    }
}
0 commit comments