Skip to content

Commit e4e9e21

Browse files
Greg Guthe (g-k)
authored and committed
fix bug 1621692
1 parent 0d88dd8 commit e4e9e21

File tree

2 files changed

+37
-3
lines changed

2 files changed

+37
-3
lines changed

bleach/html5lib_shim.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,18 @@ def next_possible_entity(text):
533533

534534

535535
class BleachHTMLSerializer(HTMLSerializer):
536-
"""HTMLSerializer that undoes & -> &amp; in attributes"""
536+
"""HTMLSerializer that undoes & -> &amp; in attributes and sets
537+
escape_rcdata to True
538+
"""
539+
540+
# per the HTMLSerializer.__init__ docstring:
541+
#
542+
# Whether to escape characters that need to be
543+
# escaped within normal elements within rcdata elements such as
544+
# style.
545+
#
546+
escape_rcdata = True
547+
537548
def escape_base_amp(self, stoken):
538549
"""Escapes just bare & in HTML attribute values"""
539550
# First, undo escaping of &. We need to do this because html5lib's

tests/test_clean.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from bleach import clean
66
from bleach.html5lib_shim import Filter
77
from bleach.sanitizer import Cleaner
8-
8+
from bleach._vendor.html5lib.constants import rcdataElements
99

1010
def test_clean_idempotent():
1111
"""Make sure that applying the filter twice doesn't change anything."""
@@ -787,7 +787,7 @@ def test_nonexistent_namespace():
787787
(
788788
raw_tag,
789789
"<noscript><%s></noscript><img src=x onerror=alert(1) />" % raw_tag,
790-
"<noscript><%s></noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
790+
"<noscript>&lt;%s&gt;</noscript>&lt;img src=x onerror=alert(1) /&gt;" % raw_tag,
791791
)
792792
for raw_tag in _raw_tags
793793
],
@@ -797,6 +797,29 @@ def test_noscript_rawtag_(raw_tag, data, expected):
797797
assert clean(data, tags=["noscript", raw_tag]) == expected
798798

799799

800+
@pytest.mark.parametrize(
801+
"namespace_tag, rc_data_element_tag, data, expected",
802+
[
803+
(
804+
namespace_tag,
805+
rc_data_element_tag,
806+
"<%s><%s><img src=x onerror=alert(1)>" % (namespace_tag, rc_data_element_tag),
807+
"<%s><%s>&lt;img src=x onerror=alert(1)&gt;</%s></%s>" % (namespace_tag, rc_data_element_tag, rc_data_element_tag, namespace_tag),
808+
)
809+
for namespace_tag in ["math", "svg"]
810+
# https://dev.w3.org/html5/html-author/#rcdata-elements
811+
# https://html.spec.whatwg.org/index.html#parsing-html-fragments
812+
# in html5lib: 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', and 'noscript'
813+
for rc_data_element_tag in rcdataElements
814+
],
815+
)
816+
def test_namespace_rc_data_element_strip_false(namespace_tag, rc_data_element_tag, data, expected):
817+
# refs: bug 1621692 / GHSA-m6xf-fq7q-8743
818+
#
819+
# browsers will pull the img out of the namespace and rc data tag resulting in XSS
820+
assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
821+
822+
800823
def get_ids_and_tests():
801824
"""Retrieves regression tests from data/ directory
802825

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy