From c91c5ce960e2c67b79cd094d22a5d4c82bfdca35 Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Sun, 31 Mar 2024 16:45:07 +0200 Subject: [PATCH 1/6] Increase HTML standard compliance for closing comment tags --- Lib/_markupbase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py index 3ad7e279960f7e..2e2e00dd4b1f6f 100644 --- a/Lib/_markupbase.py +++ b/Lib/_markupbase.py @@ -9,7 +9,7 @@ _declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match _declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match -_commentclose = re.compile(r'--\s*>') +_commentclose = re.compile(r'--!?>') _markedsectionclose = re.compile(r']\s*]\s*>') # An analysis of the MS-Word extensions is available at From 4536d8bbc618d3de3251450457220a9b9697a52a Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 31 Mar 2024 14:57:20 +0000 Subject: [PATCH 2/6] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst diff --git a/Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst b/Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst new file mode 100644 index 00000000000000..f031c37774433c --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-03-31-14-57-20.gh-issue-102555.2P8jGn.rst @@ -0,0 +1 @@ +Follow the `parsing recommendation `_ and `standard `_ for closing comment tag in the :mod:`html.parser`. Increased compliance leads to predictable behavior, thus enhancing security. From 9147ff64f87e8c268300be493889ab2b4aefc77f Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Sat, 6 Apr 2024 15:00:01 +0200 Subject: [PATCH 3/6] Add more edge cases and tests --- Lib/_markupbase.py | 4 ++-- Lib/test/test_htmlparser.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py index 2e2e00dd4b1f6f..3c6986600eb64c 100644 --- a/Lib/_markupbase.py +++ b/Lib/_markupbase.py @@ -81,7 +81,7 @@ def parse_declaration(self, i): # A simple, practical version could look like: ((name|stringlit) S*) + '>' n = len(rawdata) if rawdata[j:j+2] == '--': #comment - # Locate --.*-- as the body of the comment + # Locate the body of the comment. return self.parse_comment(i) elif rawdata[j] == '[': #marked section # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section @@ -166,7 +166,7 @@ def parse_comment(self, i, report=1): rawdata = self.rawdata if rawdata[i:i+4] != '' '' '' - '') + '' + '' + '' + '' + '') expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), ('comment', ''), ('comment', '--I have many hyphens--'), ('comment', ' I have a > in the middle '), - ('comment', ' and I have -- in the middle! ')] + ('comment', ' and I have -- in the middle! '), + ('comment', ''), + ('comment', ''), + ('comment', ' Date: Sat, 6 Apr 2024 15:18:49 +0200 Subject: [PATCH 4/6] Add invalid HTML comment closing tags test cases --- Lib/test/test_htmlparser.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 3c5c87d1ba72ba..937be33061e8ce 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -328,7 +328,10 @@ def test_comments(self): '' '' '' - '') + '' + '' + '' + '') expected = [('comment', " I'm a valid comment "), ('comment', 'me too!'), ('comment', '--'), @@ -339,7 +342,11 @@ def test_comments(self): ('comment', ''), ('comment', ''), ('comment', ''), + ('comment', 'Me too (invalid character) --x>'), + ('comment', 'Me too (invalid characters) --cheese>') + ] self._run_check(html, expected) def test_condcoms(self): From 9e687bf2c2aeb83cd7522e3d8ead17520ad27b8b Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Sat, 6 Apr 2024 15:27:32 +0200 Subject: [PATCH 5/6] Add html closing comment tags test cases Handle the test cases mentioned in #102555 --- Lib/test/test_htmlparser.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 937be33061e8ce..62fc4403a1fe30 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -329,6 +329,8 @@ def test_comments(self): '' '' '' + '' '' '' '') @@ -343,6 +345,8 @@ def test_comments(self): ('comment', ''), ('comment', ''), ('comment', 'Me too (invalid character) --x>'), ('comment', 'Me too (invalid characters) --cheese>') From caba26781b36a0e2e7309d361b128434f6a64419 Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Sat, 6 Apr 2024 16:15:30 +0200 Subject: [PATCH 6/6] Add EOF abrupted comment tag case handling and tests --- Lib/_markupbase.py | 10 ++++++++-- Lib/html/parser.py | 2 +- Lib/test/test_htmlparser.py | 7 ++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/Lib/_markupbase.py b/Lib/_markupbase.py index 3c6986600eb64c..ac95f44b69a316 100644 --- a/Lib/_markupbase.py +++ b/Lib/_markupbase.py @@ -161,13 +161,19 @@ def parse_marked_section(self, i, report=1): self.unknown_decl(rawdata[i+3: j]) return match.end(0) - # Internal -- parse comment, return length or -1 if not terminated - def parse_comment(self, i, report=1): + # Internal -- parse comment + # if end is True, returns EOF location if no close tag is found, otherwise + # return length or -1 if not terminated + def parse_comment(self, i, report=1, end=False): rawdata = self.rawdata if rawdata[i:i+4] != '' '' '' - '') + '' + '

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy