From 196e0bd39bf070f26d682d23762320febcf50d63 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Jul 2025 13:07:15 +0300 Subject: [PATCH 1/2] [3.14] gh-135661: Fix parsing attributes with whitespaces around the "=" separator in HTMLParser (GH-136908) This fixes a regression introduced in GH-135930. (cherry picked from commit dee650189497735edbc08a54edabb5b06ef1bd09) Co-authored-by: Serhiy Storchaka --- Lib/html/parser.py | 4 +-- Lib/test/test_htmlparser.py | 28 +++++++++++-------- Misc/NEWS.d/3.14.0b4.rst | 2 +- ...-07-21-14-15-25.gh-issue-135661.nAxXw5.rst | 2 ++ 4 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2025-07-21-14-15-25.gh-issue-135661.nAxXw5.rst diff --git a/Lib/html/parser.py b/Lib/html/parser.py index 9b4f09599134bd..7eea885cfe63c5 100644 --- a/Lib/html/parser.py +++ b/Lib/html/parser.py @@ -45,7 +45,7 @@ ( (?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name ) - (= # value indicator + ([\t\n\r\f ]*=[\t\n\r\f ]* # value indicator ('[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value @@ -57,7 +57,7 @@ [a-zA-Z][^\t\n\r\f />]* # tag name [\t\n\r\f /]* # optional whitespace before attribute name (?:(?<=['"\t\n\r\f /])[^\t\n\r\f />][^\t\n\r\f /=>]* # attribute name - (?:= # value indicator + (?:[\t\n\r\f ]*=[\t\n\r\f ]* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\t\n\r\f ]* # bare value diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 15cad061889a79..47c0752fb517b9 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -623,7 +623,7 @@ def test_correct_detection_of_start_tags(self): html = '
The rain' expected = [ - ('starttag', 'div', [('style', ''), (',', None), ('foo', None), ('=', None), ('"bar"', None)]), + ('starttag', 'div', [('style', ''), (',', None), ('foo', 'bar')]), ('starttag', 'b', []), ('data', 'The '), ('starttag', 'a', [('href', 'some_url')]), @@ -813,12 +813,12 @@ def test_attr_syntax(self): ] self._run_check("""""", output) self._run_check("", [('starttag', 'a', [('foo', '=bar')])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', None), ('=bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\v', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo\xa0', 'bar')])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) - self._run_check("", [('starttag', 'a', [('foo', ''), ('bar', None)])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) + self._run_check("", [('starttag', 'a', [('foo', 'bar')])]) self._run_check("", [('starttag', 'a', [('foo', '\vbar')])]) self._run_check("", [('starttag', 'a', [('foo', '\xa0bar')])]) @@ -829,8 +829,8 @@ def test_attr_values(self): ("d", "\txyz\n")])]) self._run_check("""""", [("starttag", "a", [("b", ""), ("c", "")])]) - self._run_check("", - [("starttag", "a", [("b", ""), ("c", "")])]) + self._run_check("", + [('starttag', 'a', [('b', 'x'), ('c', 'y')])]) self._run_check("", [("starttag", "a", [("b", "\v"), ("c", "\xa0")])]) # Regression test for SF patch #669683. @@ -899,13 +899,17 @@ def test_malformed_attributes(self): ) expected = [ ('starttag', 'a', [('href', "test'style='color:red;bad1'")]), - ('data', 'test - bad1'), ('endtag', 'a'), + ('data', 'test - bad1'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'+style='color:red;ba2'")]), - ('data', 'test - bad2'), ('endtag', 'a'), + ('data', 'test - bad2'), + ('endtag', 'a'), ('starttag', 'a', [('href', "test'\xa0style='color:red;bad3'")]), - ('data', 'test - bad3'), ('endtag', 'a'), - ('starttag', 'a', [('href', None), ('=', None), ("test' style", 'color:red;bad4')]), - ('data', 'test - bad4'), ('endtag', 'a') + ('data', 'test - bad3'), + ('endtag', 'a'), + ('starttag', 'a', [('href', "test'\xa0style='color:red;bad4'")]), + ('data', 'test - bad4'), + ('endtag', 'a'), ] self._run_check(html, expected) diff --git a/Misc/NEWS.d/3.14.0b4.rst b/Misc/NEWS.d/3.14.0b4.rst index b96f96caa3f280..2d01cb1cb62466 100644 --- a/Misc/NEWS.d/3.14.0b4.rst +++ b/Misc/NEWS.d/3.14.0b4.rst @@ -75,7 +75,7 @@ to the HTML5 standard. * Multiple ``=`` between attribute name and value are no longer collapsed. E.g. ```` produces attribute "foo" with value "=bar". -* Whitespaces between the ``=`` separator and attribute name or value are no +* [REVERTED] Whitespaces between the ``=`` separator and attribute name or value are no longer ignored. E.g. ```` produces two attributes "foo" and "=bar", both with value None; ```` produces two attributes: "foo" with value "" and "bar" with value None. diff --git a/Misc/NEWS.d/next/Security/2025-07-21-14-15-25.gh-issue-135661.nAxXw5.rst b/Misc/NEWS.d/next/Security/2025-07-21-14-15-25.gh-issue-135661.nAxXw5.rst new file mode 100644 index 00000000000000..533e4df8626b90 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2025-07-21-14-15-25.gh-issue-135661.nAxXw5.rst @@ -0,0 +1,2 @@ +Fix parsing attributes with whitespaces around the ``=`` separator in +:class:`html.parser.HTMLParser` according to the HTML5 standard. From 1ba5fed8c72c60ac08f6ece9e3f4e6a91615fc37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 21 Jul 2025 18:05:32 +0200 Subject: [PATCH 2/2] Update 3.14.0b4.rst --- Misc/NEWS.d/3.14.0b4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/3.14.0b4.rst b/Misc/NEWS.d/3.14.0b4.rst index 2d01cb1cb62466..349023ec75865d 100644 --- a/Misc/NEWS.d/3.14.0b4.rst +++ b/Misc/NEWS.d/3.14.0b4.rst @@ -75,7 +75,7 @@ to the HTML5 standard. * Multiple ``=`` between attribute name and value are no longer collapsed. E.g. ```` produces attribute "foo" with value "=bar". -* [REVERTED] Whitespaces between the ``=`` separator and attribute name or value are no +* [Reverted in :gh:`136927`] Whitespaces between the ``=`` separator and attribute name or value are no longer ignored. E.g. ```` produces two attributes "foo" and "=bar", both with value None; ```` produces two attributes: "foo" with value "" and "bar" with value None. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy