From fb02be2d0fdf30f2a1c17bf0c6d37c461c9fef19 Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Sun, 27 Jul 2025 14:45:38 +0000 Subject: [PATCH 1/6] [PATCH] urllib.parse: Restrict IPvFuture regex to RFC 3986-valid characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IPvFuture hostnames in URLs were being matched using a too-permissive regex (`.+`), which allowed invalid characters not defined by RFC 3986. This patch updates the pattern to only accept characters explicitly allowed by the RFC for IPvFuture addresses. According to RFC 3986 §3.2.2, the format of IPvFuture is: "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) Where: - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" Before the fix: >>> import urllib.parse >>> urllib.parse.urlparse("http://[v45.test|test]/path") ParseResult(scheme='http', netloc='[v45.test|test]', path='/path', ...) Invalid characters such as `|` were incorrectly accepted. After the fix: >>> import urllib.parse >>> urllib.parse.urlparse("http://[v45.test|test]/path") Traceback (most recent call last): ... ValueError: IPvFuture address is invalid This improves standards compliance and prevents malformed URLs from being silently accepted. 
--- Lib/urllib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 67d9bbea0d3150..316cee29bfa596 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -460,7 +460,7 @@ def _check_bracketed_netloc(netloc): # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ def _check_bracketed_host(hostname): if hostname.startswith('v'): - if not re.match(r"\Av[a-fA-F0-9]+\..+\z", hostname): + if not re.match(r"\Av[a-fA-F0-9]+\.[\w\.~!$&*+,;=:'()-]+\z", hostname): raise ValueError(f"IPvFuture address is invalid") else: ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 From 9790b029c6f51ef4e387791172cad2aec298dac6 Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Sun, 27 Jul 2025 15:22:13 +0000 Subject: [PATCH 2/6] Add blurb entries for IPvFuture validation fixes --- .../next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst new file mode 100644 index 00000000000000..f2ae232580b61b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst @@ -0,0 +1 @@ +Fix overly permissive validation of IPvFuture hostnames in `urllib.parse`, in compliance with RFC 3986. Invalid characters are now correctly rejected. 
From 9864ce14c59a17e26e8a76de7ad05e4ce643ed82 Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:25:18 +0000 Subject: [PATCH 3/6] gh-137146: Fix unicode characters in PIvFuture --- Lib/urllib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 316cee29bfa596..1ff5fa419ed421 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -460,7 +460,7 @@ def _check_bracketed_netloc(netloc): # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ def _check_bracketed_host(hostname): if hostname.startswith('v'): - if not re.match(r"\Av[a-fA-F0-9]+\.[\w\.~!$&*+,;=:'()-]+\z", hostname): + if not re.match(r"\Av[a-fA-F0-9]+\.[\w\.~!$&*+,;=:'()-]+\z", hostname, flags=re.ASCII): raise ValueError(f"IPvFuture address is invalid") else: ip = ipaddress.ip_address(hostname) # Throws Value Error if not IPv6 or IPv4 From 0f0cc9ba24c8fb7fb89265ba534f4bad173f3f00 Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:27:19 +0000 Subject: [PATCH 4/6] gh-137146: Add tests on IPvFuture --- Lib/test/test_urlparse.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index b2bde5a9b1d696..59f47739911c25 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -3,6 +3,7 @@ import unittest import urllib.parse from test import support +from strings import ascii_letters, digits RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q" @@ -1419,6 +1420,17 @@ def test_invalid_bracketed_hosts(self): self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip') self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') + # unreserved = ALPHA / 
DIGIT / "-" / "." / "_" / "~" + unreserved = ascii_letters + digits + "-" + "." + "_" + "~" + # sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" + sub_delims = "!" + "$" + "&" + "'" + "(" + ")" + "*" + "+" + "," + ";" + "=" + ipvfuture_authorized_characters = unreserved + sub_delims + ":" + removed_characters = "\t\n\r" + for character in range(256): + character = chr(character) + if character in ipvfuture_authorized_characters or character in removed_characters: + continue + self.assertRaises(ValueError, urllib.parse.urlsplit, f'scheme://[v7.invalid{character}invalid]/') def test_splitting_bracketed_hosts(self): p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query') From 5021430693d15d55ba65a6d209697b59a341bf05 Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Mon, 28 Jul 2025 20:30:40 +0000 Subject: [PATCH 5/6] Fix: reStructuredText language syntax --- .../next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst index f2ae232580b61b..14e057ec1efdb1 100644 --- a/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst +++ b/Misc/NEWS.d/next/Library/2025-07-27-15-17-43.gh-issue-137146.4QsMAT.rst @@ -1 +1 @@ -Fix overly permissive validation of IPvFuture hostnames in `urllib.parse`, in compliance with RFC 3986. Invalid characters are now correctly rejected. +Fix overly permissive validation of IPvFuture hostnames in :func:`urllib.parse.urlparse`, in compliance with RFC 3986. Invalid characters are now correctly rejected. 
From d528f19f63ce4c860b144370cc61858e27d3a39a Mon Sep 17 00:00:00 2001 From: Maurice Lambert <50479118+mauricelambert@users.noreply.github.com> Date: Mon, 28 Jul 2025 21:19:41 +0000 Subject: [PATCH 6/6] gh-137146: Fix tests on IPvFuture --- Lib/test/test_urlparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 59f47739911c25..950290ada38b3b 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -3,7 +3,7 @@ import unittest import urllib.parse from test import support -from strings import ascii_letters, digits +from string import ascii_letters, digits RFC1808_BASE = "http://a/b/c/d;p?q#f" RFC2396_BASE = "http://a/b/c/d;p?q"
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: