From a284d69de1d1a42714576d4a9562145a94e62127 Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Sat, 12 Nov 2022 15:43:33 -0500 Subject: [PATCH 1/2] gh-99418: Prevent urllib.parse.urlparse from accepting schemes that don't begin with an alphabetical ASCII character. --- Lib/test/test_urlparse.py | 18 ++++++++++++++++++ Lib/urllib/parse.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 59a601d9e85b08..80fb9e5cd2a445 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -668,6 +668,24 @@ def test_attributes_bad_port(self): with self.assertRaises(ValueError): p.port + def test_attributes_bad_scheme(self): + """Check handling of invalid schemes.""" + for bytes in (False, True): + for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): + for scheme in (".", "+", "-", "0", "http&", "६http"): + with self.subTest(bytes=bytes, parse=parse, scheme=scheme): + url = scheme + "://www.example.net" + if bytes: + if url.isascii(): + url = url.encode("ascii") + else: + continue + p = parse(url) + if bytes: + self.assertEqual(p.scheme, b"") + else: + self.assertEqual(p.scheme, "") + def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it # should allow the username, hostname, and port to be filled diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 9a3102afd63b9c..4f6867accbc0eb 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -460,7 +460,7 @@ def urlsplit(url, scheme='', allow_fragments=True): allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') - if i > 0: + if i > 0 and url[0].isascii() and url[0].isalpha(): for c in url[:i]: if c not in scheme_chars: break From 09d30c9cff31d6a96563b609174a00d0b568fcfa Mon Sep 17 00:00:00 2001 From: Ben Kallus Date: Sat, 12 Nov 2022 15:46:31 -0500 Subject: [PATCH 2/2] gh-99418: Prevent urllib.parse.urlparse from accepting schemes that don't begin with an alphabetical ASCII character. --- .../next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst diff --git a/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst new file mode 100644 index 00000000000000..0a06e7c5c6ac48 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst @@ -0,0 +1,2 @@ +Fix bug in :func:`urllib.parse.urlparse` that causes URL schemes that begin +with a digit, a plus sign, or a minus sign to be parsed incorrectly. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy