diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 59a601d9e85b08..80fb9e5cd2a445 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -668,6 +668,24 @@ def test_attributes_bad_port(self): with self.assertRaises(ValueError): p.port + def test_attributes_bad_scheme(self): + """Check handling of invalid schemes.""" + for bytes in (False, True): + for parse in (urllib.parse.urlsplit, urllib.parse.urlparse): + for scheme in (".", "+", "-", "0", "http&", "६http"): + with self.subTest(bytes=bytes, parse=parse, scheme=scheme): + url = scheme + "://www.example.net" + if bytes: + if url.isascii(): + url = url.encode("ascii") + else: + continue + p = parse(url) + if bytes: + self.assertEqual(p.scheme, b"") + else: + self.assertEqual(p.scheme, "") + def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it # should allow the username, hostname, and port to be filled diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 9a3102afd63b9c..4f6867accbc0eb 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -460,7 +460,7 @@ def urlsplit(url, scheme='', allow_fragments=True): allow_fragments = bool(allow_fragments) netloc = query = fragment = '' i = url.find(':') - if i > 0: + if i > 0 and url[0].isascii() and url[0].isalpha(): for c in url[:i]: if c not in scheme_chars: break diff --git a/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst new file mode 100644 index 00000000000000..0a06e7c5c6ac48 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-12-15-45-51.gh-issue-99418.FxfAXS.rst @@ -0,0 +1,2 @@ +Fix bug in :func:`urllib.parse.urlparse` that causes URL schemes that begin +with a digit, a plus sign, or a minus sign to be parsed incorrectly.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: