diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 31943f357f49f3..dff9a8ede9b601 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1044,16 +1044,24 @@ def test_telurl_params(self): self.assertEqual(p1.params, 'phone-context=+1-914-555') def test_Quoter_repr(self): - quoter = urllib.parse.Quoter(urllib.parse._ALWAYS_SAFE) + quoter = urllib.parse._Quoter(urllib.parse._ALWAYS_SAFE) self.assertIn('Quoter', repr(quoter)) + def test_clear_cache_for_code_coverage(self): + urllib.parse.clear_cache() + + def test_urllib_parse_getattr_failure(self): + """Test that urllib.parse.__getattr__() fails correctly.""" + with self.assertRaises(AttributeError): + unused = urllib.parse.this_does_not_exist + def test_all(self): expected = [] undocumented = { 'splitattr', 'splithost', 'splitnport', 'splitpasswd', 'splitport', 'splitquery', 'splittag', 'splittype', 'splituser', 'splitvalue', - 'Quoter', 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap', + 'ResultBase', 'clear_cache', 'to_bytes', 'unwrap', } for name in dir(urllib.parse): if name.startswith('_') or name in undocumented: @@ -1245,6 +1253,12 @@ def test_unwrap(self): class DeprecationTest(unittest.TestCase): + def test_Quoter_deprecation(self): + with self.assertWarns(DeprecationWarning) as cm: + old_class = urllib.parse.Quoter + self.assertIs(old_class, urllib.parse._Quoter) + self.assertIn('Quoter will be removed', str(cm.warning)) + def test_splittype_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splittype('') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index b35997bc00ce16..bf16d0f42e5794 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -27,10 +27,11 @@ test_urlparse.py provides a good indicator of parsing behavior. 
""" +from collections import namedtuple +import functools import re import sys import types -import collections import warnings __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", @@ -81,15 +82,10 @@ # Unsafe bytes to be removed per WHATWG spec _UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] -# XXX: Consider replacing with functools.lru_cache -MAX_CACHE_SIZE = 20 -_parse_cache = {} - def clear_cache(): - """Clear the parse cache and the quoters cache.""" - _parse_cache.clear() - _safe_quoters.clear() - + """Clear internal performance caches. Undocumented; some tests want it.""" + urlsplit.cache_clear() + _byte_quoter_factory.cache_clear() # Helpers for bytes handling # For 3.2, we deliberately require applications that @@ -243,8 +239,6 @@ def _hostinfo(self): return hostname, port -from collections import namedtuple - _DefragResultBase = namedtuple('DefragResult', 'url fragment') _SplitResultBase = namedtuple( 'SplitResult', 'scheme netloc path query fragment') @@ -434,6 +428,9 @@ def _checknetloc(netloc): raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") +# typed=True avoids BytesWarnings being emitted during cache key +# comparison since this API supports both bytes and str input. 
+@functools.lru_cache(typed=True) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> @@ -462,12 +459,6 @@ def urlsplit(url, scheme='', allow_fragments=True): scheme = scheme.replace(b, "") allow_fragments = bool(allow_fragments) - key = url, scheme, allow_fragments, type(url), type(scheme) - cached = _parse_cache.get(key, None) - if cached: - return _coerce_result(cached) - if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth - clear_cache() netloc = query = fragment = '' i = url.find(':') if i > 0: @@ -488,7 +479,6 @@ def urlsplit(url, scheme='', allow_fragments=True): url, query = url.split('?', 1) _checknetloc(netloc) v = SplitResult(scheme, netloc, url, query, fragment) - _parse_cache[key] = v return _coerce_result(v) def urlunparse(components): @@ -791,23 +781,30 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): b'0123456789' b'_.-~') _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -_safe_quoters = {} -class Quoter(collections.defaultdict): - """A mapping from bytes (in range(0,256)) to strings. +def __getattr__(name): + if name == 'Quoter': + warnings.warn('Deprecated in 3.11. ' + 'urllib.parse.Quoter will be removed in Python 3.14. ' + 'It was not intended to be a public API.', + DeprecationWarning, stacklevel=2) + return _Quoter + raise AttributeError(f'module {__name__!r} has no attribute {name!r}') + +class _Quoter(dict): + """A mapping from bytes numbers (in range(0,256)) to strings. String values are percent-encoded byte values, unless the key < 128, and - in the "safe" set (either the specified safe set, or default set). + in either of the specified safe set, or the always safe set. """ - # Keeps a cache internally, using defaultdict, for efficiency (lookups + # Keeps a cache internally, via __missing__, for efficiency (lookups # of cached keys don't call Python code at all). 
def __init__(self, safe): """safe: bytes object.""" self.safe = _ALWAYS_SAFE.union(safe) def __repr__(self): - # Without this, will just display as a defaultdict - return "<%s %r>" % (self.__class__.__name__, dict(self)) + return f"<Quoter {dict(self)!r}>" def __missing__(self, b): # Handle a cache miss. Store quoted string in cache and return. @@ -886,6 +883,11 @@ def quote_plus(string, safe='', encoding=None, errors=None): string = quote(string, safe + space, encoding, errors) return string.replace(' ', '+') +# Expectation: A typical program is unlikely to create more than 5 of these. +@functools.lru_cache +def _byte_quoter_factory(safe): + return _Quoter(safe).__getitem__ + def quote_from_bytes(bs, safe='/'): """Like quote(), but accepts a bytes object rather than a str, and does not perform string-to-bytes encoding. It always returns an ASCII string. @@ -899,13 +901,11 @@ def quote_from_bytes(bs, safe='/'): # Normalize 'safe' by converting to bytes and removing non-ASCII chars safe = safe.encode('ascii', 'ignore') else: + # List comprehensions are faster than generator expressions. safe = bytes([c for c in safe if c < 128]) if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): return bs.decode() - try: - quoter = _safe_quoters[safe] - except KeyError: - _safe_quoters[safe] = quoter = Quoter(safe).__getitem__ + quoter = _byte_quoter_factory(safe) return ''.join([quoter(char) for char in bs]) def urlencode(query, doseq=False, safe='', encoding=None, errors=None, diff --git a/Misc/NEWS.d/next/Library/2021-05-01-15-43-37.bpo-44002.KLT_wd.rst b/Misc/NEWS.d/next/Library/2021-05-01-15-43-37.bpo-44002.KLT_wd.rst new file mode 100644 index 00000000000000..9d662d9827a91d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-05-01-15-43-37.bpo-44002.KLT_wd.rst @@ -0,0 +1,5 @@ +:mod:`urllib.parse` now uses :func:`functools.lru_cache` for its internal URL +splitting and quoting caches instead of rolling its own like it's the '90s. 
+ +The undocumented internal :mod:`urllib.parse` ``Quoter`` class API is now +deprecated, for removal in 3.14. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy