From 31712dbb9b98b863ea382789630e4140fc3199e1 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 26 Apr 2020 17:58:54 -0400 Subject: [PATCH 1/3] bpo-39791: Update importlib.resources to support files() API (importlib_resources 1.5). --- Lib/importlib/_common.py | 72 ++++++++++++ Lib/importlib/abc.py | 86 ++++++++++++++ Lib/importlib/resources.py | 162 ++++++++++---------------- Lib/test/test_importlib/test_files.py | 39 +++++++ Lib/test/test_importlib/test_path.py | 1 + 5 files changed, 257 insertions(+), 103 deletions(-) create mode 100644 Lib/importlib/_common.py create mode 100644 Lib/test/test_importlib/test_files.py diff --git a/Lib/importlib/_common.py b/Lib/importlib/_common.py new file mode 100644 index 00000000000000..ba7cbac3c9bfda --- /dev/null +++ b/Lib/importlib/_common.py @@ -0,0 +1,72 @@ +import os +import pathlib +import zipfile +import tempfile +import functools +import contextlib + + +def from_package(package): + """ + Return a Traversable object for the given package. + + """ + spec = package.__spec__ + return from_traversable_resources(spec) or fallback_resources(spec) + + +def from_traversable_resources(spec): + """ + If the spec.loader implements TraversableResources, + directly or implicitly, it will have a ``files()`` method. + """ + with contextlib.suppress(AttributeError): + return spec.loader.files() + + +def fallback_resources(spec): + package_directory = pathlib.Path(spec.origin).parent + try: + archive_path = spec.loader.archive + rel_path = package_directory.relative_to(archive_path) + return zipfile.Path(archive_path, str(rel_path) + '/') + except Exception: + pass + return package_directory + + +@contextlib.contextmanager +def _tempfile(reader, suffix=''): + # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' + # blocks due to the need to close the temporary file to work on Windows + # properly. + fd, raw_path = tempfile.mkstemp(suffix=suffix) + try: + os.write(fd, reader()) + os.close(fd) + yield pathlib.Path(raw_path) + finally: + try: + os.remove(raw_path) + except FileNotFoundError: + pass + + +@functools.singledispatch +@contextlib.contextmanager +def as_file(path): + """ + Given a Traversable object, return that object as a + path on the local file system in a context manager. + """ + with _tempfile(path.read_bytes, suffix=path.name) as local: + yield local + + +@as_file.register(pathlib.Path) +@contextlib.contextmanager +def _(path): + """ + Degenerate behavior for pathlib.Path objects. + """ + yield path diff --git a/Lib/importlib/abc.py b/Lib/importlib/abc.py index b1b5ccce4bd35d..b8a9bb1a21ef77 100644 --- a/Lib/importlib/abc.py +++ b/Lib/importlib/abc.py @@ -14,6 +14,7 @@ _frozen_importlib_external = _bootstrap_external import abc import warnings +from typing import Protocol, runtime_checkable def _register(abstract_cls, *classes): @@ -386,3 +387,88 @@ def contents(self): _register(ResourceReader, machinery.SourceFileLoader) + + +@runtime_checkable +class Traversable(Protocol): + """ + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + """ + + @abc.abstractmethod + def iterdir(self): + """ + Yield Traversable objects in self + """ + + @abc.abstractmethod + def read_bytes(self): + """ + Read contents of self as bytes + """ + + @abc.abstractmethod + def read_text(self, encoding=None): + """ + Read contents of self as bytes + """ + + @abc.abstractmethod + def is_dir(self): + """ + Return True if self is a dir + """ + + @abc.abstractmethod + def is_file(self): + """ + Return True if self is a file + """ + + @abc.abstractmethod + def joinpath(self, child): + """ + Return Traversable child in self + """ + + @abc.abstractmethod + def __truediv__(self, child): + """ + Return Traversable child in self + """ + + @abc.abstractmethod + def open(self, mode='r', *args, **kwargs): + """ + mode may be 'r' or 'rb' to open as text or binary. Return a handle + suitable for reading (same as pathlib.Path.open). + + When opening as text, accepts encoding parameters such as those + accepted by io.TextIOWrapper. + """ + + @abc.abstractproperty + def name(self): + # type: () -> str + """ + The base name of this object without any parent references. + """ + + +class TraversableResources(ResourceReader): + @abc.abstractmethod + def files(self): + """Return a Traversable object for the loaded package.""" + + def open_resource(self, resource): + return self.files().joinpath(resource).open('rb') + + def resource_path(self, resource): + raise FileNotFoundError(resource) + + def is_resource(self, path): + return self.files().joinpath(path).isfile() + + def contents(self): + return (item.name for item in self.files().iterdir()) diff --git a/Lib/importlib/resources.py b/Lib/importlib/resources.py index fc3a1c9cabe636..b803a01c91d652 100644 --- a/Lib/importlib/resources.py +++ b/Lib/importlib/resources.py @@ -1,23 +1,25 @@ import os -import tempfile from . import abc as resources_abc +from . import _common +from ._common import as_file from contextlib import contextmanager, suppress from importlib import import_module from importlib.abc import ResourceLoader from io import BytesIO, TextIOWrapper from pathlib import Path from types import ModuleType -from typing import Iterable, Iterator, Optional, Set, Union # noqa: F401 +from typing import ContextManager, Iterable, Optional, Union from typing import cast from typing.io import BinaryIO, TextIO -from zipimport import ZipImportError __all__ = [ 'Package', 'Resource', + 'as_file', 'contents', + 'files', 'is_resource', 'open_binary', 'open_text', @@ -31,24 +33,23 @@ Resource = Union[str, os.PathLike] +def _resolve(name) -> ModuleType: + """If name is a string, resolve to a module.""" + if hasattr(name, '__spec__'): + return name + return import_module(name) + + def _get_package(package) -> ModuleType: """Take a package name or module object and return the module. - If a name, the module is imported. If the passed or imported module + If a name, the module is imported. If the resolved module object is not a package, raise an exception. """ - if hasattr(package, '__spec__'): - if package.__spec__.submodule_search_locations is None: - raise TypeError('{!r} is not a package'.format( - package.__spec__.name)) - else: - return package - else: - module = import_module(package) - if module.__spec__.submodule_search_locations is None: - raise TypeError('{!r} is not a package'.format(package)) - else: - return module + module = _resolve(package) + if module.__spec__.submodule_search_locations is None: + raise TypeError('{!r} is not a package'.format(package)) + return module def _normalize_path(path) -> str: @@ -59,8 +60,7 @@ def _normalize_path(path) -> str: parent, file_name = os.path.split(path) if parent: raise ValueError('{!r} must be only a file name'.format(path)) - else: - return file_name + return file_name def _get_resource_reader( @@ -89,8 +89,8 @@ def open_binary(package: Package, resource: Resource) -> BinaryIO: reader = _get_resource_reader(package) if reader is not None: return reader.open_resource(resource) - _check_location(package) - absolute_package_path = os.path.abspath(package.__spec__.origin) + absolute_package_path = os.path.abspath( + package.__spec__.origin or 'non-existent file') package_path = os.path.dirname(absolute_package_path) full_path = os.path.join(package_path, resource) try: @@ -109,8 +109,7 @@ def open_binary(package: Package, resource: Resource) -> BinaryIO: message = '{!r} resource not found in {!r}'.format( resource, package_name) raise FileNotFoundError(message) - else: - return BytesIO(data) + return BytesIO(data) def open_text(package: Package, @@ -118,39 +117,12 @@ def open_text(package: Package, encoding: str = 'utf-8', errors: str = 'strict') -> TextIO: """Return a file-like object opened for text reading of the resource.""" - resource = _normalize_path(resource) - package = _get_package(package) - reader = _get_resource_reader(package) - if reader is not None: - return TextIOWrapper(reader.open_resource(resource), encoding, errors) - _check_location(package) - absolute_package_path = os.path.abspath(package.__spec__.origin) - package_path = os.path.dirname(absolute_package_path) - full_path = os.path.join(package_path, resource) - try: - return open(full_path, mode='r', encoding=encoding, errors=errors) - except OSError: - # Just assume the loader is a resource loader; all the relevant - # importlib.machinery loaders are and an AttributeError for - # get_data() will make it clear what is needed from the loader. - loader = cast(ResourceLoader, package.__spec__.loader) - data = None - if hasattr(package.__spec__.loader, 'get_data'): - with suppress(OSError): - data = loader.get_data(full_path) - if data is None: - package_name = package.__spec__.name - message = '{!r} resource not found in {!r}'.format( - resource, package_name) - raise FileNotFoundError(message) - else: - return TextIOWrapper(BytesIO(data), encoding, errors) + return TextIOWrapper( + open_binary(package, resource), encoding=encoding, errors=errors) def read_binary(package: Package, resource: Resource) -> bytes: """Return the binary contents of the resource.""" - resource = _normalize_path(resource) - package = _get_package(package) with open_binary(package, resource) as fp: return fp.read() @@ -164,14 +136,20 @@ def read_text(package: Package, The decoding-related arguments have the same semantics as those of bytes.decode(). """ - resource = _normalize_path(resource) - package = _get_package(package) with open_text(package, resource, encoding, errors) as fp: return fp.read() -@contextmanager -def path(package: Package, resource: Resource) -> Iterator[Path]: +def files(package: Package) -> resources_abc.Traversable: + """ + Get a Traversable resource from a package + """ + return _common.from_package(_get_package(package)) + + +def path( + package: Package, resource: Resource, + ) -> 'ContextManager[Path]': """A context manager providing a file path object to the resource. If the resource does not already exist on its own on the file system, @@ -180,39 +158,23 @@ def path(package: Package, resource: Resource) -> Iterator[Path]: raised if the file was deleted prior to the context manager exiting). """ - resource = _normalize_path(resource) - package = _get_package(package) - reader = _get_resource_reader(package) - if reader is not None: - try: - yield Path(reader.resource_path(resource)) - return - except FileNotFoundError: - pass - else: - _check_location(package) - # Fall-through for both the lack of resource_path() *and* if - # resource_path() raises FileNotFoundError. - package_directory = Path(package.__spec__.origin).parent - file_path = package_directory / resource - if file_path.exists(): - yield file_path - else: - with open_binary(package, resource) as fp: - data = fp.read() - # Not using tempfile.NamedTemporaryFile as it leads to deeper 'try' - # blocks due to the need to close the temporary file to work on - # Windows properly. - fd, raw_path = tempfile.mkstemp() - try: - os.write(fd, data) - os.close(fd) - yield Path(raw_path) - finally: - try: - os.remove(raw_path) - except FileNotFoundError: - pass + reader = _get_resource_reader(_get_package(package)) + return ( + _path_from_reader(reader, resource) + if reader else + _common.as_file(files(package).joinpath(_normalize_path(resource))) + ) + + +@contextmanager +def _path_from_reader(reader, resource): + norm_resource = _normalize_path(resource) + with suppress(FileNotFoundError): + yield Path(reader.resource_path(norm_resource)) + return + opener_reader = reader.open_resource(norm_resource) + with _common._tempfile(opener_reader.read, suffix=norm_resource) as res: + yield res def is_resource(package: Package, name: str) -> bool: @@ -225,17 +187,10 @@ def is_resource(package: Package, name: str) -> bool: reader = _get_resource_reader(package) if reader is not None: return reader.is_resource(name) - try: - package_contents = set(contents(package)) - except (NotADirectoryError, FileNotFoundError): - return False + package_contents = set(contents(package)) if name not in package_contents: return False - # Just because the given file_name lives as an entry in the package's - # contents doesn't necessarily mean it's a resource. Directories are not - # resources, so let's try to find out if it's a directory or not. - path = Path(package.__spec__.origin).parent / name - return path.is_file() + return (_common.from_package(package) / name).is_file() def contents(package: Package) -> Iterable[str]: @@ -250,10 +205,11 @@ def contents(package: Package) -> Iterable[str]: if reader is not None: return reader.contents() # Is the package a namespace package? By definition, namespace packages - # cannot have resources. We could use _check_location() and catch the - # exception, but that's extra work, so just inline the check. - elif package.__spec__.origin is None or not package.__spec__.has_location: + # cannot have resources. + namespace = ( + package.__spec__.origin is None or + package.__spec__.origin == 'namespace' + ) + if namespace or not package.__spec__.has_location: return () - else: - package_directory = Path(package.__spec__.origin).parent - return os.listdir(package_directory) + return list(item.name for item in _common.from_package(package).iterdir()) diff --git a/Lib/test/test_importlib/test_files.py b/Lib/test/test_importlib/test_files.py new file mode 100644 index 00000000000000..fa7af82bf0c28b --- /dev/null +++ b/Lib/test/test_importlib/test_files.py @@ -0,0 +1,39 @@ +import typing +import unittest + +from importlib import resources +from importlib.abc import Traversable +from . import data01 +from . import util + + +class FilesTests: + def test_read_bytes(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_bytes() + assert actual == b'Hello, UTF-8 world!\n' + + def test_read_text(self): + files = resources.files(self.data) + actual = files.joinpath('utf-8.file').read_text() + assert actual == 'Hello, UTF-8 world!\n' + + @unittest.skipUnless( + hasattr(typing, 'runtime_checkable'), + "Only suitable when typing supports runtime_checkable", + ) + def test_traversable(self): + assert isinstance(resources.files(self.data), Traversable) + + +class OpenDiskTests(FilesTests, unittest.TestCase): + def setUp(self): + self.data = data01 + + +class OpenZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/Lib/test/test_importlib/test_path.py b/Lib/test/test_importlib/test_path.py index 2d3dcda7ed2e79..c4e7285411322c 100644 --- a/Lib/test/test_importlib/test_path.py +++ b/Lib/test/test_importlib/test_path.py @@ -17,6 +17,7 @@ def test_reading(self): # Test also implicitly verifies the returned object is a pathlib.Path # instance. with resources.path(self.data, 'utf-8.file') as path: + self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) # pathlib.Path.read_text() was introduced in Python 3.5. with path.open('r', encoding='utf-8') as file: text = file.read() From edf023c548d35fb5203a70936cad4167fcf9a99b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 27 Apr 2020 00:51:44 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst diff --git a/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst b/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst new file mode 100644 index 00000000000000..237bcf7f99b0f0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-04-27-00-51-40.bpo-39791.wv8Dxn.rst @@ -0,0 +1 @@ +Added ``files()`` function to importlib.resources with support for subdirectories in package data, matching backport in importlib_resources 1.5. \ No newline at end of file From 49a116e83ece441397ba0498e4063769f6aaf32f Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 27 Apr 2020 21:02:44 -0400 Subject: [PATCH 3/3] Add some documentation about the new objects added. --- Doc/library/importlib.rst | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Doc/library/importlib.rst b/Doc/library/importlib.rst index a612b1e1455a0c..99bfeacbbc7407 100644 --- a/Doc/library/importlib.rst +++ b/Doc/library/importlib.rst @@ -480,6 +480,8 @@ ABC hierarchy:: .. class:: ResourceReader + *Superseded by TraversableReader* + An :term:`abstract base class` to provide the ability to read *resources*. @@ -795,6 +797,28 @@ ABC hierarchy:: itself does not end in ``__init__``. +.. class:: Traversable + + An object with a subset of pathlib.Path methods suitable for + traversing directories and opening files. + + .. versionadded:: 3.9 + + +.. class:: TraversableReader + + An abstract base class for resource readers capable of serving + the ``files`` interface. Subclasses ResourceReader and provides + concrete implementations of the ResourceReader's abstract + methods. Therefore, any loader supplying TraversableReader + also supplies ResourceReader. + + Loaders that wish to support resource reading are expected to + implement this interface. + + .. versionadded:: 3.9 + + :mod:`importlib.resources` -- Resources --------------------------------------- @@ -853,6 +877,19 @@ The following types are defined. The following functions are available. + +.. function:: files(package) + + Returns an :class:`importlib.resources.abc.Traversable` object + representing the resource container for the package (think directory) + and its resources (think files). A Traversable may contain other + containers (think subdirectories). + + *package* is either a name or a module object which conforms to the + ``Package`` requirements. + + .. versionadded:: 3.9 + .. function:: open_binary(package, resource) Open for binary reading the *resource* within *package*. pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy