From 4b29e2e81d094aa902ab145d79ef9f97d0499df1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 23 Jun 2023 21:46:48 +0100 Subject: [PATCH 01/31] Add `pathlib._VirtualPath` --- Lib/pathlib.py | 405 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 320 insertions(+), 85 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e15718dc98d677..d05c499eb085c2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -5,6 +5,7 @@ operating systems. """ +import contextlib import fnmatch import functools import io @@ -15,10 +16,19 @@ import sys import warnings from _collections_abc import Sequence -from errno import ENOENT, ENOTDIR, EBADF, ELOOP +from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes +try: + import pwd +except ImportError: + pwd = None +try: + import grp +except ImportError: + grp = None + __all__ = [ "UnsupportedOperation", @@ -771,23 +781,20 @@ class PureWindowsPath(PurePath): # Filesystem-accessing classes -class Path(PurePath): - """PurePath subclass that can make system calls. - - Path represents a filesystem path but unlike PurePath, also offers - methods to do system calls on path objects. Depending on your system, - instantiating a Path will return either a PosixPath or a WindowsPath - object. You can also instantiate a PosixPath or WindowsPath directly, - but cannot instantiate a WindowsPath on a POSIX system or vice versa. +class _VirtualPath(PurePath): + """PurePath subclass for virtual filesystems, such as archives and remote + storage. """ __slots__ = () + __bytes__ = None + __fspath__ = None def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - return os.stat(self, follow_symlinks=follow_symlinks) + raise UnsupportedOperation(f"{type(self).__name__}.stat()") def lstat(self): """ @@ -854,7 +861,21 @@ def is_mount(self): """ Check if this path is a mount point """ - return self._flavour.ismount(self) + # Need to exist and be a dir + if not self.exists() or not self.is_dir(): + return False + + try: + parent_dev = self.parent.stat().st_dev + except OSError: + return False + + dev = self.stat().st_dev + if dev != parent_dev: + return True + ino = self.stat().st_ino + parent_ino = self.parent.stat().st_ino + return ino == parent_ino def is_symlink(self): """ @@ -875,7 +896,15 @@ def is_junction(self): """ Whether this path is a junction. """ - return self._flavour.isjunction(self) + import stat + try: + return self.lstat().st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT + except OSError as e: + if not _ignore_error(e): + raise + return False + except (ValueError, AttributeError): + return False def is_block_device(self): """ @@ -958,9 +987,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - if "b" not in mode: - encoding = io.text_encoding(encoding) - return io.open(self, mode, buffering, encoding, errors, newline) + raise UnsupportedOperation(f"{type(self).__name__}.open()") def read_bytes(self): """ @@ -1003,14 +1030,10 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - for name in os.listdir(self): - yield self._make_child_relpath(name) + raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") def _scandir(self): - # bpo-24132: a future version of pathlib will support subclassing of - # pathlib.Path to customize how the filesystem is accessed. This - # includes scandir(), which is used to implement glob(). - return os.scandir(self) + return contextlib.nullcontext(list(self.iterdir())) def _make_child_relpath(self, name): sep = self._flavour.sep @@ -1134,13 +1157,13 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): # blow up for a minor reason when (say) a thousand readable # directories are still left to visit. That logic is copied here. try: - scandir_it = path._scandir() + scandir_obj = path._scandir() except OSError as error: if on_error is not None: on_error(error) continue - with scandir_it: + with scandir_obj as scandir_it: dirnames = [] filenames = [] for entry in scandir_it: @@ -1162,6 +1185,210 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + def absolute(self): + """Return an absolute version of this path by prepending the current + working directory. No normalization or symlink resolution is performed. + + Use resolve() to get the canonical path to a file. + """ + raise UnsupportedOperation(f"{type(self).__name__}.absolute()") + + @classmethod + def cwd(cls): + """Return a new path pointing to the current working directory.""" + return cls().absolute() + + def expanduser(self): + """ Return a new path with expanded ~ and ~user constructs + (as returned by os.path.expanduser) + """ + raise UnsupportedOperation(f"{type(self).__name__}.expanduser()") + + @classmethod + def home(cls): + """Return a new path pointing to the user's home directory (as + returned by os.path.expanduser('~')). + """ + return cls("~").expanduser() + + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + raise UnsupportedOperation(f"{type(self).__name__}.readlink()") + + def resolve(self, strict=False): + """ + Resolve '..' segments in the path. Where possible, make the path + absolute and resolve symlinks on the way. + """ + try: + path = self.absolute() + tail_idx = len(path._tail) - len(self._tail) + except UnsupportedOperation: + path = self + tail_idx = 0 + if not path._tail: + return path + drv = path.drive + root = path.root + tail = list(path._tail) + dirty = False + link_count = 0 + readlink_supported = True + while tail_idx < len(tail): + if tail[tail_idx] == '..': + if tail_idx == 0: + if root: + # Delete '..' part immediately following root. + del tail[tail_idx] + dirty = True + continue + elif tail[tail_idx - 1] != '..': + # Delete '..' part and its predecessor. + tail_idx -= 1 + del tail[tail_idx:tail_idx + 2] + dirty = True + continue + elif readlink_supported: + link = self._from_parsed_parts(drv, root, tail[:tail_idx + 1]) + try: + link_target = link.readlink() + except UnsupportedOperation: + readlink_supported = False + except OSError as e: + if e.errno != EINVAL: + if strict: + raise + else: + break + else: + link_count += 1 + if link_count >= 40: + raise OSError(ELOOP, "Symlink loop", path) + elif link_target.root or link_target.drive: + link_target = link.parent / link_target + drv = link_target.drive + root = link_target.root + tail[:tail_idx + 1] = link_target._tail + tail_idx = 0 + else: + tail[tail_idx:tail_idx + 1] = link_target._tail + dirty = True + continue + tail_idx += 1 + if dirty: + path = self._from_parsed_parts(drv, root, tail) + return path + + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + raise UnsupportedOperation(f"{type(self).__name__}.symlink_to()") + + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + raise UnsupportedOperation(f"{type(self).__name__}.hardlink_to()") + + def touch(self, mode=0o666, exist_ok=True): + """ + Create this file with the given access mode, if it doesn't exist. + """ + raise UnsupportedOperation(f"{type(self).__name__}.touch()") + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """ + Create a new directory at this given path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.mkdir()") + + def rename(self, target): + """ + Rename this path to the target path. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.rename()") + + def replace(self, target): + """ + Rename this path to the target path, overwriting if that path exists. + + The target path may be absolute or relative. Relative paths are + interpreted relative to the current working directory, *not* the + directory of the Path object. + + Returns the new Path instance pointing to the target path. + """ + raise UnsupportedOperation(f"{type(self).__name__}.replace()") + + def chmod(self, mode, *, follow_symlinks=True): + """ + Change the permissions of the path, like os.chmod(). + """ + raise UnsupportedOperation(f"{type(self).__name__}.chmod()") + + def lchmod(self, mode): + """ + Like chmod(), except if the path points to a symlink, the symlink's + permissions are changed, rather than its target's. + """ + self.chmod(mode, follow_symlinks=False) + + def unlink(self, missing_ok=False): + """ + Remove this file or link. + If the path is a directory, use rmdir() instead. + """ + raise UnsupportedOperation(f"{type(self).__name__}.unlink()") + + def rmdir(self): + """ + Remove this directory. The directory must be empty. + """ + raise UnsupportedOperation(f"{type(self).__name__}.rmdir()") + + def owner(self): + """ + Return the login name of the file owner. + """ + raise UnsupportedOperation(f"{type(self).__name__}.owner()") + + def group(self): + """ + Return the group name of the file gid. + """ + raise UnsupportedOperation(f"{type(self).__name__}.group()") + + def as_uri(self): + """Return the path as a URI.""" + raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") + + +class Path(_VirtualPath): + """PurePath subclass that can make system calls. + + Path represents a filesystem path but unlike PurePath, also offers + methods to do system calls on path objects. Depending on your system, + instantiating a Path will return either a PosixPath or a WindowsPath + object. You can also instantiate a PosixPath or WindowsPath directly, + but cannot instantiate a WindowsPath on a POSIX system or vice versa. + """ + __slots__ = () + __bytes__ = PurePath.__bytes__ + __fspath__ = PurePath.__fspath__ + as_uri = PurePath.as_uri + def __init__(self, *args, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " @@ -1174,21 +1401,46 @@ def __new__(cls, *args, **kwargs): cls = WindowsPath if os.name == 'nt' else PosixPath return object.__new__(cls) - @classmethod - def cwd(cls): - """Return a new path pointing to the current working directory.""" - # We call 'absolute()' rather than using 'os.getcwd()' directly to - # enable users to replace the implementation of 'absolute()' in a - # subclass and benefit from the new behaviour here. This works because - # os.path.abspath('.') == os.getcwd(). - return cls().absolute() + def stat(self, *, follow_symlinks=True): + """ + Return the result of the stat() system call on this path, like + os.stat() does. + """ + return os.stat(self, follow_symlinks=follow_symlinks) - @classmethod - def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). + def is_mount(self): """ - return cls("~").expanduser() + Check if this path is a mount point + """ + return self._flavour.ismount(self) + + def is_junction(self): + """ + Whether this path is a junction. + """ + return self._flavour.isjunction(self) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + """ + Open the file pointed by this path and return a file object, as + the built-in open() function does. + """ + if "b" not in mode: + encoding = io.text_encoding(encoding) + return io.open(self, mode, buffering, encoding, errors, newline) + + def iterdir(self): + """Yield path objects of the directory contents. + + The children are yielded in arbitrary order, and the + special entries '.' and '..' are not included. + """ + for name in os.listdir(self): + yield self._make_child_relpath(name) + + def _scandir(self): + return os.scandir(self) def absolute(self): """Return an absolute version of this path by prepending the current @@ -1241,34 +1493,26 @@ def check_eloop(e): check_eloop(e) return p - def owner(self): - """ - Return the login name of the file owner. - """ - try: - import pwd + if pwd: + def owner(self): + """ + Return the login name of the file owner. + """ return pwd.getpwuid(self.stat().st_uid).pw_name - except ImportError: - raise UnsupportedOperation("Path.owner() is unsupported on this system") - - def group(self): - """ - Return the group name of the file gid. - """ - try: - import grp + if grp: + def group(self): + """ + Return the group name of the file gid. + """ return grp.getgrgid(self.stat().st_gid).gr_name - except ImportError: - raise UnsupportedOperation("Path.group() is unsupported on this system") - def readlink(self): - """ - Return the path to which the symbolic link points. - """ - if not hasattr(os, "readlink"): - raise UnsupportedOperation("os.readlink() not available on this system") - return self.with_segments(os.readlink(self)) + if hasattr(os, "readlink"): + def readlink(self): + """ + Return the path to which the symbolic link points. + """ + return self.with_segments(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1315,13 +1559,6 @@ def chmod(self, mode, *, follow_symlinks=True): """ os.chmod(self, mode, follow_symlinks=follow_symlinks) - def lchmod(self, mode): - """ - Like chmod(), except if the path points to a symlink, the symlink's - permissions are changed, rather than its target's. - """ - self.chmod(mode, follow_symlinks=False) - def unlink(self, missing_ok=False): """ Remove this file or link. @@ -1365,24 +1602,22 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) - def symlink_to(self, target, target_is_directory=False): - """ - Make this path a symlink pointing to the target path. - Note the order of arguments (link, target) is the reverse of os.symlink. - """ - if not hasattr(os, "symlink"): - raise UnsupportedOperation("os.symlink() not available on this system") - os.symlink(target, self, target_is_directory) - - def hardlink_to(self, target): - """ - Make this path a hard link pointing to the same file as *target*. - - Note the order of arguments (self, target) is the reverse of os.link's. - """ - if not hasattr(os, "link"): - raise UnsupportedOperation("os.link() not available on this system") - os.link(target, self) + if hasattr(os, "symlink"): + def symlink_to(self, target, target_is_directory=False): + """ + Make this path a symlink pointing to the target path. + Note the order of arguments (link, target) is the reverse of os.symlink. + """ + os.symlink(target, self, target_is_directory) + + if hasattr(os, "link"): + def hardlink_to(self, target): + """ + Make this path a hard link pointing to the same file as *target*. + + Note the order of arguments (self, target) is the reverse of os.link's. + """ + os.link(target, self) def expanduser(self): """ Return a new path with expanded ~ and ~user constructs From 8ce0139454b9a4937cc0c9a0b64bf971c642a38b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Jul 2023 19:42:40 +0100 Subject: [PATCH 02/31] Add tests for `pathlib._VirtualPath` --- Lib/test/test_pathlib.py | 324 ++++++++++++++++++++++++++++++++++----- 1 file changed, 282 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 464a835212d472..5d3750fd152a7d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1566,14 +1566,165 @@ def test_group(self): # -# Tests for the concrete classes. +# Tests for the virtual classes. # -class PathTest(unittest.TestCase): - """Tests for the FS-accessing functionalities of the Path classes.""" +class VirtualPathTest(PurePathTest): + cls = pathlib._VirtualPath - cls = pathlib.Path - can_symlink = os_helper.can_symlink() + def test_unsupported_operation(self): + P = self.cls + p = self.cls() + e = pathlib.UnsupportedOperation + self.assertRaises(e, p.stat) + self.assertRaises(e, p.lstat) + self.assertRaises(e, p.exists) + self.assertRaises(e, p.samefile, 'foo') + self.assertRaises(e, p.is_dir) + self.assertRaises(e, p.is_file) + self.assertRaises(e, p.is_mount) + self.assertRaises(e, p.is_symlink) + self.assertRaises(e, p.is_block_device) + self.assertRaises(e, p.is_char_device) + self.assertRaises(e, p.is_fifo) + self.assertRaises(e, p.is_socket) + self.assertRaises(e, p.is_junction) + self.assertRaises(e, p.open) + self.assertRaises(e, p.read_bytes) + self.assertRaises(e, p.read_text) + self.assertRaises(e, p.write_bytes, b'foo') + self.assertRaises(e, p.write_text, 'foo') + self.assertRaises(e, p.iterdir) + self.assertRaises(e, p.glob, '*') + self.assertRaises(e, p.rglob, '*') + self.assertRaises(e, lambda: list(p.walk())) + self.assertRaises(e, p.absolute) + self.assertRaises(e, P.cwd) + self.assertRaises(e, p.expanduser) + self.assertRaises(e, p.home) + self.assertRaises(e, p.readlink) + self.assertRaises(e, p.symlink_to, 'foo') + self.assertRaises(e, p.hardlink_to, 'foo') + self.assertRaises(e, p.mkdir) + self.assertRaises(e, p.touch) + self.assertRaises(e, p.rename, 'foo') + self.assertRaises(e, p.replace, 'foo') + self.assertRaises(e, p.chmod, 0o755) + self.assertRaises(e, p.lchmod, 0o755) + self.assertRaises(e, p.unlink) + self.assertRaises(e, p.rmdir) + self.assertRaises(e, p.owner) + self.assertRaises(e, p.group) + self.assertRaises(e, p.as_uri) + + def test_as_uri_common(self): + e = pathlib.UnsupportedOperation + self.assertRaises(e, self.cls().as_uri) + + def test_fspath_common(self): + self.assertRaises(TypeError, os.fspath, self.cls()) + + def test_as_bytes_common(self): + self.assertRaises(TypeError, bytes, self.cls()) + + +class DummyVirtualPathIO(io.BytesIO): + """ + Used by DummyVirtualPath to implement `open('w')` + """ + + def __init__(self, files, path): + super().__init__() + self.files = files + self.path = path + + def close(self): + self.files[self.path] = self.getvalue() + super().close() + + +class DummyVirtualPath(pathlib._VirtualPath): + """ + Simple implementation of VirtualPath that keeps files and directories in + memory. + """ + _files = {} + _directories = {} + _symlinks = {} + + def stat(self, *, follow_symlinks=True): + if follow_symlinks: + path = str(self.resolve()) + else: + path = str(self.parent.resolve() / self.name) + if path in self._files: + st_mode = stat.S_IFREG + elif path in self._directories: + st_mode = stat.S_IFDIR + elif path in self._symlinks: + st_mode = stat.S_IFLNK + else: + raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) + return os.stat_result((st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0)) + + def open(self, mode='r', buffering=-1, encoding=None, + errors=None, newline=None): + if buffering != -1: + raise NotImplementedError + path_obj = self.resolve() + path = str(path_obj) + name = path_obj.name + parent = str(path_obj.parent) + if path in self._directories: + raise IsADirectoryError(errno.EISDIR, "Is a directory", path) + + text = 'b' not in mode + mode = ''.join(c for c in mode if c not in 'btU') + if mode == 'r': + if path not in self._files: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + stream = io.BytesIO(self._files[path]) + elif mode == 'w': + if parent not in self._directories: + raise FileNotFoundError(errno.ENOENT, "File not found", parent) + stream = DummyVirtualPathIO(self._files, path) + self._files[path] = b'' + self._directories[parent].add(name) + else: + raise NotImplementedError + if text: + stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) + return stream + + def iterdir(self): + path = str(self.resolve()) + if path in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path in self._directories: + for name in self._directories[path]: + yield self / name + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + try: + self._directories[str(self.parent)].add(self.name) + self._directories[str(self)] = set() + except KeyError: + if not parents or self.parent == self: + raise FileNotFoundError(errno.ENOENT, "File not found", str(self.parent)) from None + self.parent.mkdir(parents=True, exist_ok=True) + self.mkdir(mode, parents=False, exist_ok=exist_ok) + except FileExistsError: + if not exist_ok: + raise + + +class DummyVirtualPathTest(unittest.TestCase): + """Tests for VirtualPath methods that use stat(), open() and iterdir().""" + + cls = DummyVirtualPath + can_symlink = False # (BASE) # | @@ -1596,37 +1747,37 @@ class PathTest(unittest.TestCase): # def setUp(self): - def cleanup(): - os.chmod(join('dirE'), 0o777) - os_helper.rmtree(BASE) - self.addCleanup(cleanup) - os.mkdir(BASE) - os.mkdir(join('dirA')) - os.mkdir(join('dirB')) - os.mkdir(join('dirC')) - os.mkdir(join('dirC', 'dirD')) - os.mkdir(join('dirE')) - with open(join('fileA'), 'wb') as f: - f.write(b"this is file A\n") - with open(join('dirB', 'fileB'), 'wb') as f: - f.write(b"this is file B\n") - with open(join('dirC', 'fileC'), 'wb') as f: - f.write(b"this is file C\n") - with open(join('dirC', 'novel.txt'), 'wb') as f: - f.write(b"this is a novel\n") - with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: - f.write(b"this is file D\n") - os.chmod(join('dirE'), 0) - if self.can_symlink: - # Relative symlinks. - os.symlink('fileA', join('linkA')) - os.symlink('non-existing', join('brokenLink')) - os.symlink('dirB', join('linkB'), target_is_directory=True) - os.symlink(os.path.join('..', 'dirB'), join('dirA', 'linkC'), target_is_directory=True) - # This one goes upwards, creating a loop. - os.symlink(os.path.join('..', 'dirB'), join('dirB', 'linkD'), target_is_directory=True) - # Broken symlink (pointing to itself). - os.symlink('brokenLinkLoop', join('brokenLinkLoop')) + # note: this must be kept in sync with `PathTest.setUp()` + cls = self.cls + cls._files.clear() + cls._directories.clear() + cls._symlinks.clear() + cls._files.update({ + f'{BASE}/fileA': b'this is file A\n', + f'{BASE}/dirB/fileB': b'this is file B\n', + f'{BASE}/dirC/fileC': b'this is file C\n', + f'{BASE}/dirC/dirD/fileD': b'this is file D\n', + f'{BASE}/dirC/novel.txt': b'this is a novel\n', + }) + cls._directories.update({ + BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA', }, + f'{BASE}/dirA': set(), + f'{BASE}/dirB': {'fileB'}, + f'{BASE}/dirC': {'dirD', 'fileC', 'novel.txt'}, + f'{BASE}/dirC/dirD': {'fileD'}, + f'{BASE}/dirE': {}, + }) + dirname = BASE + while True: + dirname, basename = os.path.split(dirname) + if not basename: + break + cls._directories[dirname] = {basename} + + def tempdir(self): + path = self.cls(BASE).with_name('tmp-dirD') + path.mkdir() + return path def assertFileNotFound(self, func, *args, **kwargs): with self.assertRaises(FileNotFoundError) as cm: @@ -1975,9 +2126,11 @@ def test_rglob_symlink_loop(self): def test_glob_many_open_files(self): depth = 30 P = self.cls - base = P(BASE) / 'deep' - p = P(base, *(['d']*depth)) - p.mkdir(parents=True) + p = base = P(BASE) / 'deep' + p.mkdir() + for _ in range(depth): + p /= 'd' + p.mkdir() pattern = '/'.join(['*'] * depth) iters = [base.glob(pattern) for j in range(100)] for it in iters: @@ -2109,9 +2262,7 @@ def test_resolve_common(self): # resolves to 'dirB/..' first before resolving to parent of dirB. self._check_resolve_relative(p, P(BASE, 'foo', 'in', 'spam'), False) # Now create absolute symlinks. - d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', - dir=os.getcwd())) - self.addCleanup(os_helper.rmtree, d) + d = self.tempdir() P(BASE, 'dirA', 'linkX').symlink_to(d) P(BASE, str(d), 'linkY').symlink_to(join('dirB')) p = P(BASE, 'dirA', 'linkX', 'linkY', 'fileB') @@ -2353,6 +2504,10 @@ def _check_complex_symlinks(self, link0_target): self.assertEqualNormCase(str(p), BASE) # Resolve relative paths. + try: + self.cls().absolute() + except pathlib.UnsupportedOperation: + return old_path = os.getcwd() os.chdir(BASE) try: @@ -2380,6 +2535,91 @@ def test_complex_symlinks_relative(self): def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) + +class DummyVirtualPathWithSymlinks(DummyVirtualPath): + def readlink(self): + path = str(self) + if path in self._symlinks: + return self.with_segments(self._symlinks[path]) + elif path in self._files or path in self._directories: + raise OSError(errno.EINVAL, "Not a symlink", path) + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + + def symlink_to(self, target, target_is_directory=False): + self._directories[str(self.parent)].add(self.name) + self._symlinks[str(self)] = str(target) + + +class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): + cls = DummyVirtualPathWithSymlinks + can_symlink = True + + def setUp(self): + super().setUp() + cls = self.cls + cls._symlinks.update({ + f'{BASE}/linkA': 'fileA', + f'{BASE}/linkB': 'dirB', + f'{BASE}/dirA/linkC': '../dirB', + f'{BASE}/dirB/linkD': '../dirB', + f'{BASE}/brokenLink': 'non-existing', + f'{BASE}/brokenLinkLoop': 'brokenLinkLoop', + }) + cls._directories[BASE].update({'linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'}) + cls._directories[f'{BASE}/dirA'].add('linkC') + cls._directories[f'{BASE}/dirB'].add('linkD') + + +# +# Tests for the concrete classes. +# + +class PathTest(DummyVirtualPathTest): + """Tests for the FS-accessing functionalities of the Path classes.""" + cls = pathlib.Path + can_symlink = os_helper.can_symlink() + + def setUp(self): + # note: this must be kept in sync with `DummyVirtualPathTest.setUp()` + def cleanup(): + os.chmod(join('dirE'), 0o777) + os_helper.rmtree(BASE) + self.addCleanup(cleanup) + os.mkdir(BASE) + os.mkdir(join('dirA')) + os.mkdir(join('dirB')) + os.mkdir(join('dirC')) + os.mkdir(join('dirC', 'dirD')) + os.mkdir(join('dirE')) + with open(join('fileA'), 'wb') as f: + f.write(b"this is file A\n") + with open(join('dirB', 'fileB'), 'wb') as f: + f.write(b"this is file B\n") + with open(join('dirC', 'fileC'), 'wb') as f: + f.write(b"this is file C\n") + with open(join('dirC', 'novel.txt'), 'wb') as f: + f.write(b"this is a novel\n") + with open(join('dirC', 'dirD', 'fileD'), 'wb') as f: + f.write(b"this is file D\n") + os.chmod(join('dirE'), 0) + if self.can_symlink: + # Relative symlinks. + os.symlink('fileA', join('linkA')) + os.symlink('non-existing', join('brokenLink')) + os.symlink('dirB', join('linkB'), target_is_directory=True) + os.symlink(os.path.join('..', 'dirB'), join('dirA', 'linkC'), target_is_directory=True) + # This one goes upwards, creating a loop. + os.symlink(os.path.join('..', 'dirB'), join('dirB', 'linkD'), target_is_directory=True) + # Broken symlink (pointing to itself). + os.symlink('brokenLinkLoop', join('brokenLinkLoop')) + + def tempdir(self): + d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', + dir=os.getcwd())) + self.addCleanup(os_helper.rmtree, d) + return d + def test_concrete_class(self): if self.cls is pathlib.Path: expected = pathlib.WindowsPath if os.name == 'nt' else pathlib.PosixPath From b850d116c6c21d17f8a0b01ba40b97fb6384e515 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 2 Jul 2023 20:10:16 +0100 Subject: [PATCH 03/31] Fix tests on Windows --- Lib/test/test_pathlib.py | 42 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 5d3750fd152a7d..d8f6648e4d2cb7 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1752,24 +1752,25 @@ def setUp(self): cls._files.clear() cls._directories.clear() cls._symlinks.clear() + join = cls._flavour.join cls._files.update({ - f'{BASE}/fileA': b'this is file A\n', - f'{BASE}/dirB/fileB': b'this is file B\n', - f'{BASE}/dirC/fileC': b'this is file C\n', - f'{BASE}/dirC/dirD/fileD': b'this is file D\n', - f'{BASE}/dirC/novel.txt': b'this is a novel\n', + join(BASE, 'fileA'): b'this is file A\n', + join(BASE, 'dirB', 'fileB'): b'this is file B\n', + join(BASE, 'dirC', 'fileC'): b'this is file C\n', + join(BASE, 'dirC', 'dirD', 'fileD'): b'this is file D\n', + join(BASE, 'dirC', 'novel.txt'): b'this is a novel\n', }) cls._directories.update({ - BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA', }, - f'{BASE}/dirA': set(), - f'{BASE}/dirB': {'fileB'}, - f'{BASE}/dirC': {'dirD', 'fileC', 'novel.txt'}, - f'{BASE}/dirC/dirD': {'fileD'}, - f'{BASE}/dirE': {}, + BASE: {'dirA', 'dirB', 'dirC', 'dirE', 'fileA'}, + join(BASE, 'dirA'): set(), + join(BASE, 'dirB'): {'fileB'}, + join(BASE, 'dirC'): {'dirD', 'fileC', 'novel.txt'}, + join(BASE, 'dirC', 'dirD'): {'fileD'}, + join(BASE, 'dirE'): {}, }) dirname = BASE while True: - dirname, basename = os.path.split(dirname) + dirname, basename = cls._flavour.split(dirname) if not basename: break cls._directories[dirname] = {basename} @@ -2558,17 +2559,18 @@ class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): def setUp(self): super().setUp() cls = self.cls + join = cls._flavour.join cls._symlinks.update({ - f'{BASE}/linkA': 'fileA', - f'{BASE}/linkB': 'dirB', - f'{BASE}/dirA/linkC': '../dirB', - f'{BASE}/dirB/linkD': '../dirB', - f'{BASE}/brokenLink': 'non-existing', - f'{BASE}/brokenLinkLoop': 'brokenLinkLoop', + join(BASE, 'linkA'): 'fileA', + join(BASE, 'linkB'): 'dirB', + join(BASE, 'dirA', 'linkC'): join('..', 'dirB'), + join(BASE, 'dirB', 'linkD'): join('..', 'dirB'), + join(BASE, 'brokenLink'): 'non-existing', + join(BASE, 'brokenLinkLoop'): 'brokenLinkLoop', }) cls._directories[BASE].update({'linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'}) - cls._directories[f'{BASE}/dirA'].add('linkC') - cls._directories[f'{BASE}/dirB'].add('linkD') + cls._directories[join(BASE, 'dirA')].add('linkC') + cls._directories[join(BASE, 'dirB')].add('linkD') # From 39bf6b378b2d5111ba8c05a4d6b3d0f73c1ca6fc Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 11:36:20 +0100 Subject: [PATCH 04/31] Fix tests on Windows (take 2) --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index d8f6648e4d2cb7..456ee3fe714ba4 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2253,7 +2253,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if os.name == 'nt': + if isinstance(p, pathlib.WindowsPath): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(BASE, 'dirA', 'foo', 'in', From 0515deaf96ad8d148936400ba88d8f9518ce3a1c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 12:24:56 +0100 Subject: [PATCH 05/31] Fix tests on Windows (take 3) --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 456ee3fe714ba4..d751cd0e930901 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2273,7 +2273,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if os.name == 'nt': + if isinstance(p, pathlib.WindowsPath): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) From 596016f2154fc916ad48ad79aacaaf5335fedb5e Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 12:30:10 +0100 Subject: [PATCH 06/31] Fix tests on Windows (take 4) --- Lib/test/test_pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index d751cd0e930901..15065ff5f51767 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2253,7 +2253,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'fileB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if isinstance(p, pathlib.WindowsPath): + if os.name == 'nt' and isinstance(p, pathlib.Path): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(BASE, 'dirA', 'foo', 'in', @@ -2273,7 +2273,7 @@ def test_resolve_common(self): self._check_resolve_relative(p, P(BASE, 'dirB', 'foo', 'in', 'spam'), False) p = P(BASE, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if isinstance(p, pathlib.WindowsPath): + if os.name == 'nt' and isinstance(p, pathlib.Path): # In Windows, if linkY points to dirB, 'dirA\linkY\..' # resolves to 'dirA' without resolving linkY first. self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) From 1a6122bc0ee919fd468cc2460db4da3a639375d4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 19:52:31 +0100 Subject: [PATCH 07/31] Add `tarfile.TarPath` --- Lib/tarfile.py | 229 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 228 insertions(+), 1 deletion(-) diff --git a/Lib/tarfile.py b/Lib/tarfile.py index df4e41f7a0d23a..a8ca2264040063 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -37,9 +37,13 @@ # Imports #--------- from builtins import open as bltn_open +from collections import namedtuple +import errno import sys import os import io +import pathlib +import posixpath import shutil import stat import time @@ -69,7 +73,7 @@ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", "tar_filter", "FilterError", "AbsoluteLinkError", "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", - "LinkOutsideDestinationError"] + "LinkOutsideDestinationError", "TarPath"] #--------------------------------------------------------- @@ -2772,6 +2776,229 @@ def __exit__(self, type, value, traceback): self.fileobj.close() self.closed = True + +_tar_stat_fields = ('st_mode st_ino st_dev st_nlink st_uid st_gid ' + 'st_size st_atime st_mtime st_ctime st_uname st_gname') + + +class _TarStatResult(namedtuple('_TarStatResult', _tar_stat_fields)): + """Tar-specific version of os.stat_result. Returned by TarPath.stat().""" + __slots__ = () + + @classmethod + def from_tarinfo(cls, tarfile, tarinfo): + """Create a _TarStatResult from TarFile and TarInfo objects.""" + if tarinfo.type in REGULAR_TYPES: + st_mode = stat.S_IFREG + elif tarinfo.type == DIRTYPE: + st_mode = stat.S_IFDIR + elif tarinfo.type == SYMTYPE or tarinfo.type == LNKTYPE: + st_mode = stat.S_IFLNK + elif tarinfo.type == FIFOTYPE: + st_mode = stat.S_IFIFO + elif tarinfo.type == CHRTYPE: + st_mode = stat.S_IFCHR + elif tarinfo.type == BLKTYPE: + st_mode = stat.S_IFBLK + else: + raise ValueError(tarinfo.type) + return cls(st_mode=tarinfo.mode | st_mode, + st_ino=tarinfo.offset_data, + st_dev=id(tarfile), + st_nlink=0, + st_uid=tarinfo.uid, + st_gid=tarinfo.gid, + st_size=tarinfo.size, + st_atime=0, + st_mtime=tarinfo.mtime, + st_ctime=0, + st_uname=tarinfo.uname, + st_gname=tarinfo.gname) + + @classmethod + def implied_directory(cls, tarfile, path): + """Create a _TarStatResult for a directory that is implied to exist + by another archive member's path. + """ + return cls(stat.S_IFDIR, hash(path), id(tarfile), 0, 0, 0, 0, 0, 0, 0, None, None) + + +class _TarPathWriter(io.BytesIO): + """File object that flushes its contents to a tar archive on close. + Returned by TarPath.open(mode="w"). + """ + + def __init__(self, tarfile, path): + super().__init__() + self.tarfile = tarfile + self.path = path + + def close(self): + info = TarInfo(self.path) + info.size = self.tell() + self.seek(0) + self.tarfile.addfile(info, self) + super().close() + + +class TarPath(pathlib._VirtualPath): + """A pathlib-compatible interface for tar files.""" + + __slots__ = ('tarfile',) + _flavour = posixpath + + def __init__(self, *pathsegments, tarfile): + super().__init__(*pathsegments) + self.tarfile = tarfile + + def __repr__(self): + return f"{type(self).__name__}({str(self)!r}, tarfile={self.tarfile!r})" + + def __hash__(self): + return hash((id(self.tarfile), str(self))) + + def __eq__(self, other): + if not isinstance(other, TarPath): + return NotImplemented + elif other.tarfile is not self.tarfile: + return False + return super().__eq__(other) + + def __lt__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__lt__(other) + + def __le__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__le__(other) + + def __gt__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__gt__(other) + + def __ge__(self, other): + if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: + return NotImplemented + return super().__ge__(other) + + def with_segments(self, *pathsegments): + """Construct a new TarPath object with the same underlying TarFile + object from any number of path-like objects. + """ + return type(self)(*pathsegments, tarfile=self.tarfile) + + def stat(self, *, follow_symlinks=True): + """Return the path's status, similar to os.stat().""" + if follow_symlinks: + resolved = self.resolve() + else: + resolved = self.parent.resolve() / self.name + implied_directory = False + for info in reversed(self.tarfile.getmembers()): + path = self.with_segments(info.name) + if path == resolved: + return _TarStatResult.from_tarinfo(self.tarfile, info) + elif resolved in path.parents: + implied_directory = True + if implied_directory: + return _TarStatResult.implied_directory(self.tarfile, str(resolved)) + else: + raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) + + def owner(self): + """Return the user name of the path owner.""" + name = self.stat().st_uname + if name is not None: + return name + raise pathlib.UnsupportedOperation() + + def group(self): + """Return the group name of the path owner.""" + name = self.stat().st_gname + if name is not None: + return name + raise pathlib.UnsupportedOperation() + + def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): + """Open the archive member pointed by this path and return a file + object, similar to the built-in open() function. + """ + if buffering != -1: + return super().open(mode, buffering, encoding, errors, newline) + action = ''.join(c for c in mode if c not in 'btU') + if action == 'r': + fileobj = self.tarfile.extractfile(str(self.resolve())) + elif action == 'w': + fileobj = _TarPathWriter(self.tarfile, str(self.resolve())) + else: + raise pathlib.UnsupportedOperation() + if 'b' not in mode: + fileobj = io.TextIOWrapper(fileobj, encoding, errors, newline) + return fileobj + + def iterdir(self): + """Yield path objects of the directory contents. The children are + yielded in arbitrary order. + """ + resolved = self.resolve() + seen = set() + for info in self.tarfile.getmembers(): + path = self.with_segments(info.name) + if path == resolved: + if info.type != DIRTYPE: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", str(self)) + while True: + parent = path.parent + if parent == path: + break + elif parent == resolved: + path_str = str(path) + if path_str not in seen: + seen.add(path_str) + yield self / path.name + break + path = parent + if not seen: + raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) + + def readlink(self): + """Return the path to which the symbolic link points.""" + for info in reversed(self.tarfile.getmembers()): + path = self.with_segments(info.name) + if path == self: + if info.issym(): + return self.with_segments(info.linkname) + else: + raise OSError(errno.EINVAL, "Not a symlink", str(self)) + elif self in path.parents: + raise OSError(errno.EINVAL, "Not a symlink", str(self)) + raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) + + def mkdir(self, mode=0o777, parents=False, exist_ok=False): + """Create a new directory at this given path.""" + info = TarInfo(str(self)) + info.type = DIRTYPE + info.mode = mode + self.tarfile.addfile(info) + + def symlink_to(self, target, target_is_directory=False): + """Make this path a symlink pointing to the target path.""" + info = TarInfo(str(self)) + info.type = SYMTYPE + info.linkname = str(self.with_segments(target)) + self.tarfile.addfile(info) + + def hardlink_to(self, target): + """Make this path a hard link pointing to the target path.""" + info = TarInfo(str(self)) + info.type = LNKTYPE + info.linkname = str(self.with_segments(target)) + self.tarfile.addfile(info) + + #-------------------- # exported functions #-------------------- From 6833ed8a006ca90d811e2efb59330307b77c5c19 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 20:24:48 +0100 Subject: [PATCH 08/31] Add docs for `tarfile.TarPath` --- Doc/library/tarfile.rst | 41 +++++++++++++++++++ Doc/whatsnew/3.13.rst | 7 ++++ ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 2 + 3 files changed, 50 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index fd4820e78d68d1..431d422ec13682 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -854,6 +854,47 @@ A :class:`TarInfo` object also provides some convenient query methods: Return :const:`True` if it is one of character device, block device or FIFO. +TarPath Objects +--------------- + +The :class:`TarPath` class provides an interface for tar files that's +compatible with :class:`pathlib.Path`. + +.. class:: TarPath(*pathsegments, tarfile) + + Create a :class:`TarPath` object from a given :class:`TarFile` object. + If *pathsegments* are supplied, they are joined together to form a path + within the archive; otherwise the path is positioned at the archive root. + + .. versionadded:: 3.13 + +.. attribute:: TarPath.tarfile + + The backing :class:`TarFile` instance, as supplied to the initializer. + +Features such as testing file types, reading or writing files, and iterating +or globbing directories are supported:: + + import tarfile + with tarfile.open("sample.tar.gz", "r:gz") as tar: + root = tarfile.TarPath(tarfile=tar) + for readme in root.glob("**/README*", case_sensitive=False): + print(f"Found README file at {readme}:") + print(readme.read_text()) + break + +Some :class:`TarPath` methods unconditionally raise +:exc:`pathlib.UnsupportedOperation`. They are: + +- ``absolute()``, ``cwd()``, ``expanduser()``, ``home()`` and ``as_uri()``, + because tar archives lack these features. +- ``touch()``, ``rename()``, ``replace()``, ``chmod()``, ``lchmod()``, + ``unlink()`` and ``rmdir()``, because the :class:`TarFile` class does not + support reading and writing the same archive. + +Refer to the :mod:`pathlib` documentation for information about other methods. + + .. _tarfile-extraction-filter: Extraction filters diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 9696dd4ff0b700..083c46dd905bc7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -118,6 +118,13 @@ pathlib :meth:`~pathlib.Path.is_dir`. (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`.) +tarfile +------- + +* Add :class:`tarfile.TarPath` class, which provides access to tar archive + members via the :class:`pathlib.Path` interface. + (Contributed by Barney Gale in :gh:`89812`.) + traceback --------- diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst new file mode 100644 index 00000000000000..9ad271a33d6057 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -0,0 +1,2 @@ +Add :class:`tarfile.TarPath` class, which provides access to tar archive +members via the :class:`pathlib.Path` interface. From 4d2e8a923c936a195a8b7b7496ea909f06c58801 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 3 Jul 2023 21:11:01 +0100 Subject: [PATCH 09/31] Add tests for `tarfile.TarPath` --- Lib/test/test_tarfile.py | 568 +++++++++++++++++++++++++++++++++++++++ Lib/test/testtarpath.tar | Bin 0 -> 20480 bytes 2 files changed, 568 insertions(+) create mode 100644 Lib/test/testtarpath.tar diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 2eda7fc4ceac71..96b60abc975ca0 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,3 +1,4 @@ +import errno import sys import os import io @@ -3943,6 +3944,573 @@ def valueerror_filter(tarinfo, path): self.expect_exception(TypeError) # errorlevel is not int +class TarPathTest(unittest.TestCase): + def setUp(self): + self.tarpath = support.findfile("testtarpath.tar") + self.tar = tarfile.TarFile(self.tarpath) + self.root = tarfile.TarPath(tarfile=self.tar) + + def tearDown(self): + self.tar.close() + + def test_tarfile(self): + self.assertIs(self.root.tarfile, self.tar) + + def test_hash(self): + with tarfile.TarFile(self.tarpath) as tar0: + with tarfile.TarFile(self.tarpath) as tar1: + p = tarfile.TarPath('fileA', tarfile=tar0) + p1 = tarfile.TarPath('fileA', tarfile=tar0) + p2 = tarfile.TarPath('fileA', tarfile=tar1) + p3 = tarfile.TarPath('fileB', tarfile=tar0) + self.assertEqual(hash(p), hash(p1)) + self.assertNotEqual(hash(p), hash(p2)) + self.assertNotEqual(hash(p), hash(p3)) + + def test_eq(self): + with tarfile.TarFile(self.tarpath) as tar0: + with tarfile.TarFile(self.tarpath) as tar1: + p = tarfile.TarPath('fileA', tarfile=tar0) + p1 = tarfile.TarPath('fileA', tarfile=tar0) + p2 = tarfile.TarPath('fileA', tarfile=tar1) + p3 = tarfile.TarPath('fileB', tarfile=tar0) + self.assertEqual(p, p1) + self.assertNotEqual(p, p2) + self.assertNotEqual(p, p3) + + def test_samefile(self): + p = self.root / 'fileA' + pp = self.root / 'fileA' + q = self.root / 'dirB' / 'fileB' + self.assertTrue(p.samefile('fileA')) + self.assertTrue(p.samefile(pp)) + self.assertFalse(p.samefile('dirB/fileB')) + self.assertFalse(p.samefile(q)) + # Test the non-existent file case + r = self.root / 'foo' + self.assertRaises(FileNotFoundError, p.samefile, r) + self.assertRaises(FileNotFoundError, p.samefile, 'foo') + self.assertRaises(FileNotFoundError, r.samefile, p) + self.assertRaises(FileNotFoundError, r.samefile, 'foo') + self.assertRaises(FileNotFoundError, r.samefile, r) + self.assertRaises(FileNotFoundError, r.samefile, 'foo') + + def test_exists(self): + p = self.root + self.assertTrue(p.exists()) + self.assertTrue((p / 'dirA').exists()) + self.assertTrue((p / 'fileA').exists()) + self.assertFalse((p / 'fileA' / 'bah').exists()) + self.assertTrue((p / 'linkA').exists()) + self.assertTrue((p / 'linkB').exists()) + self.assertTrue((p / 'linkB' / 'fileB').exists()) + self.assertFalse((p / 'linkA' / 'bah').exists()) + self.assertFalse((p / 'brokenLink').exists()) + self.assertTrue((p / 'brokenLink').exists(follow_symlinks=False)) + self.assertFalse((p / 'foo').exists()) + self.assertFalse(p.with_segments('/xyzzy').exists()) + + def test_open(self): + with (self.root / 'fileA').open('r') as f: + self.assertIsInstance(f, io.TextIOBase) + self.assertEqual(f.read(), "this is file A\n") + with (self.root / 'fileA').open('rb') as f: + self.assertIsInstance(f, io.BufferedIOBase) + self.assertEqual(f.read().strip(), b"this is file A") + + def test_iterdir(self): + it = self.root.iterdir() + paths = sorted(it) + expected = ['brokenLink', 'brokenLinkLoop', + 'dirA', 'dirB', 'dirC', 'dirE', 'fileA', + 'linkA', 'linkB'] + self.assertEqual(paths, [ self.root / q for q in expected ]) + + def test_iterdir_symlink(self): + p = self.root / 'linkB' + paths = sorted(p.iterdir()) + expected = [ p / q for q in ['fileB', 'linkD'] ] + self.assertEqual(paths, expected) + + def test_iterdir_nodir(self): + p = self.root / 'foo' + with self.assertRaises(OSError) as cm: + next(p.iterdir()) + + def test_glob(self): + def _check(pattern, expected): + actual = sorted(self.root.glob(pattern)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileA", ["fileA"]) + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ['dirA', 'fileA', 'linkA']) + _check("*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD']) + _check("*/fileB", ['dirB/fileB', 'linkB/fileB']) + _check("brokenLink", ['brokenLink']) + _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + + def test_glob_case_sensitive(self): + def _check(pattern, case_sensitive, expected): + actual = sorted([str(q) for q in self.root.glob(pattern, case_sensitive=case_sensitive)]) + expected = [str(self.root / q) for q in expected] + self.assertEqual(actual, expected) + + _check("DIRB/FILE*", True, []) + _check("DIRB/FILE*", False, ["dirB/fileB"]) + _check("dirb/file*", True, []) + _check("dirb/file*", False, ["dirB/fileB"]) + + def test_glob_follow_symlinks(self): + def _check(pattern, expected): + actual = sorted([q for q in self.root.glob(pattern, follow_symlinks=True) + if "linkD" not in q.parent.parts]) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ["dirA", "fileA", "linkA"]) + _check("*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check("*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + _check("dir*/*/..", ["dirA/linkC/..", "dirC/dirD/.."]) + _check("dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE"]) + _check("dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check("dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"]) + _check("dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) + _check("dir*/**/fileC", ["dirC/fileC"]) + _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) + _check("*/dirD/**/", ["dirC/dirD"]) + + def test_glob_no_follow_symlinks(self): + def _check(pattern, expected): + actual = sorted(self.root.glob(pattern, follow_symlinks=False)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + _check("fileB", []) + _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check("*A", ["dirA", "fileA", "linkA"]) + _check("*B/*", ["dirB/fileB", "dirB/linkD"]) + _check("*/fileB", ["dirB/fileB"]) + _check("*/", ["dirA", "dirB", "dirC", "dirE"]) + _check("dir*/*/..", ["dirC/dirD/.."]) + _check("dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check("dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check("dir*/*/**/", ["dirC/dirD"]) + _check("dir*/*/**/..", ["dirC/dirD/.."]) + _check("dir*/**/fileC", ["dirC/fileC"]) + _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) + _check("*/dirD/**/", ["dirC/dirD"]) + + def test_rglob(self): + def _check(glob, expected): + self.assertEqual(sorted(glob), sorted(self.root / q for q in expected)) + p = self.root + _check(p.rglob("fileA"), ["fileA"]) + _check(p.rglob("fileB"), ["dirB/fileB"]) + _check(p.rglob("**/fileB"), ["dirB/fileB"]) + _check(p.rglob("*/fileA"), []) + _check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB", + "linkB/fileB", "dirA/linkC/fileB"]) + _check(p.rglob("file*"), ["fileA", "dirB/fileB", + "dirC/fileC", "dirC/dirD/fileD"]) + _check(p.rglob("*/"), [ + "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", + "dirC/dirD", "dirE", "linkB", + ]) + _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + q = p / "dirC" + _check(q.rglob("*"), ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(q.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(q.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) + _check(q.rglob("dir*/**"), ["dirC/dirD"]) + _check(q.rglob("*/*"), ["dirC/dirD/fileD"]) + _check(q.rglob("*/"), ["dirC/dirD"]) + _check(q.rglob(""), ["dirC", "dirC/dirD"]) + _check(q.rglob("**"), ["dirC", "dirC/dirD"]) + _check(q.rglob("*.txt"), ["dirC/novel.txt"]) + _check(q.rglob("*.*"), ["dirC/novel.txt"]) + + def test_rglob_follow_symlinks(self): + def _check(path, pattern, expected): + actual = sorted([q for q in path.rglob(pattern, follow_symlinks=True) + if "linkD" not in q.parent.parts]) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + p = self.root + _check(p, "fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) + _check(p, "file*", ["dirA/linkC/fileB", "dirB/fileB", + "dirC/dirD/fileD", "dirC/fileC", "fileA", "linkB/fileB"]) + _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", + "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) + + q = p / "dirC" + _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) + _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC"]) + _check(q, "*/*", ["dirC/dirD/fileD"]) + _check(q, "*/", ["dirC/dirD"]) + _check(q, "", ["dirC", "dirC/dirD"]) + _check(q, "*.txt", ["dirC/novel.txt"]) + _check(q, "*.*", ["dirC/novel.txt"]) + + def test_rglob_no_follow_symlinks(self): + def _check(path, pattern, expected): + actual = sorted(path.rglob(pattern, follow_symlinks=False)) + expected = [self.root / q for q in expected] + self.assertEqual(actual, expected) + + p = self.root + _check(p, "fileB", ["dirB/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB"]) + _check(p, "file*", ["dirB/fileB", "dirC/dirD/fileD", "dirC/fileC", "fileA"]) + _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p, "", ["", "dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + + q = p / "dirC" + _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) + _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC", ]) + _check(q, "*/*", ["dirC/dirD/fileD"]) + _check(q, "*/", ["dirC/dirD"]) + _check(q, "", ["dirC", "dirC/dirD"]) + _check(q, "*.txt", ["dirC/novel.txt"]) + _check(q, "*.*", ["dirC/novel.txt"]) + + def test_rglob_symlink_loop(self): + given = sorted(self.root.rglob('*')) + expect = ['brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + ] + self.assertEqual(given, sorted(self.root / x for x in expect)) + + def test_glob_dotdot(self): + p = self.root + self.assertEqual(sorted(p.glob("..")), [ p / ".." ]) + self.assertEqual(sorted(p.glob("../..")), [ p / ".." / ".." ]) + self.assertEqual(sorted(p.glob("dirA/..")), [ p / "dirA" / ".." ]) + self.assertEqual(sorted(p.glob("dirA/../file*")), [ p / "dirA/../fileA" ]) + self.assertEqual(sorted(p.glob("dirA/../file*/..")), []) + self.assertEqual(sorted(p.glob("../xyzzy")), []) + self.assertEqual(sorted(p.glob("xyzzy/..")), []) + self.assertEqual(sorted(p.glob("/".join([".."] * 50))), [ p.joinpath(*[".."] * 50)]) + + def test_walk(self): + def _sorted_walk(follow_symlinks): + results = [] + for dirpath, dirnames, filenames in self.root.walk(follow_symlinks=follow_symlinks): + if 'linkD' in dirnames: + # Treat recursive symlink as file + dirnames.remove('linkD') + filenames.append('linkD') + dirnames.sort() + filenames.sort() + results.append((dirpath, dirnames, filenames)) + return results + + p = self.root + self.assertEqual(_sorted_walk(False), [ + (p, + ['dirA', 'dirB', 'dirC', 'dirE'], + ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB']), + (p / 'dirA', [], ['linkC']), + (p / 'dirB', [], ['fileB', 'linkD']), + (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), + (p / 'dirC' / 'dirD', [], ['fileD']), + ]) + + self.assertEqual(_sorted_walk(True), [ + (p, + ['dirA', 'dirB', 'dirC', 'dirE', 'linkB'], + ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA']), + (p / 'dirA', ['linkC'], []), + (p / 'dirA' / 'linkC', [], ['fileB', 'linkD']), + (p / 'dirB', [], ['fileB', 'linkD']), + (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), + (p / 'dirC' / 'dirD', [], ['fileD']), + (p / 'linkB', [], ['fileB', 'linkD']), + ]) + + def test_readlink(self): + p = self.root + self.assertEqual((p / 'linkA').readlink(), p / 'fileA') + self.assertEqual((p / 'brokenLink').readlink(), p / 'non-existing') + self.assertEqual((p / 'linkB').readlink(), p / 'dirB') + with self.assertRaises(OSError): + (p / 'fileA').readlink() + + def test_resolve(self): + with self.assertRaises(OSError) as cm: + self.root.joinpath('foo').resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + def _check(path, expected, strict=True): + self.assertEqual(self.root.joinpath(path).resolve(strict=strict), + self.root.joinpath(expected)) + _check('foo/in/spam', 'foo/in/spam', False) + _check('../foo/in/spam', '../foo/in/spam', False) + _check('dirB/fileB', 'dirB/fileB') + _check('linkA', 'fileA') + _check('dirA/linkC/fileB', 'dirB/fileB') + _check('dirB/linkD/fileB', 'dirB/fileB') + _check('dirA/linkC/fileB/foo/in/spam', 'dirB/fileB/foo/in/spam', False) + _check('dirA/linkC/../foo/in/spam', 'foo/in/spam', False) + + def test_stat(self): + statA = self.root.joinpath('fileA').stat() + statB = self.root.joinpath('dirB', 'fileB').stat() + statC = self.root.joinpath('dirC').stat() + # st_mode: files are the same, directory differs. + self.assertIsInstance(statA.st_mode, int) + self.assertEqual(statA.st_mode, statB.st_mode) + self.assertNotEqual(statA.st_mode, statC.st_mode) + self.assertNotEqual(statB.st_mode, statC.st_mode) + # st_ino: all different, + self.assertIsInstance(statA.st_ino, int) + self.assertNotEqual(statA.st_ino, statB.st_ino) + self.assertNotEqual(statA.st_ino, statC.st_ino) + self.assertNotEqual(statB.st_ino, statC.st_ino) + # st_dev: all the same. + self.assertIsInstance(statA.st_dev, int) + self.assertEqual(statA.st_dev, statB.st_dev) + self.assertEqual(statA.st_dev, statC.st_dev) + # other attributes not used by pathlib. + + def test_stat_no_follow_symlinks(self): + p = self.root / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + + def test_stat_no_follow_symlinks_nosymlink(self): + p = self.root / 'fileA' + st = p.stat() + self.assertEqual(st, p.stat(follow_symlinks=False)) + + def test_lstat(self): + p = self.root / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.lstat()) + + def test_lstat_nosymlink(self): + p = self.root / 'fileA' + st = p.stat() + self.assertEqual(st, p.lstat()) + + def test_owner(self): + p = self.root + self.assertRaises(pathlib.UnsupportedOperation, p.owner) + self.assertEqual((p / 'fileA').owner(), 'barney') + + def test_group(self): + p = self.root + self.assertRaises(pathlib.UnsupportedOperation, p.group) + self.assertEqual((p / 'fileA').group(), 'barney') + + def test_read_write_bytes(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + p.write_bytes(b'abcdefg') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + self.assertEqual(p.read_bytes(), b'abcdefg') + + def test_read_write_text(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + p.write_text('äbcdefg', encoding='latin-1') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + p = tarfile.TarPath('fileA', tarfile=tar) + self.assertEqual(p.read_text(encoding='utf-8', errors='ignore'), 'bcdefg') + + def test_mkdir(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath('dirA', tarfile=tar) + p.mkdir() + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('dirA') + self.assertEqual(info.type, tarfile.DIRTYPE) + + def test_symlink_to(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath(tarfile=tar) + p.joinpath('linkA').symlink_to('fileA') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('linkA') + self.assertEqual(info.type, tarfile.SYMTYPE) + self.assertEqual(info.linkname, 'fileA') + + def test_hardlink_to(self): + fileobj = io.BytesIO() + with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: + p = tarfile.TarPath(tarfile=tar) + p.joinpath('linkA').hardlink_to('fileA') + + fileobj.seek(0) + with tarfile.TarFile(fileobj=fileobj) as tar: + info = tar.getmember('linkA') + self.assertEqual(info.type, tarfile.LNKTYPE) + self.assertEqual(info.linkname, 'fileA') + + +class TarPathFileTypeTest(unittest.TestCase): + def setUp(self): + tarpath = support.findfile("testtar.tar") + self.tar = tarfile.TarFile(tarpath) + self.root = tarfile.TarPath(tarfile=self.tar) + + def tearDown(self): + self.tar.close() + + def test_is_dir(self): + p = self.root + self.assertTrue(p.is_dir()) + self.assertTrue((p / 'ustar').is_dir()) + self.assertTrue((p / 'ustar' / 'dirtype').is_dir()) + self.assertFalse((p / 'ustar' / 'regtype').is_dir()) + self.assertFalse((p / 'non-existing').is_dir()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_dir()) + self.assertFalse((p / 'ustar' / 'symtype').is_dir()) + self.assertFalse((p / 'ustar' / 'lnktype').is_dir()) + self.assertFalse((p / 'ustar' / 'fifotype').is_dir()) + self.assertFalse((p / 'ustar' / 'blktype').is_dir()) + self.assertFalse((p / 'ustar' / 'chrtype').is_dir()) + + def test_is_file(self): + p = self.root + self.assertFalse(p.is_file()) + self.assertFalse((p / 'ustar').is_file()) + self.assertFalse((p / 'ustar' / 'dirtype').is_file()) + self.assertTrue((p / 'ustar' / 'regtype').is_file()) + self.assertFalse((p / 'non-existing').is_file()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_file()) + self.assertTrue((p / 'ustar' / 'symtype').is_file()) + self.assertFalse((p / 'ustar' / 'symtype').is_file(follow_symlinks=False)) + self.assertFalse((p / 'ustar' / 'fifotype').is_file()) + self.assertFalse((p / 'ustar' / 'blktype').is_file()) + self.assertFalse((p / 'ustar' / 'chrtype').is_file()) + + def test_is_mount(self): + p = self.root + self.assertTrue(p.is_mount()) + self.assertFalse((p / 'ustar').is_mount()) + self.assertFalse((p / 'ustar' / 'dirtype').is_mount()) + self.assertFalse((p / 'ustar' / 'regtype').is_mount()) + self.assertFalse((p / 'non-existing').is_mount()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_mount()) + self.assertFalse((p / 'ustar' / 'symtype').is_mount()) + self.assertFalse((p / 'ustar' / 'fifotype').is_mount()) + self.assertFalse((p / 'ustar' / 'blktype').is_mount()) + self.assertFalse((p / 'ustar' / 'chrtype').is_mount()) + + def test_is_symlink(self): + p = self.root + self.assertFalse(p.is_symlink()) + self.assertFalse((p / 'ustar').is_symlink()) + self.assertFalse((p / 'ustar' / 'dirtype').is_symlink()) + self.assertFalse((p / 'ustar' / 'regtype').is_symlink()) + self.assertFalse((p / 'non-existing').is_symlink()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_symlink()) + self.assertTrue((p / 'ustar' / 'symtype').is_symlink()) + self.assertFalse((p / 'ustar' / 'fifotype').is_symlink()) + self.assertFalse((p / 'ustar' / 'blktype').is_symlink()) + self.assertFalse((p / 'ustar' / 'chrtype').is_symlink()) + + def test_is_junction(self): + p = self.root + self.assertFalse(p.is_junction()) + self.assertFalse((p / 'ustar').is_junction()) + self.assertFalse((p / 'ustar' / 'dirtype').is_junction()) + self.assertFalse((p / 'ustar' / 'regtype').is_junction()) + self.assertFalse((p / 'non-existing').is_junction()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_junction()) + self.assertFalse((p / 'ustar' / 'symtype').is_junction()) + self.assertFalse((p / 'ustar' / 'fifotype').is_junction()) + self.assertFalse((p / 'ustar' / 'blktype').is_junction()) + self.assertFalse((p / 'ustar' / 'chrtype').is_junction()) + + def test_is_fifo(self): + p = self.root + self.assertFalse(p.is_fifo()) + self.assertFalse((p / 'ustar').is_fifo()) + self.assertFalse((p / 'ustar' / 'dirtype').is_fifo()) + self.assertFalse((p / 'ustar' / 'regtype').is_fifo()) + self.assertFalse((p / 'non-existing').is_fifo()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_fifo()) + self.assertFalse((p / 'ustar' / 'symtype').is_fifo()) + self.assertTrue((p / 'ustar' / 'fifotype').is_fifo()) + self.assertFalse((p / 'ustar' / 'blktype').is_fifo()) + self.assertFalse((p / 'ustar' / 'chrtype').is_fifo()) + + def test_is_socket(self): + p = self.root + self.assertFalse(p.is_socket()) + self.assertFalse((p / 'ustar').is_socket()) + self.assertFalse((p / 'ustar' / 'dirtype').is_socket()) + self.assertFalse((p / 'ustar' / 'regtype').is_socket()) + self.assertFalse((p / 'non-existing').is_socket()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_socket()) + self.assertFalse((p / 'ustar' / 'symtype').is_socket()) + self.assertFalse((p / 'ustar' / 'fifotype').is_socket()) + self.assertFalse((p / 'ustar' / 'blktype').is_socket()) + self.assertFalse((p / 'ustar' / 'chrtype').is_socket()) + + def test_is_block_device(self): + p = self.root + self.assertFalse(p.is_block_device()) + self.assertFalse((p / 'ustar').is_block_device()) + self.assertFalse((p / 'ustar' / 'dirtype').is_block_device()) + self.assertFalse((p / 'ustar' / 'regtype').is_block_device()) + self.assertFalse((p / 'non-existing').is_block_device()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_block_device()) + self.assertFalse((p / 'ustar' / 'symtype').is_block_device()) + self.assertFalse((p / 'ustar' / 'fifotype').is_block_device()) + self.assertTrue((p / 'ustar' / 'blktype').is_block_device()) + self.assertFalse((p / 'ustar' / 'chrtype').is_block_device()) + + def test_is_char_device(self): + p = self.root + self.assertFalse(p.is_char_device()) + self.assertFalse((p / 'ustar').is_char_device()) + self.assertFalse((p / 'ustar' / 'dirtype').is_char_device()) + self.assertFalse((p / 'ustar' / 'regtype').is_char_device()) + self.assertFalse((p / 'non-existing').is_char_device()) + self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_char_device()) + self.assertFalse((p / 'ustar' / 'symtype').is_char_device()) + self.assertFalse((p / 'ustar' / 'fifotype').is_char_device()) + self.assertFalse((p / 'ustar' / 'blktype').is_char_device()) + self.assertTrue((p / 'ustar' / 'chrtype').is_char_device()) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Lib/test/testtarpath.tar b/Lib/test/testtarpath.tar new file mode 100644 index 0000000000000000000000000000000000000000..f90c18fa9de46a30e75fda4b9a0e261488cb0a19 GIT binary patch literal 20480 zcmeI2O>V+45QV+&DSUyF*iM`^KizQ&P}4R7B~?f(di#z|fk1_*D@|l6FG3J0!ZzRY z8;{4V+h$YN%c|Z8wkxHpC<;Lzs>sv+r+efsp|UJbRc>sqM5?mF7DDQ}sUORIwd<<- zm1}e#yKdD=Db}mDF28Q~a_{-=Z$|xOebRr~H19k^bpnU!Z7or>;GJ}(}YL% zQ>zF4^SV6^H=gxZA(U%IsaRm-H!jNxQhQ;n^eej2mmAZfA?1HB)!}ekh6y& zh@+2)2zdXy^?$)PsDG{h2J1hcBIlM#|9O<_KkfgJ|DSc@y7T(e+k$Zt9Q+@$|I>=f zK+vB{^j^DX{apsYvj~Lee?}?V|0DnF3B`1Kk^e1_|1n{Xwps80y7?@(lWyPf4D+u4 z^>zKvsEqPI_@76Qeyd0SucV&@2mpOJBe?hecS}EW{WrM&qv8J{rr**3zWy_Z{fA&4 zQy=92C&sOx|Eb~iA3FY}^&k6x{+Z%@KV|ObZvOYH{;y0%^&jMa%)(If7x90J_|H!~ zt{zqE|8x2?9{u{s9=`r-W$FB1Q*y#Rwz2>JBd%zK(SQI5 zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>m GmB2R@KXU~D literal 0 HcmV?d00001 From 508cabe051440b0f1a5f5ec902dc454156391417 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Jul 2023 19:53:33 +0100 Subject: [PATCH 10/31] Undo changes to tarfile. --- Doc/library/tarfile.rst | 41 -- Doc/whatsnew/3.13.rst | 7 - Lib/tarfile.py | 229 +------ Lib/test/test_tarfile.py | 568 ------------------ Lib/test/testtarpath.tar | Bin 20480 -> 0 bytes ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 4 +- 6 files changed, 3 insertions(+), 846 deletions(-) delete mode 100644 Lib/test/testtarpath.tar diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 431d422ec13682..fd4820e78d68d1 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -854,47 +854,6 @@ A :class:`TarInfo` object also provides some convenient query methods: Return :const:`True` if it is one of character device, block device or FIFO. -TarPath Objects ---------------- - -The :class:`TarPath` class provides an interface for tar files that's -compatible with :class:`pathlib.Path`. - -.. class:: TarPath(*pathsegments, tarfile) - - Create a :class:`TarPath` object from a given :class:`TarFile` object. - If *pathsegments* are supplied, they are joined together to form a path - within the archive; otherwise the path is positioned at the archive root. - - .. versionadded:: 3.13 - -.. attribute:: TarPath.tarfile - - The backing :class:`TarFile` instance, as supplied to the initializer. - -Features such as testing file types, reading or writing files, and iterating -or globbing directories are supported:: - - import tarfile - with tarfile.open("sample.tar.gz", "r:gz") as tar: - root = tarfile.TarPath(tarfile=tar) - for readme in root.glob("**/README*", case_sensitive=False): - print(f"Found README file at {readme}:") - print(readme.read_text()) - break - -Some :class:`TarPath` methods unconditionally raise -:exc:`pathlib.UnsupportedOperation`. They are: - -- ``absolute()``, ``cwd()``, ``expanduser()``, ``home()`` and ``as_uri()``, - because tar archives lack these features. -- ``touch()``, ``rename()``, ``replace()``, ``chmod()``, ``lchmod()``, - ``unlink()`` and ``rmdir()``, because the :class:`TarFile` class does not - support reading and writing the same archive. - -Refer to the :mod:`pathlib` documentation for information about other methods. - - .. _tarfile-extraction-filter: Extraction filters diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 1e34e5055721e6..b7c436fc151611 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -119,13 +119,6 @@ pathlib :meth:`~pathlib.Path.is_dir`. (Contributed by Barney Gale in :gh:`77609` and :gh:`105793`.) -tarfile -------- - -* Add :class:`tarfile.TarPath` class, which provides access to tar archive - members via the :class:`pathlib.Path` interface. - (Contributed by Barney Gale in :gh:`89812`.) - traceback --------- diff --git a/Lib/tarfile.py b/Lib/tarfile.py index a8ca2264040063..df4e41f7a0d23a 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -37,13 +37,9 @@ # Imports #--------- from builtins import open as bltn_open -from collections import namedtuple -import errno import sys import os import io -import pathlib -import posixpath import shutil import stat import time @@ -73,7 +69,7 @@ "DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter", "tar_filter", "FilterError", "AbsoluteLinkError", "OutsideDestinationError", "SpecialFileError", "AbsolutePathError", - "LinkOutsideDestinationError", "TarPath"] + "LinkOutsideDestinationError"] #--------------------------------------------------------- @@ -2776,229 +2772,6 @@ def __exit__(self, type, value, traceback): self.fileobj.close() self.closed = True - -_tar_stat_fields = ('st_mode st_ino st_dev st_nlink st_uid st_gid ' - 'st_size st_atime st_mtime st_ctime st_uname st_gname') - - -class _TarStatResult(namedtuple('_TarStatResult', _tar_stat_fields)): - """Tar-specific version of os.stat_result. Returned by TarPath.stat().""" - __slots__ = () - - @classmethod - def from_tarinfo(cls, tarfile, tarinfo): - """Create a _TarStatResult from TarFile and TarInfo objects.""" - if tarinfo.type in REGULAR_TYPES: - st_mode = stat.S_IFREG - elif tarinfo.type == DIRTYPE: - st_mode = stat.S_IFDIR - elif tarinfo.type == SYMTYPE or tarinfo.type == LNKTYPE: - st_mode = stat.S_IFLNK - elif tarinfo.type == FIFOTYPE: - st_mode = stat.S_IFIFO - elif tarinfo.type == CHRTYPE: - st_mode = stat.S_IFCHR - elif tarinfo.type == BLKTYPE: - st_mode = stat.S_IFBLK - else: - raise ValueError(tarinfo.type) - return cls(st_mode=tarinfo.mode | st_mode, - st_ino=tarinfo.offset_data, - st_dev=id(tarfile), - st_nlink=0, - st_uid=tarinfo.uid, - st_gid=tarinfo.gid, - st_size=tarinfo.size, - st_atime=0, - st_mtime=tarinfo.mtime, - st_ctime=0, - st_uname=tarinfo.uname, - st_gname=tarinfo.gname) - - @classmethod - def implied_directory(cls, tarfile, path): - """Create a _TarStatResult for a directory that is implied to exist - by another archive member's path. - """ - return cls(stat.S_IFDIR, hash(path), id(tarfile), 0, 0, 0, 0, 0, 0, 0, None, None) - - -class _TarPathWriter(io.BytesIO): - """File object that flushes its contents to a tar archive on close. - Returned by TarPath.open(mode="w"). - """ - - def __init__(self, tarfile, path): - super().__init__() - self.tarfile = tarfile - self.path = path - - def close(self): - info = TarInfo(self.path) - info.size = self.tell() - self.seek(0) - self.tarfile.addfile(info, self) - super().close() - - -class TarPath(pathlib._VirtualPath): - """A pathlib-compatible interface for tar files.""" - - __slots__ = ('tarfile',) - _flavour = posixpath - - def __init__(self, *pathsegments, tarfile): - super().__init__(*pathsegments) - self.tarfile = tarfile - - def __repr__(self): - return f"{type(self).__name__}({str(self)!r}, tarfile={self.tarfile!r})" - - def __hash__(self): - return hash((id(self.tarfile), str(self))) - - def __eq__(self, other): - if not isinstance(other, TarPath): - return NotImplemented - elif other.tarfile is not self.tarfile: - return False - return super().__eq__(other) - - def __lt__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__lt__(other) - - def __le__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__le__(other) - - def __gt__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__gt__(other) - - def __ge__(self, other): - if not isinstance(other, TarPath) or other.tarfile is not self.tarfile: - return NotImplemented - return super().__ge__(other) - - def with_segments(self, *pathsegments): - """Construct a new TarPath object with the same underlying TarFile - object from any number of path-like objects. - """ - return type(self)(*pathsegments, tarfile=self.tarfile) - - def stat(self, *, follow_symlinks=True): - """Return the path's status, similar to os.stat().""" - if follow_symlinks: - resolved = self.resolve() - else: - resolved = self.parent.resolve() / self.name - implied_directory = False - for info in reversed(self.tarfile.getmembers()): - path = self.with_segments(info.name) - if path == resolved: - return _TarStatResult.from_tarinfo(self.tarfile, info) - elif resolved in path.parents: - implied_directory = True - if implied_directory: - return _TarStatResult.implied_directory(self.tarfile, str(resolved)) - else: - raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) - - def owner(self): - """Return the user name of the path owner.""" - name = self.stat().st_uname - if name is not None: - return name - raise pathlib.UnsupportedOperation() - - def group(self): - """Return the group name of the path owner.""" - name = self.stat().st_gname - if name is not None: - return name - raise pathlib.UnsupportedOperation() - - def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): - """Open the archive member pointed by this path and return a file - object, similar to the built-in open() function. - """ - if buffering != -1: - return super().open(mode, buffering, encoding, errors, newline) - action = ''.join(c for c in mode if c not in 'btU') - if action == 'r': - fileobj = self.tarfile.extractfile(str(self.resolve())) - elif action == 'w': - fileobj = _TarPathWriter(self.tarfile, str(self.resolve())) - else: - raise pathlib.UnsupportedOperation() - if 'b' not in mode: - fileobj = io.TextIOWrapper(fileobj, encoding, errors, newline) - return fileobj - - def iterdir(self): - """Yield path objects of the directory contents. The children are - yielded in arbitrary order. - """ - resolved = self.resolve() - seen = set() - for info in self.tarfile.getmembers(): - path = self.with_segments(info.name) - if path == resolved: - if info.type != DIRTYPE: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", str(self)) - while True: - parent = path.parent - if parent == path: - break - elif parent == resolved: - path_str = str(path) - if path_str not in seen: - seen.add(path_str) - yield self / path.name - break - path = parent - if not seen: - raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) - - def readlink(self): - """Return the path to which the symbolic link points.""" - for info in reversed(self.tarfile.getmembers()): - path = self.with_segments(info.name) - if path == self: - if info.issym(): - return self.with_segments(info.linkname) - else: - raise OSError(errno.EINVAL, "Not a symlink", str(self)) - elif self in path.parents: - raise OSError(errno.EINVAL, "Not a symlink", str(self)) - raise FileNotFoundError(errno.ENOENT, "File not found", str(self)) - - def mkdir(self, mode=0o777, parents=False, exist_ok=False): - """Create a new directory at this given path.""" - info = TarInfo(str(self)) - info.type = DIRTYPE - info.mode = mode - self.tarfile.addfile(info) - - def symlink_to(self, target, target_is_directory=False): - """Make this path a symlink pointing to the target path.""" - info = TarInfo(str(self)) - info.type = SYMTYPE - info.linkname = str(self.with_segments(target)) - self.tarfile.addfile(info) - - def hardlink_to(self, target): - """Make this path a hard link pointing to the target path.""" - info = TarInfo(str(self)) - info.type = LNKTYPE - info.linkname = str(self.with_segments(target)) - self.tarfile.addfile(info) - - #-------------------- # exported functions #-------------------- diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 96b60abc975ca0..2eda7fc4ceac71 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -1,4 +1,3 @@ -import errno import sys import os import io @@ -3944,573 +3943,6 @@ def valueerror_filter(tarinfo, path): self.expect_exception(TypeError) # errorlevel is not int -class TarPathTest(unittest.TestCase): - def setUp(self): - self.tarpath = support.findfile("testtarpath.tar") - self.tar = tarfile.TarFile(self.tarpath) - self.root = tarfile.TarPath(tarfile=self.tar) - - def tearDown(self): - self.tar.close() - - def test_tarfile(self): - self.assertIs(self.root.tarfile, self.tar) - - def test_hash(self): - with tarfile.TarFile(self.tarpath) as tar0: - with tarfile.TarFile(self.tarpath) as tar1: - p = tarfile.TarPath('fileA', tarfile=tar0) - p1 = tarfile.TarPath('fileA', tarfile=tar0) - p2 = tarfile.TarPath('fileA', tarfile=tar1) - p3 = tarfile.TarPath('fileB', tarfile=tar0) - self.assertEqual(hash(p), hash(p1)) - self.assertNotEqual(hash(p), hash(p2)) - self.assertNotEqual(hash(p), hash(p3)) - - def test_eq(self): - with tarfile.TarFile(self.tarpath) as tar0: - with tarfile.TarFile(self.tarpath) as tar1: - p = tarfile.TarPath('fileA', tarfile=tar0) - p1 = tarfile.TarPath('fileA', tarfile=tar0) - p2 = tarfile.TarPath('fileA', tarfile=tar1) - p3 = tarfile.TarPath('fileB', tarfile=tar0) - self.assertEqual(p, p1) - self.assertNotEqual(p, p2) - self.assertNotEqual(p, p3) - - def test_samefile(self): - p = self.root / 'fileA' - pp = self.root / 'fileA' - q = self.root / 'dirB' / 'fileB' - self.assertTrue(p.samefile('fileA')) - self.assertTrue(p.samefile(pp)) - self.assertFalse(p.samefile('dirB/fileB')) - self.assertFalse(p.samefile(q)) - # Test the non-existent file case - r = self.root / 'foo' - self.assertRaises(FileNotFoundError, p.samefile, r) - self.assertRaises(FileNotFoundError, p.samefile, 'foo') - self.assertRaises(FileNotFoundError, r.samefile, p) - self.assertRaises(FileNotFoundError, r.samefile, 'foo') - self.assertRaises(FileNotFoundError, r.samefile, r) - self.assertRaises(FileNotFoundError, r.samefile, 'foo') - - def test_exists(self): - p = self.root - self.assertTrue(p.exists()) - self.assertTrue((p / 'dirA').exists()) - self.assertTrue((p / 'fileA').exists()) - self.assertFalse((p / 'fileA' / 'bah').exists()) - self.assertTrue((p / 'linkA').exists()) - self.assertTrue((p / 'linkB').exists()) - self.assertTrue((p / 'linkB' / 'fileB').exists()) - self.assertFalse((p / 'linkA' / 'bah').exists()) - self.assertFalse((p / 'brokenLink').exists()) - self.assertTrue((p / 'brokenLink').exists(follow_symlinks=False)) - self.assertFalse((p / 'foo').exists()) - self.assertFalse(p.with_segments('/xyzzy').exists()) - - def test_open(self): - with (self.root / 'fileA').open('r') as f: - self.assertIsInstance(f, io.TextIOBase) - self.assertEqual(f.read(), "this is file A\n") - with (self.root / 'fileA').open('rb') as f: - self.assertIsInstance(f, io.BufferedIOBase) - self.assertEqual(f.read().strip(), b"this is file A") - - def test_iterdir(self): - it = self.root.iterdir() - paths = sorted(it) - expected = ['brokenLink', 'brokenLinkLoop', - 'dirA', 'dirB', 'dirC', 'dirE', 'fileA', - 'linkA', 'linkB'] - self.assertEqual(paths, [ self.root / q for q in expected ]) - - def test_iterdir_symlink(self): - p = self.root / 'linkB' - paths = sorted(p.iterdir()) - expected = [ p / q for q in ['fileB', 'linkD'] ] - self.assertEqual(paths, expected) - - def test_iterdir_nodir(self): - p = self.root / 'foo' - with self.assertRaises(OSError) as cm: - next(p.iterdir()) - - def test_glob(self): - def _check(pattern, expected): - actual = sorted(self.root.glob(pattern)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileA", ["fileA"]) - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ['dirA', 'fileA', 'linkA']) - _check("*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD']) - _check("*/fileB", ['dirB/fileB', 'linkB/fileB']) - _check("brokenLink", ['brokenLink']) - _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) - - def test_glob_case_sensitive(self): - def _check(pattern, case_sensitive, expected): - actual = sorted([str(q) for q in self.root.glob(pattern, case_sensitive=case_sensitive)]) - expected = [str(self.root / q) for q in expected] - self.assertEqual(actual, expected) - - _check("DIRB/FILE*", True, []) - _check("DIRB/FILE*", False, ["dirB/fileB"]) - _check("dirb/file*", True, []) - _check("dirb/file*", False, ["dirB/fileB"]) - - def test_glob_follow_symlinks(self): - def _check(pattern, expected): - actual = sorted([q for q in self.root.glob(pattern, follow_symlinks=True) - if "linkD" not in q.parent.parts]) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ["dirA", "fileA", "linkA"]) - _check("*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check("*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check("*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) - _check("dir*/*/..", ["dirA/linkC/..", "dirC/dirD/.."]) - _check("dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE"]) - _check("dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check("dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"]) - _check("dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) - _check("dir*/**/fileC", ["dirC/fileC"]) - _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check("*/dirD/**/", ["dirC/dirD"]) - - def test_glob_no_follow_symlinks(self): - def _check(pattern, expected): - actual = sorted(self.root.glob(pattern, follow_symlinks=False)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - _check("fileB", []) - _check("dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check("*A", ["dirA", "fileA", "linkA"]) - _check("*B/*", ["dirB/fileB", "dirB/linkD"]) - _check("*/fileB", ["dirB/fileB"]) - _check("*/", ["dirA", "dirB", "dirC", "dirE"]) - _check("dir*/*/..", ["dirC/dirD/.."]) - _check("dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - _check("dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check("dir*/*/**/", ["dirC/dirD"]) - _check("dir*/*/**/..", ["dirC/dirD/.."]) - _check("dir*/**/fileC", ["dirC/fileC"]) - _check("dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check("*/dirD/**/", ["dirC/dirD"]) - - def test_rglob(self): - def _check(glob, expected): - self.assertEqual(sorted(glob), sorted(self.root / q for q in expected)) - p = self.root - _check(p.rglob("fileA"), ["fileA"]) - _check(p.rglob("fileB"), ["dirB/fileB"]) - _check(p.rglob("**/fileB"), ["dirB/fileB"]) - _check(p.rglob("*/fileA"), []) - _check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB", - "linkB/fileB", "dirA/linkC/fileB"]) - _check(p.rglob("file*"), ["fileA", "dirB/fileB", - "dirC/fileC", "dirC/dirD/fileD"]) - _check(p.rglob("*/"), [ - "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", - "dirC/dirD", "dirE", "linkB", - ]) - _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) - q = p / "dirC" - _check(q.rglob("*"), ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(q.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(q.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(q.rglob("dir*/**"), ["dirC/dirD"]) - _check(q.rglob("*/*"), ["dirC/dirD/fileD"]) - _check(q.rglob("*/"), ["dirC/dirD"]) - _check(q.rglob(""), ["dirC", "dirC/dirD"]) - _check(q.rglob("**"), ["dirC", "dirC/dirD"]) - _check(q.rglob("*.txt"), ["dirC/novel.txt"]) - _check(q.rglob("*.*"), ["dirC/novel.txt"]) - - def test_rglob_follow_symlinks(self): - def _check(path, pattern, expected): - actual = sorted([q for q in path.rglob(pattern, follow_symlinks=True) - if "linkD" not in q.parent.parts]) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - p = self.root - _check(p, "fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirA/linkC/fileB", "dirB/fileB", "linkB/fileB"]) - _check(p, "file*", ["dirA/linkC/fileB", "dirB/fileB", - "dirC/dirD/fileD", "dirC/fileC", "fileA", "linkB/fileB"]) - _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) - _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) - - q = p / "dirC" - _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) - _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC"]) - _check(q, "*/*", ["dirC/dirD/fileD"]) - _check(q, "*/", ["dirC/dirD"]) - _check(q, "", ["dirC", "dirC/dirD"]) - _check(q, "*.txt", ["dirC/novel.txt"]) - _check(q, "*.*", ["dirC/novel.txt"]) - - def test_rglob_no_follow_symlinks(self): - def _check(path, pattern, expected): - actual = sorted(path.rglob(pattern, follow_symlinks=False)) - expected = [self.root / q for q in expected] - self.assertEqual(actual, expected) - - p = self.root - _check(p, "fileB", ["dirB/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "file*", ["dirB/fileB", "dirC/dirD/fileD", "dirC/fileC", "fileA"]) - _check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - _check(p, "", ["", "dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) - - q = p / "dirC" - _check(q, "*", ["dirC/dirD", "dirC/dirD/fileD", "dirC/fileC", "dirC/novel.txt"]) - _check(q, "file*", ["dirC/dirD/fileD", "dirC/fileC", ]) - _check(q, "*/*", ["dirC/dirD/fileD"]) - _check(q, "*/", ["dirC/dirD"]) - _check(q, "", ["dirC", "dirC/dirD"]) - _check(q, "*.txt", ["dirC/novel.txt"]) - _check(q, "*.*", ["dirC/novel.txt"]) - - def test_rglob_symlink_loop(self): - given = sorted(self.root.rglob('*')) - expect = ['brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - ] - self.assertEqual(given, sorted(self.root / x for x in expect)) - - def test_glob_dotdot(self): - p = self.root - self.assertEqual(sorted(p.glob("..")), [ p / ".." ]) - self.assertEqual(sorted(p.glob("../..")), [ p / ".." / ".." ]) - self.assertEqual(sorted(p.glob("dirA/..")), [ p / "dirA" / ".." ]) - self.assertEqual(sorted(p.glob("dirA/../file*")), [ p / "dirA/../fileA" ]) - self.assertEqual(sorted(p.glob("dirA/../file*/..")), []) - self.assertEqual(sorted(p.glob("../xyzzy")), []) - self.assertEqual(sorted(p.glob("xyzzy/..")), []) - self.assertEqual(sorted(p.glob("/".join([".."] * 50))), [ p.joinpath(*[".."] * 50)]) - - def test_walk(self): - def _sorted_walk(follow_symlinks): - results = [] - for dirpath, dirnames, filenames in self.root.walk(follow_symlinks=follow_symlinks): - if 'linkD' in dirnames: - # Treat recursive symlink as file - dirnames.remove('linkD') - filenames.append('linkD') - dirnames.sort() - filenames.sort() - results.append((dirpath, dirnames, filenames)) - return results - - p = self.root - self.assertEqual(_sorted_walk(False), [ - (p, - ['dirA', 'dirB', 'dirC', 'dirE'], - ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA', 'linkB']), - (p / 'dirA', [], ['linkC']), - (p / 'dirB', [], ['fileB', 'linkD']), - (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), - (p / 'dirC' / 'dirD', [], ['fileD']), - ]) - - self.assertEqual(_sorted_walk(True), [ - (p, - ['dirA', 'dirB', 'dirC', 'dirE', 'linkB'], - ['brokenLink', 'brokenLinkLoop', 'fileA', 'linkA']), - (p / 'dirA', ['linkC'], []), - (p / 'dirA' / 'linkC', [], ['fileB', 'linkD']), - (p / 'dirB', [], ['fileB', 'linkD']), - (p / 'dirC', ['dirD'], ['fileC', 'novel.txt']), - (p / 'dirC' / 'dirD', [], ['fileD']), - (p / 'linkB', [], ['fileB', 'linkD']), - ]) - - def test_readlink(self): - p = self.root - self.assertEqual((p / 'linkA').readlink(), p / 'fileA') - self.assertEqual((p / 'brokenLink').readlink(), p / 'non-existing') - self.assertEqual((p / 'linkB').readlink(), p / 'dirB') - with self.assertRaises(OSError): - (p / 'fileA').readlink() - - def test_resolve(self): - with self.assertRaises(OSError) as cm: - self.root.joinpath('foo').resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - def _check(path, expected, strict=True): - self.assertEqual(self.root.joinpath(path).resolve(strict=strict), - self.root.joinpath(expected)) - _check('foo/in/spam', 'foo/in/spam', False) - _check('../foo/in/spam', '../foo/in/spam', False) - _check('dirB/fileB', 'dirB/fileB') - _check('linkA', 'fileA') - _check('dirA/linkC/fileB', 'dirB/fileB') - _check('dirB/linkD/fileB', 'dirB/fileB') - _check('dirA/linkC/fileB/foo/in/spam', 'dirB/fileB/foo/in/spam', False) - _check('dirA/linkC/../foo/in/spam', 'foo/in/spam', False) - - def test_stat(self): - statA = self.root.joinpath('fileA').stat() - statB = self.root.joinpath('dirB', 'fileB').stat() - statC = self.root.joinpath('dirC').stat() - # st_mode: files are the same, directory differs. - self.assertIsInstance(statA.st_mode, int) - self.assertEqual(statA.st_mode, statB.st_mode) - self.assertNotEqual(statA.st_mode, statC.st_mode) - self.assertNotEqual(statB.st_mode, statC.st_mode) - # st_ino: all different, - self.assertIsInstance(statA.st_ino, int) - self.assertNotEqual(statA.st_ino, statB.st_ino) - self.assertNotEqual(statA.st_ino, statC.st_ino) - self.assertNotEqual(statB.st_ino, statC.st_ino) - # st_dev: all the same. - self.assertIsInstance(statA.st_dev, int) - self.assertEqual(statA.st_dev, statB.st_dev) - self.assertEqual(statA.st_dev, statC.st_dev) - # other attributes not used by pathlib. - - def test_stat_no_follow_symlinks(self): - p = self.root / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - - def test_stat_no_follow_symlinks_nosymlink(self): - p = self.root / 'fileA' - st = p.stat() - self.assertEqual(st, p.stat(follow_symlinks=False)) - - def test_lstat(self): - p = self.root / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.lstat()) - - def test_lstat_nosymlink(self): - p = self.root / 'fileA' - st = p.stat() - self.assertEqual(st, p.lstat()) - - def test_owner(self): - p = self.root - self.assertRaises(pathlib.UnsupportedOperation, p.owner) - self.assertEqual((p / 'fileA').owner(), 'barney') - - def test_group(self): - p = self.root - self.assertRaises(pathlib.UnsupportedOperation, p.group) - self.assertEqual((p / 'fileA').group(), 'barney') - - def test_read_write_bytes(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - p.write_bytes(b'abcdefg') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - self.assertEqual(p.read_bytes(), b'abcdefg') - - def test_read_write_text(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - p.write_text('äbcdefg', encoding='latin-1') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - p = tarfile.TarPath('fileA', tarfile=tar) - self.assertEqual(p.read_text(encoding='utf-8', errors='ignore'), 'bcdefg') - - def test_mkdir(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath('dirA', tarfile=tar) - p.mkdir() - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('dirA') - self.assertEqual(info.type, tarfile.DIRTYPE) - - def test_symlink_to(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath(tarfile=tar) - p.joinpath('linkA').symlink_to('fileA') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('linkA') - self.assertEqual(info.type, tarfile.SYMTYPE) - self.assertEqual(info.linkname, 'fileA') - - def test_hardlink_to(self): - fileobj = io.BytesIO() - with tarfile.TarFile(fileobj=fileobj, mode="w") as tar: - p = tarfile.TarPath(tarfile=tar) - p.joinpath('linkA').hardlink_to('fileA') - - fileobj.seek(0) - with tarfile.TarFile(fileobj=fileobj) as tar: - info = tar.getmember('linkA') - self.assertEqual(info.type, tarfile.LNKTYPE) - self.assertEqual(info.linkname, 'fileA') - - -class TarPathFileTypeTest(unittest.TestCase): - def setUp(self): - tarpath = support.findfile("testtar.tar") - self.tar = tarfile.TarFile(tarpath) - self.root = tarfile.TarPath(tarfile=self.tar) - - def tearDown(self): - self.tar.close() - - def test_is_dir(self): - p = self.root - self.assertTrue(p.is_dir()) - self.assertTrue((p / 'ustar').is_dir()) - self.assertTrue((p / 'ustar' / 'dirtype').is_dir()) - self.assertFalse((p / 'ustar' / 'regtype').is_dir()) - self.assertFalse((p / 'non-existing').is_dir()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_dir()) - self.assertFalse((p / 'ustar' / 'symtype').is_dir()) - self.assertFalse((p / 'ustar' / 'lnktype').is_dir()) - self.assertFalse((p / 'ustar' / 'fifotype').is_dir()) - self.assertFalse((p / 'ustar' / 'blktype').is_dir()) - self.assertFalse((p / 'ustar' / 'chrtype').is_dir()) - - def test_is_file(self): - p = self.root - self.assertFalse(p.is_file()) - self.assertFalse((p / 'ustar').is_file()) - self.assertFalse((p / 'ustar' / 'dirtype').is_file()) - self.assertTrue((p / 'ustar' / 'regtype').is_file()) - self.assertFalse((p / 'non-existing').is_file()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_file()) - self.assertTrue((p / 'ustar' / 'symtype').is_file()) - self.assertFalse((p / 'ustar' / 'symtype').is_file(follow_symlinks=False)) - self.assertFalse((p / 'ustar' / 'fifotype').is_file()) - self.assertFalse((p / 'ustar' / 'blktype').is_file()) - self.assertFalse((p / 'ustar' / 'chrtype').is_file()) - - def test_is_mount(self): - p = self.root - self.assertTrue(p.is_mount()) - self.assertFalse((p / 'ustar').is_mount()) - self.assertFalse((p / 'ustar' / 'dirtype').is_mount()) - self.assertFalse((p / 'ustar' / 'regtype').is_mount()) - self.assertFalse((p / 'non-existing').is_mount()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_mount()) - self.assertFalse((p / 'ustar' / 'symtype').is_mount()) - self.assertFalse((p / 'ustar' / 'fifotype').is_mount()) - self.assertFalse((p / 'ustar' / 'blktype').is_mount()) - self.assertFalse((p / 'ustar' / 'chrtype').is_mount()) - - def test_is_symlink(self): - p = self.root - self.assertFalse(p.is_symlink()) - self.assertFalse((p / 'ustar').is_symlink()) - self.assertFalse((p / 'ustar' / 'dirtype').is_symlink()) - self.assertFalse((p / 'ustar' / 'regtype').is_symlink()) - self.assertFalse((p / 'non-existing').is_symlink()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_symlink()) - self.assertTrue((p / 'ustar' / 'symtype').is_symlink()) - self.assertFalse((p / 'ustar' / 'fifotype').is_symlink()) - self.assertFalse((p / 'ustar' / 'blktype').is_symlink()) - self.assertFalse((p / 'ustar' / 'chrtype').is_symlink()) - - def test_is_junction(self): - p = self.root - self.assertFalse(p.is_junction()) - self.assertFalse((p / 'ustar').is_junction()) - self.assertFalse((p / 'ustar' / 'dirtype').is_junction()) - self.assertFalse((p / 'ustar' / 'regtype').is_junction()) - self.assertFalse((p / 'non-existing').is_junction()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_junction()) - self.assertFalse((p / 'ustar' / 'symtype').is_junction()) - self.assertFalse((p / 'ustar' / 'fifotype').is_junction()) - self.assertFalse((p / 'ustar' / 'blktype').is_junction()) - self.assertFalse((p / 'ustar' / 'chrtype').is_junction()) - - def test_is_fifo(self): - p = self.root - self.assertFalse(p.is_fifo()) - self.assertFalse((p / 'ustar').is_fifo()) - self.assertFalse((p / 'ustar' / 'dirtype').is_fifo()) - self.assertFalse((p / 'ustar' / 'regtype').is_fifo()) - self.assertFalse((p / 'non-existing').is_fifo()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_fifo()) - self.assertFalse((p / 'ustar' / 'symtype').is_fifo()) - self.assertTrue((p / 'ustar' / 'fifotype').is_fifo()) - self.assertFalse((p / 'ustar' / 'blktype').is_fifo()) - self.assertFalse((p / 'ustar' / 'chrtype').is_fifo()) - - def test_is_socket(self): - p = self.root - self.assertFalse(p.is_socket()) - self.assertFalse((p / 'ustar').is_socket()) - self.assertFalse((p / 'ustar' / 'dirtype').is_socket()) - self.assertFalse((p / 'ustar' / 'regtype').is_socket()) - self.assertFalse((p / 'non-existing').is_socket()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_socket()) - self.assertFalse((p / 'ustar' / 'symtype').is_socket()) - self.assertFalse((p / 'ustar' / 'fifotype').is_socket()) - self.assertFalse((p / 'ustar' / 'blktype').is_socket()) - self.assertFalse((p / 'ustar' / 'chrtype').is_socket()) - - def test_is_block_device(self): - p = self.root - self.assertFalse(p.is_block_device()) - self.assertFalse((p / 'ustar').is_block_device()) - self.assertFalse((p / 'ustar' / 'dirtype').is_block_device()) - self.assertFalse((p / 'ustar' / 'regtype').is_block_device()) - self.assertFalse((p / 'non-existing').is_block_device()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_block_device()) - self.assertFalse((p / 'ustar' / 'symtype').is_block_device()) - self.assertFalse((p / 'ustar' / 'fifotype').is_block_device()) - self.assertTrue((p / 'ustar' / 'blktype').is_block_device()) - self.assertFalse((p / 'ustar' / 'chrtype').is_block_device()) - - def test_is_char_device(self): - p = self.root - self.assertFalse(p.is_char_device()) - self.assertFalse((p / 'ustar').is_char_device()) - self.assertFalse((p / 'ustar' / 'dirtype').is_char_device()) - self.assertFalse((p / 'ustar' / 'regtype').is_char_device()) - self.assertFalse((p / 'non-existing').is_char_device()) - self.assertFalse((p / 'ustar' / 'regtype' / 'bah').is_char_device()) - self.assertFalse((p / 'ustar' / 'symtype').is_char_device()) - self.assertFalse((p / 'ustar' / 'fifotype').is_char_device()) - self.assertFalse((p / 'ustar' / 'blktype').is_char_device()) - self.assertTrue((p / 'ustar' / 'chrtype').is_char_device()) - - def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Lib/test/testtarpath.tar b/Lib/test/testtarpath.tar deleted file mode 100644 index f90c18fa9de46a30e75fda4b9a0e261488cb0a19..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeI2O>V+45QV+&DSUyF*iM`^KizQ&P}4R7B~?f(di#z|fk1_*D@|l6FG3J0!ZzRY z8;{4V+h$YN%c|Z8wkxHpC<;Lzs>sv+r+efsp|UJbRc>sqM5?mF7DDQ}sUORIwd<<- zm1}e#yKdD=Db}mDF28Q~a_{-=Z$|xOebRr~H19k^bpnU!Z7or>;GJ}(}YL% zQ>zF4^SV6^H=gxZA(U%IsaRm-H!jNxQhQ;n^eej2mmAZfA?1HB)!}ekh6y& zh@+2)2zdXy^?$)PsDG{h2J1hcBIlM#|9O<_KkfgJ|DSc@y7T(e+k$Zt9Q+@$|I>=f zK+vB{^j^DX{apsYvj~Lee?}?V|0DnF3B`1Kk^e1_|1n{Xwps80y7?@(lWyPf4D+u4 z^>zKvsEqPI_@76Qeyd0SucV&@2mpOJBe?hecS}EW{WrM&qv8J{rr**3zWy_Z{fA&4 zQy=92C&sOx|Eb~iA3FY}^&k6x{+Z%@KV|ObZvOYH{;y0%^&jMa%)(If7x90J_|H!~ zt{zqE|8x2?9{u{s9=`r-W$FB1Q*y#Rwz2>JBd%zK(SQI5 zfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*=900@8p2!H?xfB*>m GmB2R@KXU~D diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst index 9ad271a33d6057..2b100f09c67ad9 100644 --- a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -1,2 +1,2 @@ -Add :class:`tarfile.TarPath` class, which provides access to tar archive -members via the :class:`pathlib.Path` interface. +Add private ``pathlib._VirtualPath`` class, which provides experimental support +for virtual filesystems, and may be made public in a future version of Python. From 2c565916bc86242f54ab5600344e67f1bf9cbd21 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 12 Jul 2023 20:01:38 +0100 Subject: [PATCH 11/31] `_VirtualPath` --> `_PathBase` --- Lib/pathlib.py | 4 +-- Lib/test/test_pathlib.py | 30 +++++++++---------- ...3-07-03-20-23-56.gh-issue-89812.cFkDOE.rst | 2 +- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index bed05295046e6a..1c08665c6d07f0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -785,7 +785,7 @@ class PureWindowsPath(PurePath): # Filesystem-accessing classes -class _VirtualPath(PurePath): +class _PathBase(PurePath): """PurePath subclass for virtual filesystems, such as archives and remote storage. """ @@ -1379,7 +1379,7 @@ def as_uri(self): raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") -class Path(_VirtualPath): +class Path(_PathBase): """PurePath subclass that can make system calls. Path represents a filesystem path but unlike PurePath, also offers diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 28581c493070d3..004ecba3f0b0ef 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1573,8 +1573,8 @@ def test_group(self): # Tests for the virtual classes. # -class VirtualPathTest(PurePathTest): - cls = pathlib._VirtualPath +class PathBaseTest(PurePathTest): + cls = pathlib._PathBase def test_unsupported_operation(self): P = self.cls @@ -1632,9 +1632,9 @@ def test_as_bytes_common(self): self.assertRaises(TypeError, bytes, self.cls()) -class DummyVirtualPathIO(io.BytesIO): +class DummyPathIO(io.BytesIO): """ - Used by DummyVirtualPath to implement `open('w')` + Used by DummyPath to implement `open('w')` """ def __init__(self, files, path): @@ -1647,9 +1647,9 @@ def close(self): super().close() -class DummyVirtualPath(pathlib._VirtualPath): +class DummyPath(pathlib._PathBase): """ - Simple implementation of VirtualPath that keeps files and directories in + Simple implementation of PathBase that keeps files and directories in memory. """ _files = {} @@ -1691,7 +1691,7 @@ def open(self, mode='r', buffering=-1, encoding=None, elif mode == 'w': if parent not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", parent) - stream = DummyVirtualPathIO(self._files, path) + stream = DummyPathIO(self._files, path) self._files[path] = b'' self._directories[parent].add(name) else: @@ -1724,10 +1724,10 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): raise -class DummyVirtualPathTest(unittest.TestCase): - """Tests for VirtualPath methods that use stat(), open() and iterdir().""" +class DummyPathTest(unittest.TestCase): + """Tests for PathBase methods that use stat(), open() and iterdir().""" - cls = DummyVirtualPath + cls = DummyPath can_symlink = False # (BASE) @@ -2541,7 +2541,7 @@ def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(os.path.join('dirA', '..')) -class DummyVirtualPathWithSymlinks(DummyVirtualPath): +class DummyPathWithSymlinks(DummyPath): def readlink(self): path = str(self) if path in self._symlinks: @@ -2556,8 +2556,8 @@ def symlink_to(self, target, target_is_directory=False): self._symlinks[str(self)] = str(target) -class DummyVirtualPathWithSymlinksTest(DummyVirtualPathTest): - cls = DummyVirtualPathWithSymlinks +class DummyPathWithSymlinksTest(DummyPathTest): + cls = DummyPathWithSymlinks can_symlink = True def setUp(self): @@ -2581,13 +2581,13 @@ def setUp(self): # Tests for the concrete classes. # -class PathTest(DummyVirtualPathTest): +class PathTest(DummyPathTest): """Tests for the FS-accessing functionalities of the Path classes.""" cls = pathlib.Path can_symlink = os_helper.can_symlink() def setUp(self): - # note: this must be kept in sync with `DummyVirtualPathTest.setUp()` + # note: this must be kept in sync with `DummyPathTest.setUp()` def cleanup(): os.chmod(join('dirE'), 0o777) os_helper.rmtree(BASE) diff --git a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst index 2b100f09c67ad9..a4221fc4ca900b 100644 --- a/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst +++ b/Misc/NEWS.d/next/Library/2023-07-03-20-23-56.gh-issue-89812.cFkDOE.rst @@ -1,2 +1,2 @@ -Add private ``pathlib._VirtualPath`` class, which provides experimental support +Add private ``pathlib._PathBase`` class, which provides experimental support for virtual filesystems, and may be made public in a future version of Python. From 89440987805b583e9e1d42991e1140a7a4f63bc2 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 28 Aug 2023 14:55:59 +0100 Subject: [PATCH 12/31] Apply suggestions from code review Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/pathlib.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 75c86c9b7b1600..5dd88634462077 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -790,7 +790,7 @@ class _PathBase(PurePath): """ __slots__ = () __bytes__ = None - __fspath__ = None + __fspath__ = None # virtual paths have no local file system representation def stat(self, *, follow_symlinks=True): """ @@ -1190,8 +1190,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): paths += [path._make_child_relpath(d) for d in reversed(dirnames)] def absolute(self): - """Return an absolute version of this path by prepending the current - working directory. No normalization or symlink resolution is performed. + """Return an absolute version of this path + No normalization or symlink resolution is performed. Use resolve() to get the canonical path to a file. """ @@ -1210,8 +1210,7 @@ def expanduser(self): @classmethod def home(cls): - """Return a new path pointing to the user's home directory (as - returned by os.path.expanduser('~')). + """Return a new path pointing to expanduser('~'). """ return cls("~").expanduser() From b61141ae2a568583e8825f691f39869398d8ec82 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 14:57:13 +0100 Subject: [PATCH 13/31] Improve _PathBase docstring --- Lib/pathlib.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5dd88634462077..8f4b62924f2858 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -785,8 +785,17 @@ class PureWindowsPath(PurePath): class _PathBase(PurePath): - """PurePath subclass for virtual filesystems, such as archives and remote - storage. + """Base class for concrete path objects. + + This class provides dummy implementations for many methods that derived + classes can override selectively; the default implementations raise + UnsupportedOperation. The most basic methods, such as stat() and open(), + directly raise UnsupportedOperation; these basic methods are called by + other methods such as is_dir() and read_text(). + + The Path class derives this class to implement local filesystem paths. + Users may derive their own classes to implement virtual filesystem paths, + such as paths in archive files or on remote storage systems. """ __slots__ = () __bytes__ = None From 1e462b08e67ce6e6878e4fa2cc0c3f7ac79d8576 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 18:12:11 +0100 Subject: [PATCH 14/31] Explain use of nullcontext() in comment --- Lib/pathlib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8f4b62924f2858..cdf9c02aa4de6e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1046,6 +1046,7 @@ def iterdir(self): raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") def _scandir(self): + # os.scandir() returns an object that can be used as a context manager return contextlib.nullcontext(list(self.iterdir())) def _make_child_relpath(self, name): From d321cadbfd53c47a60b2ce81b0ded5c5bac4057c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 18:42:07 +0100 Subject: [PATCH 15/31] Align and test Path/PathBase docstrings --- Lib/pathlib.py | 14 +++++++------- Lib/test/test_pathlib.py | 9 +++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b2f916e36e6e8f..d2d11341b88c3c 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -426,7 +426,7 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def as_uri(self): - """Return the path as a 'file' URI.""" + """Return the path as a URI.""" if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") @@ -1210,7 +1210,7 @@ def absolute(self): """Return an absolute version of this path No normalization or symlink resolution is performed. - Use resolve() to get the canonical path to a file. + Use resolve() to resolve symlinks and remove '..' segments. """ raise UnsupportedOperation(f"{type(self).__name__}.absolute()") @@ -1239,8 +1239,8 @@ def readlink(self): def resolve(self, strict=False): """ - Resolve '..' segments in the path. Where possible, make the path - absolute and resolve symlinks on the way. + Make the path absolute, resolving all symlinks on the way and also + normalizing it. """ try: path = self.absolute() @@ -1463,10 +1463,10 @@ def _scandir(self): return os.scandir(self) def absolute(self): - """Return an absolute version of this path by prepending the current - working directory. No normalization or symlink resolution is performed. + """Return an absolute version of this path + No normalization or symlink resolution is performed. - Use resolve() to get the canonical path to a file. + Use resolve() to resolve symlinks and remove '..' segments. """ if self.is_absolute(): return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index e9c9e2b93c3d3f..35b4c9ec7d144e 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1643,6 +1643,15 @@ def test_fspath_common(self): def test_as_bytes_common(self): self.assertRaises(TypeError, bytes, self.cls()) + def test_matches_path_api(self): + our_names = {name for name in dir(self.cls) if name[0] != '_'} + path_names = {name for name in dir(pathlib.Path) if name[0] != '_'} + self.assertEqual(our_names, path_names) + for attr_name in our_names: + our_attr = getattr(self.cls, attr_name) + path_attr = getattr(pathlib.Path, attr_name) + self.assertEqual(our_attr.__doc__, path_attr.__doc__) + class DummyPathIO(io.BytesIO): """ From acfc1b04bdabeea77d7a286aeabdfb99afdb95c5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 19:49:57 +0100 Subject: [PATCH 16/31] Revise `_PathBase.is_junction()` --- Lib/pathlib.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index d2d11341b88c3c..c433e3b3e3d6c0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -910,15 +910,20 @@ def is_junction(self): """ Whether this path is a junction. """ - import stat try: - return self.lstat().st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT + self.lstat() except OSError as e: if not _ignore_error(e): raise + except ValueError: + # Non-encodable path return False - except (ValueError, AttributeError): - return False + + # Junctions are a Windows-only feature, not present in POSIX nor the + # vast majority of virtual filesystems. There is no cross-platform + # idiom to check for junctions (using stat().st_mode). And so this + # default implementation returns false if lstat() doesn't raise. + return False def is_block_device(self): """ From bc82225072d8639e5be818dc7ad9110d229961f1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 28 Aug 2023 20:05:56 +0100 Subject: [PATCH 17/31] Make is_junction() code more consistent with other is_*() methods. --- Lib/pathlib.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c433e3b3e3d6c0..65180471dfd2ac 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -912,19 +912,20 @@ def is_junction(self): """ try: self.lstat() + # Junctions are a Windows-only feature, not present in POSIX nor + # the majority of virtual filesystems. There is no cross-platform + # idiom to check for junctions (using stat().st_mode). And so this + # default implementation returns false if lstat() doesn't raise. + return False except OSError as e: if not _ignore_error(e): raise + # Path doesn't exist + return False except ValueError: # Non-encodable path return False - # Junctions are a Windows-only feature, not present in POSIX nor the - # vast majority of virtual filesystems. There is no cross-platform - # idiom to check for junctions (using stat().st_mode). And so this - # default implementation returns false if lstat() doesn't raise. - return False - def is_block_device(self): """ Whether this path is a block device. From c3127b865d7bb8655d4d4e258e1361d75cbf9a4e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 17:00:37 +0100 Subject: [PATCH 18/31] Improve `UnsupportedOperation` exception message. --- Lib/pathlib.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b2633fa69c9b11..127a1dbe84da4a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -803,12 +803,18 @@ class _PathBase(PurePath): __bytes__ = None __fspath__ = None # virtual paths have no local file system representation + def _unsupported(self, method_name): + msg = f"{type(self).__name__}.{method_name}() is unsupported" + if isinstance(self, Path): + msg += " on this system" + raise UnsupportedOperation(msg) + def stat(self, *, follow_symlinks=True): """ Return the result of the stat() system call on this path, like os.stat() does. """ - raise UnsupportedOperation(f"{type(self).__name__}.stat()") + return self._unsupported("stat") def lstat(self): """ @@ -1008,7 +1014,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - raise UnsupportedOperation(f"{type(self).__name__}.open()") + return self._unsupported("open") def read_bytes(self): """ @@ -1051,7 +1057,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - raise UnsupportedOperation(f"{type(self).__name__}.iterdir()") + return self._unsupported("iterdir") def _scandir(self): # os.scandir() returns an object that can be used as a context manager @@ -1218,7 +1224,7 @@ def absolute(self): Use resolve() to resolve symlinks and remove '..' segments. """ - raise UnsupportedOperation(f"{type(self).__name__}.absolute()") + return self._unsupported("absolute") @classmethod def cwd(cls): @@ -1229,7 +1235,7 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - raise UnsupportedOperation(f"{type(self).__name__}.expanduser()") + return self._unsupported("expanduser") @classmethod def home(cls): @@ -1241,7 +1247,7 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - raise UnsupportedOperation(f"{type(self).__name__}.readlink()") + return self._unsupported("readlink") def resolve(self, strict=False): """ @@ -1312,7 +1318,7 @@ def symlink_to(self, target, target_is_directory=False): Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - raise UnsupportedOperation(f"{type(self).__name__}.symlink_to()") + return self._unsupported("symlink_to") def hardlink_to(self, target): """ @@ -1320,19 +1326,19 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - raise UnsupportedOperation(f"{type(self).__name__}.hardlink_to()") + return self._unsupported("hardlink_to") def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - raise UnsupportedOperation(f"{type(self).__name__}.touch()") + return self._unsupported("touch") def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - raise UnsupportedOperation(f"{type(self).__name__}.mkdir()") + return self._unsupported("mkdir") def rename(self, target): """ @@ -1344,7 +1350,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - raise UnsupportedOperation(f"{type(self).__name__}.rename()") + return self._unsupported("rename") def replace(self, target): """ @@ -1356,13 +1362,13 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - raise UnsupportedOperation(f"{type(self).__name__}.replace()") + return self._unsupported("replace") def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - raise UnsupportedOperation(f"{type(self).__name__}.chmod()") + return self._unsupported("chmod") def lchmod(self, mode): """ @@ -1376,29 +1382,29 @@ def unlink(self, missing_ok=False): Remove this file or link. If the path is a directory, use rmdir() instead. """ - raise UnsupportedOperation(f"{type(self).__name__}.unlink()") + return self._unsupported("unlink") def rmdir(self): """ Remove this directory. The directory must be empty. """ - raise UnsupportedOperation(f"{type(self).__name__}.rmdir()") + return self._unsupported("rmdir") def owner(self): """ Return the login name of the file owner. """ - raise UnsupportedOperation(f"{type(self).__name__}.owner()") + return self._unsupported("owner") def group(self): """ Return the group name of the file gid. """ - raise UnsupportedOperation(f"{type(self).__name__}.group()") + return self._unsupported("group") def as_uri(self): """Return the path as a URI.""" - raise UnsupportedOperation(f"{type(self).__name__}.as_uri()") + return self._unsupported("as_uri") class Path(_PathBase): From 3540ae1a11a29dbd853ce32d1689e27be05dcfc7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 17:42:10 +0100 Subject: [PATCH 19/31] Slightly improve symlink loop code, exception message. --- Lib/pathlib.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 127a1dbe84da4a..111493b59ca5c0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -40,6 +40,9 @@ # Internals # +# Maximum number of symlinks to follow in _PathBase.resolve() +MAX_SYMLINKS = 40 + # Reference for Windows paths can be found at # https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . _WIN_RESERVED_NAMES = frozenset( @@ -1296,8 +1299,8 @@ def resolve(self, strict=False): break else: link_count += 1 - if link_count >= 40: - raise OSError(ELOOP, "Symlink loop", path) + if link_count >= MAX_SYMLINKS: + raise OSError(ELOOP, "Too many symbolic links in path", path) elif link_target.root or link_target.drive: link_target = link.parent / link_target drv = link_target.drive From c9f0f20daf503656c5f4d3c4f7239fc20ca08aeb Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 18:28:02 +0100 Subject: [PATCH 20/31] Restore deleted comment in `cwd()`, expand `_scandir()` comment. --- Lib/pathlib.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 111493b59ca5c0..148508d55bd67a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1063,7 +1063,8 @@ def iterdir(self): return self._unsupported("iterdir") def _scandir(self): - # os.scandir() returns an object that can be used as a context manager + # Emulate os.scandir(), which returns an object that can be used as a + # context manager. This method is called by walk() and glob(). return contextlib.nullcontext(self.iterdir()) def _make_child_relpath(self, name): @@ -1232,6 +1233,10 @@ def absolute(self): @classmethod def cwd(cls): """Return a new path pointing to the current working directory.""" + # We call 'absolute()' rather than using 'os.getcwd()' directly to + # enable users to replace the implementation of 'absolute()' in a + # subclass and benefit from the new behaviour here. This works because + # os.path.abspath('.') == os.getcwd(). return cls().absolute() def expanduser(self): From 0ee10cac6cf01a92ca6c5fb90bfa3a2312ad3d6c Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 2 Sep 2023 18:34:51 +0100 Subject: [PATCH 21/31] Make `_PathBase.is_junction()` immediately return false. --- Lib/pathlib.py | 19 ++++--------------- Lib/test/test_pathlib.py | 1 - 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 148508d55bd67a..03673d5f541f34 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -919,21 +919,10 @@ def is_junction(self): """ Whether this path is a junction. """ - try: - self.lstat() - # Junctions are a Windows-only feature, not present in POSIX nor - # the majority of virtual filesystems. There is no cross-platform - # idiom to check for junctions (using stat().st_mode). And so this - # default implementation returns false if lstat() doesn't raise. - return False - except OSError as e: - if not _ignore_error(e): - raise - # Path doesn't exist - return False - except ValueError: - # Non-encodable path - return False + # Junctions are a Windows-only feature, not present in POSIX nor the + # majority of virtual filesystems. There is no cross-platform idiom + # to check for junctions (using stat().st_mode). + return False def is_block_device(self): """ diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index be24601e653392..de07a67680a3d2 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -1604,7 +1604,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.is_char_device) self.assertRaises(e, p.is_fifo) self.assertRaises(e, p.is_socket) - self.assertRaises(e, p.is_junction) self.assertRaises(e, p.open) self.assertRaises(e, p.read_bytes) self.assertRaises(e, p.read_text) From 17eee2fc16bdf56616f093c508df140d12e9bf95 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 9 Sep 2023 15:37:49 +0100 Subject: [PATCH 22/31] MAX_SYMLINKS --> _MAX_SYMLINKS --- Lib/pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 03673d5f541f34..1a2eb1ec92e260 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -41,7 +41,7 @@ # # Maximum number of symlinks to follow in _PathBase.resolve() -MAX_SYMLINKS = 40 +_MAX_SYMLINKS = 40 # Reference for Windows paths can be found at # https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . @@ -1293,7 +1293,7 @@ def resolve(self, strict=False): break else: link_count += 1 - if link_count >= MAX_SYMLINKS: + if link_count >= _MAX_SYMLINKS: raise OSError(ELOOP, "Too many symbolic links in path", path) elif link_target.root or link_target.drive: link_target = link.parent / link_target From c7c46bc5f7b2b26cb993627e7aa7db21063c048f Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 9 Sep 2023 19:21:20 +0100 Subject: [PATCH 23/31] `return self._unsupported()` --> `self._unsupported()` --- Lib/pathlib.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1a2eb1ec92e260..253f2da838efbe 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -817,7 +817,7 @@ def stat(self, *, follow_symlinks=True): Return the result of the stat() system call on this path, like os.stat() does. """ - return self._unsupported("stat") + self._unsupported("stat") def lstat(self): """ @@ -1006,7 +1006,7 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ - return self._unsupported("open") + self._unsupported("open") def read_bytes(self): """ @@ -1049,7 +1049,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - return self._unsupported("iterdir") + self._unsupported("iterdir") def _scandir(self): # Emulate os.scandir(), which returns an object that can be used as a @@ -1217,7 +1217,7 @@ def absolute(self): Use resolve() to resolve symlinks and remove '..' segments. """ - return self._unsupported("absolute") + self._unsupported("absolute") @classmethod def cwd(cls): @@ -1232,7 +1232,7 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - return self._unsupported("expanduser") + self._unsupported("expanduser") @classmethod def home(cls): @@ -1244,7 +1244,7 @@ def readlink(self): """ Return the path to which the symbolic link points. """ - return self._unsupported("readlink") + self._unsupported("readlink") def resolve(self, strict=False): """ @@ -1315,7 +1315,7 @@ def symlink_to(self, target, target_is_directory=False): Make this path a symlink pointing to the target path. Note the order of arguments (link, target) is the reverse of os.symlink. """ - return self._unsupported("symlink_to") + self._unsupported("symlink_to") def hardlink_to(self, target): """ @@ -1323,19 +1323,19 @@ def hardlink_to(self, target): Note the order of arguments (self, target) is the reverse of os.link's. """ - return self._unsupported("hardlink_to") + self._unsupported("hardlink_to") def touch(self, mode=0o666, exist_ok=True): """ Create this file with the given access mode, if it doesn't exist. """ - return self._unsupported("touch") + self._unsupported("touch") def mkdir(self, mode=0o777, parents=False, exist_ok=False): """ Create a new directory at this given path. """ - return self._unsupported("mkdir") + self._unsupported("mkdir") def rename(self, target): """ @@ -1347,7 +1347,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ - return self._unsupported("rename") + self._unsupported("rename") def replace(self, target): """ @@ -1359,13 +1359,13 @@ def replace(self, target): Returns the new Path instance pointing to the target path. """ - return self._unsupported("replace") + self._unsupported("replace") def chmod(self, mode, *, follow_symlinks=True): """ Change the permissions of the path, like os.chmod(). """ - return self._unsupported("chmod") + self._unsupported("chmod") def lchmod(self, mode): """ @@ -1379,29 +1379,29 @@ def unlink(self, missing_ok=False): Remove this file or link. If the path is a directory, use rmdir() instead. """ - return self._unsupported("unlink") + self._unsupported("unlink") def rmdir(self): """ Remove this directory. The directory must be empty. """ - return self._unsupported("rmdir") + self._unsupported("rmdir") def owner(self): """ Return the login name of the file owner. """ - return self._unsupported("owner") + self._unsupported("owner") def group(self): """ Return the group name of the file gid. """ - return self._unsupported("group") + self._unsupported("group") def as_uri(self): """Return the path as a URI.""" - return self._unsupported("as_uri") + self._unsupported("as_uri") class Path(_PathBase): From a51d7a0028d8a8629560ace4f70730b56d33347f Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 15 Sep 2023 20:08:00 +0100 Subject: [PATCH 24/31] WIP --- Lib/pathlib.py | 107 +++++++++++++++++++++------------------ Lib/test/test_pathlib.py | 12 ++++- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 253f2da838efbe..c6a19ae0c4dba7 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -305,6 +305,11 @@ class PurePath: # The `_hash` slot stores the hash of the case-normalized string # path. It's set when `__hash__()` is called for the first time. '_hash', + + # The '_resolving' slot stores a boolean indicating whether the path + # is being processed by `_PathBase.resolve()`. This prevents duplicate + # work from occurring when `resolve()` calls `stat()` or `readlink()`. + '_resolving', ) pathmod = os.path @@ -344,6 +349,7 @@ def __init__(self, *args): f"not {type(path).__name__!r}") paths.append(path) self._raw_paths = paths + self._resolving = False def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. @@ -704,7 +710,9 @@ def parent(self): tail = self._tail if not tail: return self - return self._from_parsed_parts(drv, root, tail[:-1]) + path = self._from_parsed_parts(drv, root, tail[:-1]) + path._resolving = self._resolving + return path @property def parents(self): @@ -1251,63 +1259,64 @@ def resolve(self, strict=False): Make the path absolute, resolving all symlinks on the way and also normalizing it. """ + if self._resolving: + return self try: path = self.absolute() - tail_idx = len(path._tail) - len(self._tail) except UnsupportedOperation: path = self - tail_idx = 0 - if not path._tail: - return path - drv = path.drive - root = path.root - tail = list(path._tail) - dirty = False + + def split(path): + return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] + link_count = 0 - readlink_supported = True - while tail_idx < len(tail): - if tail[tail_idx] == '..': - if tail_idx == 0: - if root: - # Delete '..' part immediately following root. - del tail[tail_idx] - dirty = True + stat_cache = {} + target_cache = {} + path, parts = split(path) + while parts: + part = parts.pop() + if part == '..': + if not path._tail: + if path.root: + # Delete '..' segment immediately following root continue - elif tail[tail_idx - 1] != '..': - # Delete '..' part and its predecessor. - tail_idx -= 1 - del tail[tail_idx:tail_idx + 2] - dirty = True + elif path._tail[-1] != '..': + # Delete '..' segment and its predecessor + path = path.parent continue - elif readlink_supported: - link = self._from_parsed_parts(drv, root, tail[:tail_idx + 1]) + path = path._make_child_relpath(part) + else: + lookup_path = path + path = path._make_child_relpath(part) + path._resolving = True + path_str = str(path) try: - link_target = link.readlink() - except UnsupportedOperation: - readlink_supported = False - except OSError as e: - if e.errno != EINVAL: - if strict: - raise - else: - break - else: - link_count += 1 - if link_count >= _MAX_SYMLINKS: - raise OSError(ELOOP, "Too many symbolic links in path", path) - elif link_target.root or link_target.drive: - link_target = link.parent / link_target - drv = link_target.drive - root = link_target.root - tail[:tail_idx + 1] = link_target._tail - tail_idx = 0 + st = stat_cache.get(path_str) + if st is None: + st = stat_cache[path_str] = path.stat(follow_symlinks=False) + if S_ISLNK(st.st_mode): + # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are + # encountered during resolution. + link_count += 1 + if link_count >= _MAX_SYMLINKS: + raise OSError(ELOOP, "Too many symbolic links in path", path_str) + target = target_cache.get(path_str) + if target is None: + target = target_cache[path_str] = path.readlink() + target, target_parts = split(target) + path = target if target.root else lookup_path + parts.extend(target_parts) + elif parts and not S_ISDIR(st.st_mode): + raise NotADirectoryError(ENOTDIR, "Not a directory", path_str) + except OSError: + if strict: + raise else: - tail[tail_idx:tail_idx + 1] = link_target._tail - dirty = True - continue - tail_idx += 1 - if dirty: - path = self._from_parsed_parts(drv, root, tail) + # Append remaining path segments without further processing. + for part in reversed(parts): + path = path._make_child_relpath(part) + break + path._resolving = False return path def symlink_to(self, target, target_is_directory=False): diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index de07a67680a3d2..f8fee9a683d47c 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2241,6 +2241,7 @@ def test_readlink(self): self.assertEqual((P / 'brokenLink').readlink(), self.cls('non-existing')) self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) with self.assertRaises(OSError): (P / 'fileA').readlink() @@ -2262,11 +2263,18 @@ def test_resolve_common(self): if not self.can_symlink: self.skipTest("symlinks required") P = self.cls + # Non-existent file p = P(BASE, 'foo') - with self.assertRaises(OSError) as cm: + with self.assertRaises(FileNotFoundError) as cm: p.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ENOENT) + # File treated as directory + p = P(BASE, 'fileA', 'fileB') + with self.assertRaises(NotADirectoryError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOTDIR) # Non-strict + p = P(BASE, 'foo') self.assertEqualNormCase(str(p.resolve(strict=False)), os.path.join(BASE, 'foo')) p = P(BASE, 'foo', 'in', 'spam') @@ -2575,7 +2583,7 @@ def test_complex_symlinks_relative_dot_dot(self): class DummyPathWithSymlinks(DummyPath): def readlink(self): - path = str(self) + path = str(self.parent.resolve() / self.name) if path in self._symlinks: return self.with_segments(self._symlinks[path]) elif path in self._files or path in self._directories: From 7e3729e017673a89b21d6960a37955a31ce0dd23 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 23 Sep 2023 18:30:32 +0100 Subject: [PATCH 25/31] Undo test change. This will require further refactoring in another PR. --- Lib/test/test_pathlib.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index f8fee9a683d47c..203e6022709003 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2263,18 +2263,11 @@ def test_resolve_common(self): if not self.can_symlink: self.skipTest("symlinks required") P = self.cls - # Non-existent file p = P(BASE, 'foo') - with self.assertRaises(FileNotFoundError) as cm: + with self.assertRaises(OSError) as cm: p.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ENOENT) - # File treated as directory - p = P(BASE, 'fileA', 'fileB') - with self.assertRaises(NotADirectoryError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOTDIR) # Non-strict - p = P(BASE, 'foo') self.assertEqualNormCase(str(p.resolve(strict=False)), os.path.join(BASE, 'foo')) p = P(BASE, 'foo', 'in', 'spam') From 703fe5ccd5c7ca31d0da889d245daf470f1d6fa4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:32:34 +0100 Subject: [PATCH 26/31] Ensure `..` segments are resolved in non-strict mode --- Lib/pathlib.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1758a2beef0dbd..450fd1f49b077a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1269,6 +1269,7 @@ def resolve(self, strict=False): def split(path): return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] + missing = False link_count = 0 stat_cache = {} target_cache = {} @@ -1285,6 +1286,8 @@ def split(path): path = path.parent continue path = path._make_child_relpath(part) + elif missing: + path = path._make_child_relpath(part) else: lookup_path = path path = path._make_child_relpath(part) @@ -1312,10 +1315,7 @@ def split(path): if strict: raise else: - # Append remaining path segments without further processing. - for part in reversed(parts): - path = path._make_child_relpath(part) - break + missing = True path._resolving = False return path From e5e5be5d9962a169fc14df8e41e0ce03f29a82fb Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:34:12 +0100 Subject: [PATCH 27/31] Move symlink loop resolution test from `PosixPathTest` to `DummyPathTest` --- Lib/test/test_pathlib.py | 62 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index c3c58a10c6b299..319148e9065a65 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2334,6 +2334,38 @@ def test_resolve_dot(self): # Non-strict self.assertEqual(r.resolve(strict=False), p / '3' / '4') + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + def test_resolve_loop(self): + if not self.can_symlink: + self.skipTest("symlinks required") + if os.name == 'nt' and issubclass(self.cls, pathlib.Path): + self.skipTest("symlink loops work differently with concrete Windows paths") + # Loops with relative symlinks. + self.cls(BASE, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(BASE, 'linkX') + self.cls(BASE, 'linkY').symlink_to('linkY') + self._check_symlink_loop(BASE, 'linkY') + self.cls(BASE, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(BASE, 'linkZ') + # Non-strict + p = self.cls(BASE, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. + self.cls(BASE, 'linkU').symlink_to(join('linkU/inside')) + self._check_symlink_loop(BASE, 'linkU') + self.cls(BASE, 'linkV').symlink_to(join('linkV')) + self._check_symlink_loop(BASE, 'linkV') + self.cls(BASE, 'linkW').symlink_to(join('linkW/../linkW')) + self._check_symlink_loop(BASE, 'linkW') + # Non-strict + q = self.cls(BASE, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_stat(self): statA = self.cls(BASE).joinpath('fileA').stat() statB = self.cls(BASE).joinpath('dirB', 'fileB').stat() @@ -3428,12 +3460,6 @@ def test_absolute(self): self.assertEqual(str(P('//a').absolute()), '//a') self.assertEqual(str(P('//a/b').absolute()), '//a/b') - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - @unittest.skipIf( is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." @@ -3480,30 +3506,6 @@ def test_touch_mode(self): st = os.stat(join('masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - def test_resolve_loop(self): - if not self.can_symlink: - self.skipTest("symlinks required") - # Loops with relative symlinks. - os.symlink('linkX/inside', join('linkX')) - self._check_symlink_loop(BASE, 'linkX') - os.symlink('linkY', join('linkY')) - self._check_symlink_loop(BASE, 'linkY') - os.symlink('linkZ/../linkZ', join('linkZ')) - self._check_symlink_loop(BASE, 'linkZ') - # Non-strict - p = self.cls(BASE, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. - os.symlink(join('linkU/inside'), join('linkU')) - self._check_symlink_loop(BASE, 'linkU') - os.symlink(join('linkV'), join('linkV')) - self._check_symlink_loop(BASE, 'linkV') - os.symlink(join('linkW/../linkW'), join('linkW')) - self._check_symlink_loop(BASE, 'linkW') - # Non-strict - q = self.cls(BASE, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - def test_glob(self): P = self.cls p = P(BASE) From 38769a0ff7e9299ed4dc6a8e1825cd7eae122c8b Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:43:39 +0100 Subject: [PATCH 28/31] Add `PathBase._split_stack()` helper method. --- Lib/pathlib.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 450fd1f49b077a..f8ec98c28e1f63 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1254,6 +1254,14 @@ def readlink(self): """ self._unsupported("readlink") + def _split_stack(self): + """ + Split the path into a 2-tuple (anchor, parts), where *anchor* is the + uppermost parent of the path (equivalent to path.parents[-1]), and + *parts* is a reversed list of parts following the anchor. + """ + return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1] + def resolve(self, strict=False): """ Make the path absolute, resolving all symlinks on the way and also @@ -1266,14 +1274,11 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - def split(path): - return path._from_parsed_parts(path.drive, path.root, []), path._tail[::-1] - missing = False link_count = 0 stat_cache = {} target_cache = {} - path, parts = split(path) + path, parts = path._split_stack() while parts: part = parts.pop() if part == '..': @@ -1306,7 +1311,7 @@ def split(path): target = target_cache.get(path_str) if target is None: target = target_cache[path_str] = path.readlink() - target, target_parts = split(target) + target, target_parts = target._split_stack() path = target if target.root else lookup_path parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): From 7c78952cc951583f80a311079f0b98b15f3eb726 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 26 Sep 2023 20:57:52 +0100 Subject: [PATCH 29/31] Use path object as stat/link target cache key --- Lib/pathlib.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f8ec98c28e1f63..a636d9707d3ca1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1297,25 +1297,24 @@ def resolve(self, strict=False): lookup_path = path path = path._make_child_relpath(part) path._resolving = True - path_str = str(path) try: - st = stat_cache.get(path_str) + st = stat_cache.get(path) if st is None: - st = stat_cache[path_str] = path.stat(follow_symlinks=False) + st = stat_cache[path] = path.stat(follow_symlinks=False) if S_ISLNK(st.st_mode): # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are # encountered during resolution. link_count += 1 if link_count >= _MAX_SYMLINKS: - raise OSError(ELOOP, "Too many symbolic links in path", path_str) - target = target_cache.get(path_str) + raise OSError(ELOOP, "Too many symbolic links in path", str(path)) + target = target_cache.get(path) if target is None: - target = target_cache[path_str] = path.readlink() + target = target_cache[path] = path.readlink() target, target_parts = target._split_stack() path = target if target.root else lookup_path parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): - raise NotADirectoryError(ENOTDIR, "Not a directory", path_str) + raise NotADirectoryError(ENOTDIR, "Not a directory", str(path)) except OSError: if strict: raise From fe57725fc2e1e812ba470c1f9d625b051518698d Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 27 Sep 2023 03:27:24 +0100 Subject: [PATCH 30/31] Optimise resolve(): skip stat() in non-strict mode if readlink() is unsupported. --- Lib/pathlib.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a636d9707d3ca1..26a85de48ee847 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1253,6 +1253,7 @@ def readlink(self): Return the path to which the symbolic link points. """ self._unsupported("readlink") + readlink._unsupported = True def _split_stack(self): """ @@ -1274,7 +1275,7 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - missing = False + querying = strict or not getattr(self.readlink, '_unsupported', False) link_count = 0 stat_cache = {} target_cache = {} @@ -1290,12 +1291,9 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue - path = path._make_child_relpath(part) - elif missing: - path = path._make_child_relpath(part) - else: - lookup_path = path - path = path._make_child_relpath(part) + lookup_path = path + path = path._make_child_relpath(part) + if querying and part != '..': path._resolving = True try: st = stat_cache.get(path) @@ -1319,7 +1317,7 @@ def resolve(self, strict=False): if strict: raise else: - missing = True + querying = False path._resolving = False return path From cf9c8b68bab433d807f4b5ddf79b75df02f7b259 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 29 Sep 2023 23:17:07 +0100 Subject: [PATCH 31/31] Address code review comments --- Lib/pathlib.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 26a85de48ee847..e6be9061013a8a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1253,7 +1253,7 @@ def readlink(self): Return the path to which the symbolic link points. """ self._unsupported("readlink") - readlink._unsupported = True + readlink._supported = False def _split_stack(self): """ @@ -1275,7 +1275,9 @@ def resolve(self, strict=False): except UnsupportedOperation: path = self - querying = strict or not getattr(self.readlink, '_unsupported', False) + # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported + # and (in non-strict mode) we can improve performance by not calling `stat()`. + querying = strict or getattr(self.readlink, '_supported', True) link_count = 0 stat_cache = {} target_cache = {} @@ -1291,7 +1293,8 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue - lookup_path = path + # Join the current part onto the path. + path_parent = path path = path._make_child_relpath(part) if querying and part != '..': path._resolving = True @@ -1309,7 +1312,12 @@ def resolve(self, strict=False): if target is None: target = target_cache[path] = path.readlink() target, target_parts = target._split_stack() - path = target if target.root else lookup_path + # If the symlink target is absolute (like '/etc/hosts'), set the current + # path to its uppermost parent (like '/'). If not, the symlink target is + # relative to the symlink parent, which we recorded earlier. + path = target if target.root else path_parent + # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to + # the stack of unresolved path parts. parts.extend(target_parts) elif parts and not S_ISDIR(st.st_mode): raise NotADirectoryError(ENOTDIR, "Not a directory", str(path)) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy