Skip to content

Commit 3e72de9

Browse files
[3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701)
* bpo-39667: Sync zipp 3.0 (GH-18540) * bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0 * 📜🤖 Added by blurb_it. * Update docs for new zipfile.Path.open * Rely on dict, faster than OrderedDict. * Syntax edits on docs Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> (cherry picked from commit 0aeab5c) Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> * Clarify the change in behavior with a couple of workaround options. * Restore API compatibility while backporting performance improvements. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
1 parent 59047fa commit 3e72de9

File tree

4 files changed

+27
-26
lines changed

4 files changed

+27
-26
lines changed

Doc/library/zipfile.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,12 @@ Path objects are traversable using the ``/`` operator.
494494
Invoke :meth:`ZipFile.open` on the current path. Accepts
495495
the same arguments as :meth:`ZipFile.open`.
496496

497+
.. caution::
498+
499+
The signature on this function changes in an incompatible way
500+
in Python 3.9. For a future-compatible version, consider using
501+
the third-party zipp.Path package (3.0 or later).
502+
497503
.. method:: Path.iterdir()
498504

499505
Enumerate the children of the current directory.

Lib/test/test_zipfile.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import pathlib
77
import posixpath
8+
import string
89
import struct
910
import subprocess
1011
import sys
@@ -2933,6 +2934,11 @@ def test_joinpath_constant_time(self):
29332934
# Check the file iterated all items
29342935
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
29352936

2937+
# @func_timeout.func_set_timeout(3)
2938+
def test_implied_dirs_performance(self):
2939+
data = ['/'.join(string.ascii_lowercase + str(n)) for n in range(10000)]
2940+
zipfile.CompleteDirs._implied_dirs(data)
2941+
29362942

29372943
if __name__ == "__main__":
29382944
unittest.main()

Lib/zipfile.py

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import threading
1818
import time
1919
import contextlib
20-
from collections import OrderedDict
2120

2221
try:
2322
import zlib # We may need its compression method
@@ -2125,24 +2124,6 @@ def _compile(file, optimize=-1):
21252124
return (fname, archivename)
21262125

21272126

2128-
def _unique_everseen(iterable, key=None):
2129-
"List unique elements, preserving order. Remember all elements ever seen."
2130-
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
2131-
# unique_everseen('ABBCcAD', str.lower) --> A B C D
2132-
seen = set()
2133-
seen_add = seen.add
2134-
if key is None:
2135-
for element in itertools.filterfalse(seen.__contains__, iterable):
2136-
seen_add(element)
2137-
yield element
2138-
else:
2139-
for element in iterable:
2140-
k = key(element)
2141-
if k not in seen:
2142-
seen_add(k)
2143-
yield element
2144-
2145-
21462127
def _parents(path):
21472128
"""
21482129
Given a path with elements separated by
@@ -2184,6 +2165,18 @@ def _ancestry(path):
21842165
path, tail = posixpath.split(path)
21852166

21862167

2168+
_dedupe = dict.fromkeys
2169+
"""Deduplicate an iterable in original order"""
2170+
2171+
2172+
def _difference(minuend, subtrahend):
2173+
"""
2174+
Return items in minuend not in subtrahend, retaining order
2175+
with O(1) lookup.
2176+
"""
2177+
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
2178+
2179+
21872180
class CompleteDirs(ZipFile):
21882181
"""
21892182
A ZipFile subclass that ensures that implied directories
@@ -2193,13 +2186,8 @@ class CompleteDirs(ZipFile):
21932186
@staticmethod
21942187
def _implied_dirs(names):
21952188
parents = itertools.chain.from_iterable(map(_parents, names))
2196-
# Deduplicate entries in original order
2197-
implied_dirs = OrderedDict.fromkeys(
2198-
p + posixpath.sep for p in parents
2199-
# Cast names to a set for O(1) lookups
2200-
if p + posixpath.sep not in set(names)
2201-
)
2202-
return implied_dirs
2189+
as_dirs = (p + posixpath.sep for p in parents)
2190+
return _dedupe(_difference(as_dirs, names))
22032191

22042192
def namelist(self):
22052193
names = super(CompleteDirs, self).namelist()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Correct performance degradation in ``zipfile.Path`` as found in zipp 3.0. While retaining compatibility, this change discourages the use of ``zipfile.Path.open`` due to the signature change in Python 3.9. For compatibility across Python 3.8 and later versions, consider using ``zipp.Path`` on Python 3.8.x and earlier.

0 commit comments

Comments
(0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy