diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py index 0f6c0f2107ce6b..4f8b6f030a5b09 100644 --- a/Lib/test/test_zipfile/test_core.py +++ b/Lib/test/test_zipfile/test_core.py @@ -2245,6 +2245,64 @@ def test_decompress_without_3rd_party_library(self): with zipfile.ZipFile(zip_file) as zf: self.assertRaises(RuntimeError, zf.extract, 'a.txt') + def test_full_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00a\xed' + b'\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\d\x0b`P' + b'K\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2' + b'\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00aPK' + b'\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0lH\x05\xe2\x1e' + b'8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00\x00\x00\x00\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00bPK\x05' + b'\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00\x00/\x00\x00' + b'\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + self.assertEqual(len(zipf.read('a')), 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'File name.*differ'): + zipf.read('b') + + def test_quoted_overlap(self): + data = ( + b'PK\x03\x04\x14\x00\x00\x00\x08\x00\xa0lH\x05Y\xfc' + b'8\x044\x00\x00\x00(\x04\x00\x00\x01\x00\x00\x00a\x00' + b'\x1f\x00\xe0\xffPK\x03\x04\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00b\xed\xc0\x81\x08\x00\x00\x00\xc00\xd6\xfbK\\' + b'd\x0b`PK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0' + b'lH\x05Y\xfc8\x044\x00\x00\x00(\x04\x00\x00\x01' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + b'\x00aPK\x01\x02\x14\x00\x14\x00\x00\x00\x08\x00\xa0l' + b'H\x05\xe2\x1e8\xbb\x10\x00\x00\x00\t\x04\x00\x00\x01\x00' + b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00$\x00\x00\x00' + b'bPK\x05\x06\x00\x00\x00\x00\x02\x00\x02\x00^\x00\x00' + b'\x00S\x00\x00\x00\x00\x00' + ) + with zipfile.ZipFile(io.BytesIO(data), 'r') as zipf: + self.assertEqual(zipf.namelist(), ['a', 'b']) + zi = zipf.getinfo('a') + self.assertEqual(zi.header_offset, 0) + self.assertEqual(zi.compress_size, 52) + self.assertEqual(zi.file_size, 1064) + zi = zipf.getinfo('b') + self.assertEqual(zi.header_offset, 36) + self.assertEqual(zi.compress_size, 16) + self.assertEqual(zi.file_size, 1033) + with self.assertRaisesRegex(zipfile.BadZipFile, 'Overlapped entries'): + zipf.read('a') + self.assertEqual(len(zipf.read('b')), 1033) + def tearDown(self): unlink(TESTFN) unlink(TESTFN2) diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py index 2c963de18e4f95..e19823094a47c5 100644 --- a/Lib/zipfile/__init__.py +++ b/Lib/zipfile/__init__.py @@ -395,6 +395,7 @@ class ZipInfo (object): 'compress_size', 'file_size', '_raw_time', + '_end_offset', ) def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): @@ -429,6 +430,7 @@ def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)): self.external_attr = 0 # External file attributes self.compress_size = 0 # Size of the compressed file self.file_size = 0 # Size of the uncompressed file + self._end_offset = None # Start of the next local header or central directory # Other attributes are set by class ZipFile: # header_offset Byte offset to the file header # CRC CRC-32 of the uncompressed file @@ -1487,6 +1489,12 @@ def _RealGetContents(self): if self.debug > 2: print("total", total) + end_offset = self.start_dir + for zinfo in sorted(self.filelist, + key=lambda zinfo: zinfo.header_offset, + reverse=True): + zinfo._end_offset = end_offset + end_offset = zinfo.header_offset def namelist(self): """Return a list of file names in the archive.""" @@ -1643,6 +1651,10 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): 'File name in directory %r and header %r differ.' % (zinfo.orig_filename, fname)) + if (zinfo._end_offset is not None and + zef_file.tell() + zinfo.compress_size > zinfo._end_offset): + raise BadZipFile(f"Overlapped entries: {zinfo.orig_filename!r} (possible zip bomb)") + # check for encrypted flag & handle password is_encrypted = zinfo.flag_bits & _MASK_ENCRYPTED if is_encrypted: diff --git a/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst new file mode 100644 index 00000000000000..be279caffc46ee --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-09-28-13-15-51.gh-issue-109858.43e2dg.rst @@ -0,0 +1,3 @@ +Protect :mod:`zipfile` from "quoted-overlap" zipbomb. It now raises +BadZipFile when try to read an entry that overlaps with other entry or +central directory.
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: