From cb3519590c62f9b1abf7f31b92ec37d4b725ce15 Mon Sep 17 00:00:00 2001 From: Alexander Urieles Date: Mon, 28 Jul 2025 17:37:26 +0200 Subject: [PATCH] gh-130577: tarfile now validates archives to ensure member offsets are non-negative (GH-137027) (cherry picked from commit 7040aa54f14676938970e10c5f74ea93cd56aa38) Co-authored-by: Alexander Urieles Co-authored-by: Gregory P. Smith --- Lib/tarfile.py | 3 + Lib/test/test_tarfile.py | 156 ++++++++++++++++++ ...-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 3 + 3 files changed, 162 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 2423e14bc540d8..c04c576ea22d2d 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1614,6 +1614,9 @@ def _block(self, count): """Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. """ + # Only non-negative offsets are allowed + if count < 0: + raise InvalidHeaderError("invalid offset") blocks, remainder = divmod(count, BLOCKSIZE) if remainder: blocks += 1 diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7377acdf398622..366aac781df1e7 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -50,6 +50,7 @@ def sha256sum(data): xzname = os.path.join(TEMPDIR, "testtar.tar.xz") tmpname = os.path.join(TEMPDIR, "tmp.tar") dotlessname = os.path.join(TEMPDIR, "testtar") +SPACE = b" " sha256_regtype = ( "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce" @@ -4386,6 +4387,161 @@ def extractall(self, ar): ar.extractall(self.testdir, filter='fully_trusted') +class OffsetValidationTests(unittest.TestCase): + tarname = tmpname + invalid_posix_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0011407" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 6 bytes, version: 2 bytes + + tarfile.POSIX_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # devminor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # prefix: 155 bytes + + tarfile.NUL * tarfile.LENGTH_PREFIX + # padding: 12 bytes + + tarfile.NUL * 12 + ) + invalid_gnu_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, null terminator: 8 bytes + + b"0000755" + tarfile.NUL + # uid, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0011327" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 8 bytes + + tarfile.GNU_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, null terminator: 8 bytes + + tarfile.NUL * 8 + # devminor, null terminator: 8 bytes + + tarfile.NUL * 8 + # padding: 167 bytes + + tarfile.NUL * 167 + ) + invalid_v7_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 bytes + + b"0010070" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # padding: 255 bytes + + tarfile.NUL * 255 + ) + valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT) + data_block = b"\xff" * tarfile.BLOCKSIZE + + def _write_buffer(self, buffer): + with open(self.tarname, "wb") as f: + f.write(buffer) + + def _get_members(self, ignore_zeros=None): + with open(self.tarname, "rb") as f: + with tarfile.open( + mode="r", fileobj=f, ignore_zeros=ignore_zeros + ) as tar: + return tar.getmembers() + + def _assert_raises_read_error_exception(self): + with self.assertRaisesRegex( + tarfile.ReadError, "file could not be opened successfully" + ): + self._get_members() + + def test_invalid_offset_header_validations(self): + for tar_format, invalid_header in ( + ("posix", self.invalid_posix_header), + ("gnu", self.invalid_gnu_header), + ("v7", self.invalid_v7_header), + ): + with self.subTest(format=tar_format): + self._write_buffer(invalid_header) + self._assert_raises_read_error_exception() + + def test_early_stop_at_invalid_offset_header(self): + buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header + self._write_buffer(buffer) + members = self._get_members() + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, 0) + + def test_ignore_invalid_archive(self): + # 3 invalid headers with their respective data + buffer = (self.invalid_gnu_header + self.data_block) * 3 + self._write_buffer(buffer) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 0) + + def test_ignore_invalid_offset_headers(self): + for first_block, second_block, expected_offset in ( + ( + (self.valid_gnu_header), + (self.invalid_gnu_header + self.data_block), + 0, + ), + ( + (self.invalid_gnu_header + self.data_block), + (self.valid_gnu_header), + 1024, + ), + ): + self._write_buffer(first_block + second_block) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, expected_offset) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst new file mode 100644 index 00000000000000..342cabbc865dc4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst @@ -0,0 +1,3 @@ +:mod:`tarfile` now validates archives to ensure member offsets are +non-negative. (Contributed by Alexander Enrique Urieles Nieto in +:gh:`130577`.) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy