From 235f9d47714eb49f1ab849f619403bf3c6e75c1a Mon Sep 17 00:00:00 2001 From: aeurielesn Date: Wed, 23 Jul 2025 00:17:56 +0200 Subject: [PATCH 1/3] gh-130577: tarfile now validates archives to ensure member offsets are non-negative --- Doc/whatsnew/3.15.rst | 2 + Lib/tarfile.py | 3 + Lib/test/test_tarfile.py | 156 ++++++++++++++++++ ...-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 2 + 4 files changed, 163 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index e8e2c1ed6047bf..cfd93aa2a97bec 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -310,6 +310,8 @@ tarfile :func:`~tarfile.TarFile.errorlevel` is zero. (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` and :cve:`2025-4435`.) +* :mod:`tarfile` now validates archives to ensure member offsets are non-negative. + (Contributed by Alexander Enrique Urieles Nieto in :gh:`130577`.) types diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 80d8644af86f74..45f58eb8ac93cf 100644 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1647,6 +1647,9 @@ def _block(self, count): """Round up a byte count by BLOCKSIZE and return it, e.g. _block(834) => 1024. 
""" + # Only non-negative offsets are allowed + if count < 0: + raise InvalidHeaderError("invalid offset") blocks, remainder = divmod(count, BLOCKSIZE) if remainder: blocks += 1 diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7055e1ed147a9e..7b7ad791e568fc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -55,6 +55,7 @@ def sha256sum(data): zstname = os.path.join(TEMPDIR, "testtar.tar.zst") tmpname = os.path.join(TEMPDIR, "tmp.tar") dotlessname = os.path.join(TEMPDIR, "testtar") +SPACE = b" " sha256_regtype = ( "e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce" @@ -4602,6 +4603,161 @@ def extractall(self, ar): ar.extractall(self.testdir, filter='fully_trusted') +class OffsetValidationTests(unittest.TestCase): + tarname = tmpname + invalid_posix_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0011407" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 6 bytes, version: 2 bytes + + tarfile.POSIX_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # devminor, space, null terminator: 8 bytes + + tarfile.NUL * 6 + SPACE + tarfile.NUL + # prefix: 155 bytes + + tarfile.NUL * tarfile.LENGTH_PREFIX + # padding: 12 bytes + + tarfile.NUL * 12 + ) + invalid_gnu_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, null terminator: 8 bytes + + b"0000755" + tarfile.NUL + # uid, null terminator: 8 bytes + + b"0000001" + 
tarfile.NUL + # gid, null terminator: 8 bytes + + b"0000001" + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0011327" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # magic: 8 bytes + + tarfile.GNU_MAGIC + # uname: 32 bytes + + tarfile.NUL * 32 + # gname: 32 bytes + + tarfile.NUL * 32 + # devmajor, null terminator: 8 bytes + + tarfile.NUL * 8 + # devminor, null terminator: 8 bytes + + tarfile.NUL * 8 + # padding: 167 bytes + + tarfile.NUL * 167 + ) + invalid_v7_header = ( + # name: 100 bytes + tarfile.NUL * tarfile.LENGTH_NAME + # mode, space, null terminator: 8 bytes + + b"000755" + SPACE + tarfile.NUL + # uid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # gid, space, null terminator: 8 bytes + + b"000001" + SPACE + tarfile.NUL + # size, space: 12 bytes + + b"\xff" * 11 + SPACE + # mtime, space: 12 bytes + + tarfile.NUL * 11 + SPACE + # chksum: 8 spaces + + b"0010070" + tarfile.NUL + # type: 1 byte + + tarfile.REGTYPE + # linkname: 100 bytes + + tarfile.NUL * tarfile.LENGTH_LINK + # padding: 255 bytes + + tarfile.NUL * 255 + ) + valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT) + data_block = b"\xff" * tarfile.BLOCKSIZE + + def _write_buffer(self, buffer): + with open(self.tarname, "wb") as f: + f.write(buffer) + + def _get_members(self, ignore_zeros=None): + with open(self.tarname, "rb") as f: + with tarfile.open( + mode="r", fileobj=f, ignore_zeros=ignore_zeros + ) as tar: + return tar.getmembers() + + def _assert_raises_read_error_exception(self): + with self.assertRaisesRegex( + tarfile.ReadError, "file could not be opened successfully" + ): + self._get_members() + + def test_invalid_offset_header_validations(self): + for tar_format, invalid_header in ( + ("posix", self.invalid_posix_header), + ("gnu", self.invalid_gnu_header), + ("v7", 
self.invalid_v7_header), + ): + with self.subTest(format=tar_format): + self._write_buffer(invalid_header) + self._assert_raises_read_error_exception() + + def test_early_stop_at_invalid_offset_header(self): + buffer = self.valid_gnu_header + self.invalid_gnu_header + self.valid_gnu_header + self._write_buffer(buffer) + members = self._get_members() + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, 0) + + def test_ignore_invalid_archive(self): + # 3 invalid headers with their respective data + buffer = (self.invalid_gnu_header + self.data_block) * 3 + self._write_buffer(buffer) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 0) + + def test_ignore_invalid_offset_headers(self): + for first_block, second_block, expected_offset in ( + ( + (self.valid_gnu_header), + (self.invalid_gnu_header + self.data_block), + 0, + ), + ( + (self.invalid_gnu_header + self.data_block), + (self.valid_gnu_header), + 1024, + ), + ): + self._write_buffer(first_block + second_block) + members = self._get_members(ignore_zeros=True) + self.assertEqual(len(members), 1) + self.assertEqual(members[0].name, "filename") + self.assertEqual(members[0].offset, expected_offset) + + def setUpModule(): os_helper.unlink(TEMPDIR) os.makedirs(TEMPDIR) diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst new file mode 100644 index 00000000000000..2d6968c12027fe --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst @@ -0,0 +1,2 @@ +:mod:`tarfile` now validates archives to ensure member offsets are +non-negative. 
From 04fd0852147363b118b1241cfbd4c843443a0ffd Mon Sep 17 00:00:00 2001 From: aeurielesn Date: Fri, 25 Jul 2025 12:15:04 +0200 Subject: [PATCH 2/3] Typo --- Lib/test/test_tarfile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 7b7ad791e568fc..28914df6b010d0 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -4618,7 +4618,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0011407" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE @@ -4652,7 +4652,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0011327" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE @@ -4684,7 +4684,7 @@ class OffsetValidationTests(unittest.TestCase): + b"\xff" * 11 + SPACE # mtime, space: 12 bytes + tarfile.NUL * 11 + SPACE - # chksum: 8 spaces + # chksum: 8 bytes + b"0010070" + tarfile.NUL # type: 1 byte + tarfile.REGTYPE From 6d794444416f44da7230b47a46fe5966f9533cba Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 25 Jul 2025 20:20:39 +0000 Subject: [PATCH 3/3] move credit to NEWS, and remove the whatsnew entry (painful for backports) --- Doc/whatsnew/3.15.rst | 2 -- .../Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index cfd93aa2a97bec..e8e2c1ed6047bf 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -310,8 +310,6 @@ tarfile :func:`~tarfile.TarFile.errorlevel` is zero. (Contributed by Matt Prodani and Petr Viktorin in :gh:`112887` and :cve:`2025-4435`.) -* :mod:`tarfile` now validates archives to ensure member offsets are non-negative. - (Contributed by Alexander Enrique Urieles Nieto in :gh:`130577`.) 
types diff --git a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst index 2d6968c12027fe..342cabbc865dc4 100644 --- a/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst +++ b/Misc/NEWS.d/next/Library/2025-07-23-00-35-29.gh-issue-130577.c7EITy.rst @@ -1,2 +1,3 @@ :mod:`tarfile` now validates archives to ensure member offsets are -non-negative. +non-negative. (Contributed by Alexander Enrique Urieles Nieto in +:gh:`130577`.) pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy