Skip to content

gh-130577: tarfile now validates archives to ensure member offsets are non-negative #137027

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Lib/tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -1647,6 +1647,9 @@ def _block(self, count):
"""Round up a byte count by BLOCKSIZE and return it,
e.g. _block(834) => 1024.
"""
# Only non-negative offsets are allowed
if count < 0:
raise InvalidHeaderError("invalid offset")
blocks, remainder = divmod(count, BLOCKSIZE)
if remainder:
blocks += 1
Expand Down
156 changes: 156 additions & 0 deletions Lib/test/test_tarfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def sha256sum(data):
# Archive paths located under TEMPDIR.
zstname = os.path.join(TEMPDIR, "testtar.tar.zst")
tmpname = os.path.join(TEMPDIR, "tmp.tar")
dotlessname = os.path.join(TEMPDIR, "testtar")
# A single space byte; OffsetValidationTests uses it as a field terminator /
# padding byte when hand-building raw tar headers.
SPACE = b" "

sha256_regtype = (
"e09e4bc8b3c9d9177e77256353b36c159f5f040531bbd4b024a8f9b9196c71ce"
Expand Down Expand Up @@ -4602,6 +4603,161 @@ def extractall(self, ar):
ar.extractall(self.testdir, filter='fully_trusted')


class OffsetValidationTests(unittest.TestCase):
    # Tests for archives whose header "size" field is filled with 0xff bytes.
    # NOTE(review): such a size is presumably decoded as a negative number,
    # which tarfile is expected to reject as an invalid offset -- confirm
    # against tarfile's number parsing.
    #
    # Every hand-built header below is exactly one 512-byte block (the
    # per-field byte counts in the comments add up to 512).  The chksum
    # fields are hard-coded, so any change to the header bytes requires
    # updating the matching chksum value.

    # Scratch file that each test overwrites with its own archive bytes.
    tarname = tmpname

    invalid_posix_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, space, null terminator: 8 bytes
        + b"000755" + SPACE + tarfile.NUL
        # uid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # gid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # size (the invalid field: all 0xff), space: 12 bytes
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum, null terminator: 8 bytes
        + b"0011407" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # magic: 6 bytes, version: 2 bytes
        + tarfile.POSIX_MAGIC
        # uname: 32 bytes
        + tarfile.NUL * 32
        # gname: 32 bytes
        + tarfile.NUL * 32
        # devmajor, space, null terminator: 8 bytes
        + tarfile.NUL * 6 + SPACE + tarfile.NUL
        # devminor, space, null terminator: 8 bytes
        + tarfile.NUL * 6 + SPACE + tarfile.NUL
        # prefix: 155 bytes
        + tarfile.NUL * tarfile.LENGTH_PREFIX
        # padding: 12 bytes
        + tarfile.NUL * 12
    )

    invalid_gnu_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, null terminator: 8 bytes
        + b"0000755" + tarfile.NUL
        # uid, null terminator: 8 bytes
        + b"0000001" + tarfile.NUL
        # gid, null terminator: 8 bytes
        + b"0000001" + tarfile.NUL
        # size (the invalid field: all 0xff), space: 12 bytes
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum, null terminator: 8 bytes
        + b"0011327" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # magic (including version): 8 bytes
        + tarfile.GNU_MAGIC
        # uname: 32 bytes
        + tarfile.NUL * 32
        # gname: 32 bytes
        + tarfile.NUL * 32
        # devmajor (all NUL): 8 bytes
        + tarfile.NUL * 8
        # devminor (all NUL): 8 bytes
        + tarfile.NUL * 8
        # padding: 167 bytes
        + tarfile.NUL * 167
    )

    invalid_v7_header = (
        # name: 100 bytes
        tarfile.NUL * tarfile.LENGTH_NAME
        # mode, space, null terminator: 8 bytes
        + b"000755" + SPACE + tarfile.NUL
        # uid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # gid, space, null terminator: 8 bytes
        + b"000001" + SPACE + tarfile.NUL
        # size (the invalid field: all 0xff), space: 12 bytes
        + b"\xff" * 11 + SPACE
        # mtime, space: 12 bytes
        + tarfile.NUL * 11 + SPACE
        # chksum, null terminator: 8 bytes
        + b"0010070" + tarfile.NUL
        # type: 1 byte
        + tarfile.REGTYPE
        # linkname: 100 bytes
        + tarfile.NUL * tarfile.LENGTH_LINK
        # padding: 255 bytes
        + tarfile.NUL * 255
    )

    # A well-formed header for an empty member named "filename".
    valid_gnu_header = tarfile.TarInfo("filename").tobuf(tarfile.GNU_FORMAT)
    # One block of filler bytes placed after an invalid header as its "data".
    data_block = b"\xff" * tarfile.BLOCKSIZE

    def _write_buffer(self, buffer):
        # Replace the scratch archive's contents with the raw bytes given.
        with open(self.tarname, "wb") as f:
            f.write(buffer)

    def _get_members(self, ignore_zeros=None):
        # Open the scratch archive for reading and return its member list.
        # ignore_zeros is forwarded unchanged to tarfile.open().
        with open(self.tarname, "rb") as f:
            with tarfile.open(
                mode="r", fileobj=f, ignore_zeros=ignore_zeros
            ) as tar:
                return tar.getmembers()

    def _assert_raises_read_error_exception(self):
        # Reading the scratch archive must fail with ReadError.
        with self.assertRaisesRegex(
            tarfile.ReadError, "file could not be opened successfully"
        ):
            self._get_members()

    def test_invalid_offset_header_validations(self):
        # An archive consisting solely of an invalid header cannot be opened,
        # whichever header format is used.
        for tar_format, invalid_header in (
            ("posix", self.invalid_posix_header),
            ("gnu", self.invalid_gnu_header),
            ("v7", self.invalid_v7_header),
        ):
            with self.subTest(format=tar_format):
                self._write_buffer(invalid_header)
                self._assert_raises_read_error_exception()

    def test_early_stop_at_invalid_offset_header(self):
        # Without ignore_zeros, only the member that precedes the invalid
        # header is returned; the valid header after it is never reached.
        buffer = (
            self.valid_gnu_header
            + self.invalid_gnu_header
            + self.valid_gnu_header
        )
        self._write_buffer(buffer)
        members = self._get_members()
        self.assertEqual(len(members), 1)
        self.assertEqual(members[0].name, "filename")
        self.assertEqual(members[0].offset, 0)

    def test_ignore_invalid_archive(self):
        # 3 invalid headers with their respective data
        buffer = (self.invalid_gnu_header + self.data_block) * 3
        self._write_buffer(buffer)
        members = self._get_members(ignore_zeros=True)
        self.assertEqual(len(members), 0)

    def test_ignore_invalid_offset_headers(self):
        # With ignore_zeros=True the invalid header (and its data block) is
        # skipped, and the single valid member is found whether it comes
        # before or after the invalid one.
        invalid_member = self.invalid_gnu_header + self.data_block
        cases = (
            # Valid member first: it starts at the beginning of the archive.
            (self.valid_gnu_header, invalid_member, 0),
            # Valid member second: it starts after the invalid header block
            # plus its data block.
            (invalid_member, self.valid_gnu_header, 1024),
        )
        for first_block, second_block, expected_offset in cases:
            # subTest keeps the cases independent and identifies which one
            # failed, matching test_invalid_offset_header_validations.
            with self.subTest(expected_offset=expected_offset):
                self._write_buffer(first_block + second_block)
                members = self._get_members(ignore_zeros=True)
                self.assertEqual(len(members), 1)
                self.assertEqual(members[0].name, "filename")
                self.assertEqual(members[0].offset, expected_offset)


def setUpModule():
os_helper.unlink(TEMPDIR)
os.makedirs(TEMPDIR)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:mod:`tarfile` now validates archives to ensure member offsets are
non-negative. (Contributed by Alexander Enrique Urieles Nieto in
:gh:`130577`.)
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy