Skip to content

Commit 50cd4b6

Browse files
authored
bpo-26253: Add compressionlevel to tarfile stream (GH-2962)
`tarfile` already accepts a `compresslevel` keyword argument when creating compressed archive files. This patch adds the same argument for stream-based tarfile usage (modes `'w|gz'` and `'w|bz2'`). The default is 9, the value that was previously hard-coded.
1 parent 81e91c9 commit 50cd4b6

File tree

4 files changed

+88
-11
lines changed

4 files changed

+88
-11
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ Some facts and figures:
9898
If *fileobj* is specified, it is used as an alternative to a :term:`file object`
9999
opened in binary mode for *name*. It is supposed to be at position 0.
100100

101-
For modes ``'w:gz'``, ``'r:gz'``, ``'w:bz2'``, ``'r:bz2'``, ``'x:gz'``,
102-
``'x:bz2'``, :func:`tarfile.open` accepts the keyword argument
101+
For modes ``'w:gz'``, ``'x:gz'``, ``'w|gz'``, ``'w:bz2'``, ``'x:bz2'``,
102+
``'w|bz2'``, :func:`tarfile.open` accepts the keyword argument
103103
*compresslevel* (default ``9``) to specify the compression level of the file.
104104

105105
For modes ``'w:xz'`` and ``'x:xz'``, :func:`tarfile.open` accepts the
@@ -152,6 +152,9 @@ Some facts and figures:
152152
.. versionchanged:: 3.6
153153
The *name* parameter accepts a :term:`path-like object`.
154154

155+
.. versionchanged:: 3.12
156+
The *compresslevel* keyword argument also works for streams.
157+
155158

156159
.. class:: TarFile
157160
:noindex:

Lib/tarfile.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,8 @@ class _Stream:
336336
_Stream is intended to be used only internally.
337337
"""
338338

339-
def __init__(self, name, mode, comptype, fileobj, bufsize):
339+
def __init__(self, name, mode, comptype, fileobj, bufsize,
340+
compresslevel):
340341
"""Construct a _Stream object.
341342
"""
342343
self._extfileobj = True
@@ -371,7 +372,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
371372
self._init_read_gz()
372373
self.exception = zlib.error
373374
else:
374-
self._init_write_gz()
375+
self._init_write_gz(compresslevel)
375376

376377
elif comptype == "bz2":
377378
try:
@@ -383,7 +384,7 @@ def __init__(self, name, mode, comptype, fileobj, bufsize):
383384
self.cmp = bz2.BZ2Decompressor()
384385
self.exception = OSError
385386
else:
386-
self.cmp = bz2.BZ2Compressor()
387+
self.cmp = bz2.BZ2Compressor(compresslevel)
387388

388389
elif comptype == "xz":
389390
try:
@@ -410,13 +411,14 @@ def __del__(self):
410411
if hasattr(self, "closed") and not self.closed:
411412
self.close()
412413

413-
def _init_write_gz(self):
414+
def _init_write_gz(self, compresslevel):
414415
"""Initialize for writing with gzip compression.
415416
"""
416-
self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
417-
-self.zlib.MAX_WBITS,
418-
self.zlib.DEF_MEM_LEVEL,
419-
0)
417+
self.cmp = self.zlib.compressobj(compresslevel,
418+
self.zlib.DEFLATED,
419+
-self.zlib.MAX_WBITS,
420+
self.zlib.DEF_MEM_LEVEL,
421+
0)
420422
timestamp = struct.pack("<L", int(time.time()))
421423
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
422424
if self.name.endswith(".gz"):
@@ -1659,7 +1661,9 @@ def not_compressed(comptype):
16591661
if filemode not in ("r", "w"):
16601662
raise ValueError("mode must be 'r' or 'w'")
16611663

1662-
stream = _Stream(name, filemode, comptype, fileobj, bufsize)
1664+
compresslevel = kwargs.pop("compresslevel", 9)
1665+
stream = _Stream(name, filemode, comptype, fileobj, bufsize,
1666+
compresslevel)
16631667
try:
16641668
t = cls(name, filemode, stream, **kwargs)
16651669
except:

Lib/test/test_tarfile.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,6 +1554,74 @@ class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
15541554
class LzmaStreamWriteTest(LzmaTest, StreamWriteTest):
15551555
decompressor = lzma.LZMADecompressor if lzma else None
15561556

1557+
class _CompressedWriteTest(TarTest):
1558+
# This is not actually a standalone test.
1559+
# It does not inherit WriteTest because it only makes sense with gz and bz2.
1560+
source = (b"And we move to Bristol where they have a special, " +
1561+
b"Very Silly candidate")
1562+
1563+
def _compressed_tar(self, compresslevel):
1564+
fobj = io.BytesIO()
1565+
with tarfile.open(tmpname, self.mode, fobj,
1566+
compresslevel=compresslevel) as tarfl:
1567+
tarfl.addfile(tarfile.TarInfo("foo"), io.BytesIO(self.source))
1568+
return fobj
1569+
1570+
def _test_bz2_header(self, compresslevel):
1571+
fobj = self._compressed_tar(compresslevel)
1572+
self.assertEqual(fobj.getvalue()[0:10],
1573+
b"BZh%d1AY&SY" % compresslevel)
1574+
1575+
def _test_gz_header(self, compresslevel):
1576+
fobj = self._compressed_tar(compresslevel)
1577+
self.assertEqual(fobj.getvalue()[:3], b"\x1f\x8b\x08")
1578+
1579+
class Bz2CompressWriteTest(Bz2Test, _CompressedWriteTest, unittest.TestCase):
1580+
prefix = "w:"
1581+
def test_compression_levels(self):
1582+
self._test_bz2_header(1)
1583+
self._test_bz2_header(5)
1584+
self._test_bz2_header(9)
1585+
1586+
class Bz2CompressStreamWriteTest(Bz2Test, _CompressedWriteTest,
1587+
unittest.TestCase):
1588+
prefix = "w|"
1589+
def test_compression_levels(self):
1590+
self._test_bz2_header(1)
1591+
self._test_bz2_header(5)
1592+
self._test_bz2_header(9)
1593+
1594+
class GzCompressWriteTest(GzipTest, _CompressedWriteTest, unittest.TestCase):
1595+
prefix = "w:"
1596+
def test_compression_levels(self):
1597+
self._test_gz_header(1)
1598+
self._test_gz_header(5)
1599+
self._test_gz_header(9)
1600+
1601+
class GzCompressStreamWriteTest(GzipTest, _CompressedWriteTest,
1602+
unittest.TestCase):
1603+
prefix = "w|"
1604+
def test_compression_levels(self):
1605+
self._test_gz_header(1)
1606+
self._test_gz_header(5)
1607+
self._test_gz_header(9)
1608+
1609+
class CompressLevelRaises(unittest.TestCase):
1610+
def test_compresslevel_wrong_modes(self):
1611+
compresslevel = 5
1612+
fobj = io.BytesIO()
1613+
with self.assertRaises(TypeError):
1614+
tarfile.open(tmpname, "w:", fobj, compresslevel=compresslevel)
1615+
1616+
def test_wrong_compresslevels(self):
1617+
# BZ2 checks that the compresslevel is in [1,9]. gz does not
1618+
fobj = io.BytesIO()
1619+
with self.assertRaises(ValueError):
1620+
tarfile.open(tmpname, "w:bz2", fobj, compresslevel=0)
1621+
with self.assertRaises(ValueError):
1622+
tarfile.open(tmpname, "w:bz2", fobj, compresslevel=10)
1623+
with self.assertRaises(ValueError):
1624+
tarfile.open(tmpname, "w|bz2", fobj, compresslevel=10)
15571625

15581626
class GNUWriteTest(unittest.TestCase):
15591627
# This testcase checks for correct creation of GNU Longname
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Allow adjustable compression level for tarfile streams in
2+
:func:`tarfile.open`.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy