From 005e6e0ba2b09952399dadeb1cdcc6558a45d077 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Mon, 4 May 2020 14:20:29 +0200 Subject: [PATCH 01/13] bpo-40495: compileall option to hardlink duplicate pyc files Hardlink deduplication prevents duplicates by using hardlinks when the bytecode cache files are identical for multiple optimization levels. --- Doc/library/compileall.rst | 21 +- Lib/compileall.py | 38 +- Lib/test/test_compileall.py | 446 ++++++++++++++++++ Misc/ACKS | 1 + .../2020-05-04-11-20-49.bpo-40495.TyTc2O.rst | 2 + 5 files changed, 496 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index b1ae9d60e8ae14..337f75acd3b9af 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -113,6 +113,11 @@ compile Python sources. Ignore symlinks pointing outside the given directory. +.. cmdoption:: --hardlink-dupes + + Use hardlinks to prevent duplicates if ``.pyc`` files for multiple + optimization levels have the same content. + .. versionchanged:: 3.2 Added the ``-i``, ``-b`` and ``-h`` options. @@ -125,7 +130,7 @@ compile Python sources. Added the ``--invalidation-mode`` option. .. versionchanged:: 3.9 - Added the ``-s``, ``-p``, ``-e`` options. + Added the ``-s``, ``-p``, ``-e`` and ``--hardlink-dupes`` options. Raised the default recursion limit from 10 to :py:func:`sys.getrecursionlimit()`. Added the possibility to specify the ``-o`` option multiple times. @@ -143,7 +148,7 @@ runtime. Public functions ---------------- -.. function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None) +.. 
function:: compile_dir(dir, maxlevels=sys.getrecursionlimit(), ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False) Recursively descend the directory tree named by *dir*, compiling all :file:`.py` files along the way. Return a true value if all the files compiled successfully, @@ -193,6 +198,9 @@ Public functions the ``-s``, ``-p`` and ``-e`` options described above. They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`. + If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates + if ``.pyc`` files for multiple optimization levels have the same content. + .. versionchanged:: 3.2 Added the *legacy* and *optimize* parameter. @@ -219,9 +227,9 @@ Public functions Setting *workers* to 0 now chooses the optimal number of cores. .. versionchanged:: 3.9 - Added *stripdir*, *prependdir* and *limit_sl_dest* arguments. + Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments. -.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None) +.. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, \*, stripdir=None, prependdir=None, limit_sl_dest=None, hardlink_dupes=False) Compile the file with path *fullname*. Return a true value if the file compiled successfully, and a false value otherwise. @@ -257,6 +265,9 @@ Public functions the ``-s``, ``-p`` and ``-e`` options described above. They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`. + If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates + if ``.pyc`` files for multiple optimization levels have the same content. + .. versionadded:: 3.2 .. 
versionchanged:: 3.5 @@ -273,7 +284,7 @@ Public functions The *invalidation_mode* parameter's default value is updated to None. .. versionchanged:: 3.9 - Added *stripdir*, *prependdir* and *limit_sl_dest* arguments. + Added *stripdir*, *prependdir*, *limit_sl_dest* and *hardlink_dupes* arguments. .. function:: compile_path(skip_curdir=True, maxlevels=0, force=False, quiet=0, legacy=False, optimize=-1, invalidation_mode=None) diff --git a/Lib/compileall.py b/Lib/compileall.py index abe6cffce59c5f..5984058bdc9372 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -15,6 +15,7 @@ import importlib.util import py_compile import struct +import filecmp from functools import partial from pathlib import Path @@ -47,7 +48,7 @@ def _walk_dir(dir, maxlevels, quiet=0): def compile_dir(dir, maxlevels=None, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, workers=1, invalidation_mode=None, *, stripdir=None, - prependdir=None, limit_sl_dest=None): + prependdir=None, limit_sl_dest=None, hardlink_dupes=False): """Byte-compile all modules in the given directory tree. 
Arguments (only dir is required): @@ -70,6 +71,7 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, after stripdir limit_sl_dest: ignore symlinks if they are pointing outside of the defined path + hardlink_dupes: hardlink duplicated pyc files """ ProcessPoolExecutor = None if ddir is not None and (stripdir is not None or prependdir is not None): @@ -104,7 +106,8 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, invalidation_mode=invalidation_mode, stripdir=stripdir, prependdir=prependdir, - limit_sl_dest=limit_sl_dest), + limit_sl_dest=limit_sl_dest, + hardlink_dupes=hardlink_dupes), files) success = min(results, default=True) else: @@ -112,14 +115,15 @@ def compile_dir(dir, maxlevels=None, ddir=None, force=False, if not compile_file(file, ddir, force, rx, quiet, legacy, optimize, invalidation_mode, stripdir=stripdir, prependdir=prependdir, - limit_sl_dest=limit_sl_dest): + limit_sl_dest=limit_sl_dest, + hardlink_dupes=hardlink_dupes): success = False return success def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, legacy=False, optimize=-1, invalidation_mode=None, *, stripdir=None, prependdir=None, - limit_sl_dest=None): + limit_sl_dest=None, hardlink_dupes=False): """Byte-compile one file. Arguments (only fullname is required): @@ -140,6 +144,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, after stripdir limit_sl_dest: ignore symlinks if they are pointing outside of the defined path. 
+ hardlink_dupes: hardlink duplicated pyc files """ if ddir is not None and (stripdir is not None or prependdir is not None): @@ -176,6 +181,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if isinstance(optimize, int): optimize = [optimize] + if hardlink_dupes: + raise ValueError(("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level.")) + if rx is not None: mo = rx.search(fullname) if mo: @@ -220,10 +229,16 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if not quiet: print('Compiling {!r}...'.format(fullname)) try: - for opt_level, cfile in opt_cfiles.items(): + for index, opt_level in enumerate(sorted(optimize)): + cfile = opt_cfiles[opt_level] ok = py_compile.compile(fullname, cfile, dfile, True, optimize=opt_level, invalidation_mode=invalidation_mode) + if index > 0 and hardlink_dupes: + previous_cfile = opt_cfiles[optimize[index - 1]] + if filecmp.cmp(cfile, previous_cfile, shallow=False): + os.unlink(cfile) + os.link(previous_cfile, cfile) except py_compile.PyCompileError as err: success = False if quiet >= 2: @@ -352,6 +367,9 @@ def main(): 'Python interpreter itself (specified by -O).')) parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest', help='Ignore symlinks pointing outsite of the DIR') + parser.add_argument('--hardlink-dupes', action='store_true', + dest='hardlink_dupes', + help='Hardlink duplicated pyc files') args = parser.parse_args() compile_dests = args.compile_dest @@ -371,6 +389,10 @@ def main(): if args.opt_levels is None: args.opt_levels = [-1] + if len(args.opt_levels) == 1 and args.hardlink_dupes: + parser.error(("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level.")) + if args.ddir is not None and ( args.stripdir is not None or args.prependdir is not None ): @@ -404,7 +426,8 @@ def main(): stripdir=args.stripdir, prependdir=args.prependdir, optimize=args.opt_levels, - 
limit_sl_dest=args.limit_sl_dest): + limit_sl_dest=args.limit_sl_dest, + hardlink_dupes=args.hardlink_dupes): success = False else: if not compile_dir(dest, maxlevels, args.ddir, @@ -414,7 +437,8 @@ def main(): stripdir=args.stripdir, prependdir=args.prependdir, optimize=args.opt_levels, - limit_sl_dest=args.limit_sl_dest): + limit_sl_dest=args.limit_sl_dest, + hardlink_dupes=args.hardlink_dupes): success = False return success else: diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index 72678945089f28..efc2d84f894f54 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -11,6 +11,7 @@ import time import unittest import io +import filecmp from unittest import mock, skipUnless try: @@ -360,6 +361,234 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) + def test_hardlink_deduplication_bad_args(self): + # Bad arguments combination, hardlink deduplication make sense + # only for more than one optimization level + with self.assertRaises(ValueError): + compileall.compile_dir(self.directory, quiet=True, optimize=0, + hardlink_dupes=True) + + def test_hardlink_deduplication_same_bytecode_all_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=True) + + # import pdb; pdb.set_trace() + + # All three files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, 
os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=False) + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + # only two levels of optimization [0, 1] tested + path = os.path.join(self.directory, "test", "same_some") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 2], + hardlink_dupes=True) + + # Both files should have the same inode (hardlink) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, force=True, optimize=[0, 2], + hardlink_dupes=False) + + # Deduplication disabled, both pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt_2(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_some_2") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[1, 2], + hardlink_dupes=True) + + # Both files should have the same 
inode (hardlinks) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[1, 2]) + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_all_opt(self): + # "'''string'''\nassert 1" produces a different bytecode for + # all optimization levels + path = os.path.join(self.directory, "test", "different_all") + os.makedirs(path) + + simple_script = script_helper.make_script( + path, "test_different_bytecode", "'''string'''\nassert 1" + ) + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=True) + + # No hardlinks, bytecodes are different + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=False) + + # Disabling hardlink deduplication makes no difference + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_one_hardlink(self): + # "'''string'''\na = 1" produces the same bytecode only + # for level 0 and 1 + path = os.path.join(self.directory, "test", "different_one") + os.makedirs(path) + + simple_script = script_helper.make_script( + path, "test_different_bytecode", "'''string'''\na = 1" + ) + pyc_opt0 = importlib.util.cache_from_source(simple_script) + 
pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=True) + + # Only level 0 and 1 has the same inode, level 2 produces + # a different bytecode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=False) + + # Deduplication disabled, no hardlinks + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_recompilation(self): + path = os.path.join(self.directory, "test", "module_change") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module_change", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=True) + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module_change", + "print(0)") + + # Recompilation without -o 1 + compileall.compile_dir(path, force=True, quiet=True, optimize=[0, 2], + hardlink_dupes=True) + + # opt-1.pyc should have the same inode as before and others should not + self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) + 
self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + + def test_hardlink_deduplication_import(self): + path = os.path.join(self.directory, "test", "module_import") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], + hardlink_dupes=True) + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module", "print(0)") + + # Import the module in Python + script_helper.assert_python_ok( + "-O", "-c", "import module", __isolated=False, PYTHONPATH=path + ) + + # Only opt-1.pyc is changed + self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) + self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + class CompileallTestsWithSourceEpoch(CompileallTestsBase, unittest.TestCase, @@ -825,6 +1054,223 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) + def test_hardlink_deduplication_bad_args(self): + # Bad arguments combination, hardlink deduplication make sense + # only for more than one optimization level + 
self.assertRunNotOK(self.directory, "-o 1", "--hardlink_dupes") + + def test_hardlink_deduplication_same_bytecode_all_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", + "--hardlink-dupes") + + # All three files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt(self): + # 'a = 0' produces the same bytecode for all optimization levels + # only two levels of optimization [0, 1] tested + path = os.path.join(self.directory, "test", "same_some") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-q", "-o 0", "-o 2", "--hardlink-dupes") + + # Both files should have the same inode (hardlink) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", 
"-o 2") + + # Deduplication disabled, both pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_same_bytecode_some_opt_2(self): + # 'a = 0' produces the same bytecode for all optimization levels + path = os.path.join(self.directory, "test", "same_some_2") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "test_same_bytecode", + "a = 0") + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-q", "-o 1", "-o 2", "--hardlink-dupes") + + # Both files should have the same inode (hardlinks) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 1", "-o 2") + + # Deduplication disabled, all pyc files should have different inodes + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_all_opt(self): + # "'''string'''\nassert 1" produces a different bytecode for + # all optimization levels + path = os.path.join(self.directory, "test", "different_all") + os.makedirs(path) + + simple_script = script_helper.make_script(path, + "test_different_bytecode", + "'''string'''\nassert 1") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", + "--hardlink-dupes") + + # No hardlinks, bytecodes are different + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + 
self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Disabling hardlink deduplication makes no difference + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_different_bytecode_one_hardlink(self): + # "'''string'''\na = 1" produces the same bytecode only + # for level 0 and 1 + path = os.path.join(self.directory, "test", "different_one") + os.makedirs(path) + + simple_script = script_helper.make_script( + path, "test_different_bytecode", "'''string'''\na = 1" + ) + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", + "--hardlink-dupes") + + # Only level 0 and 1 has the same inode, level 2 produces + # a different bytecode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: + os.unlink(pyc_file) + + self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + + # Deduplication disabled, no hardlinks + self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + def test_hardlink_deduplication_recompilation(self): + path = os.path.join(self.directory, "test", "module_change") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module_change", + "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 1", "-o 2", + "--hardlink-dupes") + + # All three 
levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module_change", + "print(0)") + + # Recompilation without -o 1 + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 2", "--hardlink-dupes") + + # opt-1.pyc should have the same inode as before and others should not + self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + + def test_hardlink_deduplication_import(self): + path = os.path.join(self.directory, "test", "module_import") + os.makedirs(path) + + simple_script = script_helper.make_script(path, "module", "a = 0") + pyc_opt0 = importlib.util.cache_from_source(simple_script) + pyc_opt1 = importlib.util.cache_from_source(simple_script, + optimization=1) + pyc_opt2 = importlib.util.cache_from_source(simple_script, + optimization=2) + + self.assertRunOK(path, "-f", "-q", "-o 0", "-o 1", "-o 2", + "--hardlink-dupes") + + # All three levels have the same inode + self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) + self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + + previous_inode = os.stat(pyc_opt0).st_ino + + # Change of the module content + simple_script = script_helper.make_script(path, "module", "print(0)") + + # Import the module in Python + script_helper.assert_python_ok( + "-O", "-c", "import module", __isolated=False, PYTHONPATH=path + ) + + # Only opt-1.pyc is changed + self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) + self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) + 
self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) + class CommandLineTestsWithSourceEpoch(CommandLineTestsBase, unittest.TestCase, diff --git a/Misc/ACKS b/Misc/ACKS index 9221f6aae439ea..5602e881538ca6 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -86,6 +86,7 @@ Marcin Bachry Alfonso Baciero Dwayne Bailey Stig Bakken +Lumír Balhar Aleksandr Balezin Greg Ball Lewis Ball diff --git a/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst b/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst new file mode 100644 index 00000000000000..65ee4a724b6ad1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst @@ -0,0 +1,2 @@ +:mod:`compileall` is now able to use hardlinks to prevent duplicates in a +case when pyc files for different optimization levels have the same content. From 7f8b63fe305c969aa45bfc488e3b7f45b142c9c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Wed, 6 May 2020 13:07:29 +0200 Subject: [PATCH 02/13] Update Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Miro Hrončok --- .../next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst b/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst index 65ee4a724b6ad1..d3049b05a78b6c 100644 --- a/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst +++ b/Misc/NEWS.d/next/Library/2020-05-04-11-20-49.bpo-40495.TyTc2O.rst @@ -1,2 +1,2 @@ :mod:`compileall` is now able to use hardlinks to prevent duplicates in a -case when pyc files for different optimization levels have the same content. 
+case when ``.pyc`` files for different optimization levels have the same content. From 6a9efa2b16f0a716fe0bec53bb0c9e9a3764852a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Tue, 12 May 2020 06:50:08 +0200 Subject: [PATCH 03/13] Update Doc/library/compileall.rst Co-authored-by: Victor Stinner --- Doc/library/compileall.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index 337f75acd3b9af..90b659357f1b1e 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -198,7 +198,7 @@ Public functions the ``-s``, ``-p`` and ``-e`` options described above. They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`. - If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates + If *hardlink_dupes* is true, hardlinks are used to prevent duplicates if ``.pyc`` files for multiple optimization levels have the same content. .. versionchanged:: 3.2 From e1ef909b356fff381a96a8910135253469641872 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Tue, 12 May 2020 06:50:20 +0200 Subject: [PATCH 04/13] Update Lib/compileall.py Co-authored-by: Victor Stinner --- Lib/compileall.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/compileall.py b/Lib/compileall.py index 5984058bdc9372..f52fa786447f56 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -182,8 +182,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, optimize = [optimize] if hardlink_dupes: - raise ValueError(("Hardlinking of duplicated bytecode makes sense " - "only for more than one optimization level.")) + raise ValueError("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level.") if rx is not None: mo = rx.search(fullname) From b314c5fe7a870e3748043fe22217aeb9261553ad Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 12 May 2020 
06:59:38 +0200 Subject: [PATCH 05/13] remove debug code --- Lib/test/test_compileall.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index efc2d84f894f54..024f1312c42679 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -384,8 +384,6 @@ def test_hardlink_deduplication_same_bytecode_all_opt(self): compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], hardlink_dupes=True) - # import pdb; pdb.set_trace() - # All three files should have the same inode (hardlinks) self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) From e2f3a5080298fba9aa16408bcac61efa7133867d Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 12 May 2020 07:09:15 +0200 Subject: [PATCH 06/13] docs update --- Doc/library/compileall.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/compileall.rst b/Doc/library/compileall.rst index 90b659357f1b1e..a511c7eda265b2 100644 --- a/Doc/library/compileall.rst +++ b/Doc/library/compileall.rst @@ -115,8 +115,8 @@ compile Python sources. .. cmdoption:: --hardlink-dupes - Use hardlinks to prevent duplicates if ``.pyc`` files for multiple - optimization levels have the same content. + If two ``.pyc`` files with different optimization level have + the same content, use hard links to consolidate duplicate files. .. versionchanged:: 3.2 Added the ``-i``, ``-b`` and ``-h`` options. @@ -198,8 +198,8 @@ Public functions the ``-s``, ``-p`` and ``-e`` options described above. They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`. - If *hardlink_dupes* is true, hardlinks are used to prevent duplicates - if ``.pyc`` files for multiple optimization levels have the same content. + If *hardlink_dupes* is true and two ``.pyc`` files with different optimization + level have the same content, use hard links to consolidate duplicate files. 
.. versionchanged:: 3.2 Added the *legacy* and *optimize* parameter. @@ -265,8 +265,8 @@ Public functions the ``-s``, ``-p`` and ``-e`` options described above. They may be specified as ``str``, ``bytes`` or :py:class:`os.PathLike`. - If *hardlink_dupes* is ``True``, hardlinks are used to prevent duplicates - if ``.pyc`` files for multiple optimization levels have the same content. + If *hardlink_dupes* is true and two ``.pyc`` files with different optimization + level have the same content, use hard links to consolidate duplicate files. .. versionadded:: 3.2 From 4607d08cca851d914d9eac16119592a95544ae6e Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 12 May 2020 07:45:05 +0200 Subject: [PATCH 07/13] use is_hardlink to check inodes instead of repeating code --- Lib/test/test_compileall.py | 50 +++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index 024f1312c42679..00b04f48e3acd1 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -74,6 +74,12 @@ def recreation_check(self, metadata): compileall.compile_dir(self.directory, force=False, quiet=True) self.assertTrue(*self.timestamp_metadata()) + def is_hardlink(self, filename1, filename2): + """Returns True if two files have the same inode (hardlink)""" + inode1 = os.stat(filename1).st_ino + inode2 = os.stat(filename2).st_ino + return inode1 == inode2 + def test_mtime(self): # Test a change in mtime leads to a new .pyc. 
self.recreation_check(struct.pack('<4sll', importlib.util.MAGIC_NUMBER, @@ -385,8 +391,8 @@ def test_hardlink_deduplication_same_bytecode_all_opt(self): hardlink_dupes=True) # All three files should have the same inode (hardlinks) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: os.unlink(pyc_file) @@ -395,8 +401,8 @@ def test_hardlink_deduplication_same_bytecode_all_opt(self): hardlink_dupes=False) # Deduplication disabled, all pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) def test_hardlink_deduplication_same_bytecode_some_opt(self): # 'a = 0' produces the same bytecode for all optimization levels @@ -414,7 +420,7 @@ def test_hardlink_deduplication_same_bytecode_some_opt(self): hardlink_dupes=True) # Both files should have the same inode (hardlink) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt2)) for pyc_file in {pyc_opt0, pyc_opt2}: os.unlink(pyc_file) @@ -423,7 +429,7 @@ def test_hardlink_deduplication_same_bytecode_some_opt(self): hardlink_dupes=False) # Deduplication disabled, both pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt2)) def test_hardlink_deduplication_same_bytecode_some_opt_2(self): # 'a = 0' produces the same bytecode for all optimization levels @@ -441,7 +447,7 @@ def test_hardlink_deduplication_same_bytecode_some_opt_2(self): hardlink_dupes=True) # Both 
files should have the same inode (hardlinks) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) for pyc_file in {pyc_opt1, pyc_opt2}: os.unlink(pyc_file) @@ -449,7 +455,7 @@ def test_hardlink_deduplication_same_bytecode_some_opt_2(self): compileall.compile_dir(path, quiet=True, optimize=[1, 2]) # Deduplication disabled, all pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) def test_hardlink_deduplication_different_bytecode_all_opt(self): # "'''string'''\nassert 1" produces a different bytecode for @@ -470,8 +476,8 @@ def test_hardlink_deduplication_different_bytecode_all_opt(self): hardlink_dupes=True) # No hardlinks, bytecodes are different - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: os.unlink(pyc_file) @@ -480,8 +486,8 @@ def test_hardlink_deduplication_different_bytecode_all_opt(self): hardlink_dupes=False) # Disabling hardlink deduplication makes no difference - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) def test_hardlink_deduplication_different_bytecode_one_hardlink(self): # "'''string'''\na = 1" produces the same bytecode only @@ -503,8 +509,8 @@ def test_hardlink_deduplication_different_bytecode_one_hardlink(self): # Only level 0 and 1 has the same inode, level 2 produces # a different bytecode - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - 
self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: os.unlink(pyc_file) @@ -513,8 +519,8 @@ def test_hardlink_deduplication_different_bytecode_one_hardlink(self): hardlink_dupes=False) # Deduplication disabled, no hardlinks - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) def test_hardlink_deduplication_recompilation(self): path = os.path.join(self.directory, "test", "module_change") @@ -532,8 +538,8 @@ def test_hardlink_deduplication_recompilation(self): hardlink_dupes=True) # All three levels have the same inode - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) previous_inode = os.stat(pyc_opt0).st_ino @@ -547,7 +553,7 @@ def test_hardlink_deduplication_recompilation(self): # opt-1.pyc should have the same inode as before and others should not self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) + self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt2)) self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) # opt-1.pyc and opt-2.pyc have different content self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) @@ -567,8 +573,8 @@ def test_hardlink_deduplication_import(self): hardlink_dupes=True) # All three levels have the same inode - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + 
self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) + self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) previous_inode = os.stat(pyc_opt0).st_ino @@ -583,7 +589,7 @@ def test_hardlink_deduplication_import(self): # Only opt-1.pyc is changed self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) # opt-1.pyc and opt-2.pyc have different content self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) From 4fb779a6a2cda448f22424c9421593ea92226fb0 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 12 May 2020 08:09:12 +0200 Subject: [PATCH 08/13] use subTest to parametrize three tests with different combinations of opt levels --- Lib/test/test_compileall.py | 96 +++++++++++-------------------------- 1 file changed, 27 insertions(+), 69 deletions(-) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index 00b04f48e3acd1..9f006048f2ae96 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -12,6 +12,7 @@ import unittest import io import filecmp +import itertools from unittest import mock, skipUnless try: @@ -374,88 +375,45 @@ def test_hardlink_deduplication_bad_args(self): compileall.compile_dir(self.directory, quiet=True, optimize=0, hardlink_dupes=True) - def test_hardlink_deduplication_same_bytecode_all_opt(self): - # 'a = 0' produces the same bytecode for all optimization levels - path = os.path.join(self.directory, "test", "same_all") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "test_same_bytecode", - "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], 
- hardlink_dupes=True) - - # All three files should have the same inode (hardlinks) - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) - - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=False) - - # Deduplication disabled, all pyc files should have different inodes - self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - - def test_hardlink_deduplication_same_bytecode_some_opt(self): + def test_hardlink_deduplication_same_bytecode(self): # 'a = 0' produces the same bytecode for all optimization levels - # only two levels of optimization [0, 1] tested - path = os.path.join(self.directory, "test", "same_some") + path = os.path.join(self.directory, "test", "same") os.makedirs(path) simple_script = script_helper.make_script(path, "test_same_bytecode", "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 2], - hardlink_dupes=True) - - # Both files should have the same inode (hardlink) - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt2)) - - for pyc_file in {pyc_opt0, pyc_opt2}: - os.unlink(pyc_file) - compileall.compile_dir(path, quiet=True, force=True, optimize=[0, 2], - hardlink_dupes=False) - - # Deduplication disabled, both pyc files should have different inodes - self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt2)) + opt_combinations = ((0, 1, 2), (1, 2), (0, 2)) - def test_hardlink_deduplication_same_bytecode_some_opt_2(self): - # 'a = 0' produces the same bytecode for all optimization levels - path = os.path.join(self.directory, "test", "same_some_2") - os.makedirs(path) + for opt_combination in opt_combinations: + with self.subTest(opt_combination=opt_combination): - simple_script = 
script_helper.make_script(path, "test_same_bytecode", - "a = 0") - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) + pycs = {} + for opt_level in opt_combination: + pycs[opt_level] = importlib.util.cache_from_source( + simple_script, optimization=opt_level + ) - compileall.compile_dir(path, quiet=True, optimize=[1, 2], - hardlink_dupes=True) + compileall.compile_dir( + path, quiet=True, optimize=opt_combination, + hardlink_dupes=True + ) - # Both files should have the same inode (hardlinks) - self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) + # All three files should have the same inode (hardlinks) + for pair in itertools.combinations(opt_combination, 2): + self.assertTrue(self.is_hardlink(pycs[pair[0]], pycs[pair[1]])) - for pyc_file in {pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) + for pyc_file in pycs.values(): + os.unlink(pyc_file) - compileall.compile_dir(path, quiet=True, optimize=[1, 2]) + compileall.compile_dir( + path, quiet=True, optimize=opt_combination, + hardlink_dupes=False + ) - # Deduplication disabled, all pyc files should have different inodes - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) + # Deduplication disabled, all pyc files should have different inodes + for pair in itertools.combinations(opt_combination, 2): + self.assertFalse(self.is_hardlink(pycs[pair[0]], pycs[pair[1]])) def test_hardlink_deduplication_different_bytecode_all_opt(self): # "'''string'''\nassert 1" produces a different bytecode for From 97b057edc7165492522ba0226cd40296c51c5ec5 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Tue, 12 May 2020 08:58:50 +0200 Subject: [PATCH 09/13] fix tests --- Lib/test/test_compileall.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index 9f006048f2ae96..d5b615e9b80c7c 100644 --- a/Lib/test/test_compileall.py +++ 
b/Lib/test/test_compileall.py @@ -390,8 +390,12 @@ def test_hardlink_deduplication_same_bytecode(self): pycs = {} for opt_level in opt_combination: + # We need this because importlib.util.cache_from_source + # produces different results when called with + # optimization=0 and without optimization + optimization_kwarg = {"optimization": opt_level} if opt_level > 0 else {} pycs[opt_level] = importlib.util.cache_from_source( - simple_script, optimization=opt_level + simple_script, **optimization_kwarg ) compileall.compile_dir( @@ -415,6 +419,9 @@ def test_hardlink_deduplication_same_bytecode(self): for pair in itertools.combinations(opt_combination, 2): self.assertFalse(self.is_hardlink(pycs[pair[0]], pycs[pair[1]])) + for pyc_file in pycs.values(): + os.unlink(pyc_file) + def test_hardlink_deduplication_different_bytecode_all_opt(self): # "'''string'''\nassert 1" produces a different bytecode for # all optimization levels From 9ca6eae44a8e13207caff3ea657dd6d8ebc5f2de Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 13 May 2020 16:24:47 +0200 Subject: [PATCH 10/13] Refactor tests * Add HardlinkDedupTestsBase test case. * Only keep two tests on the command line interface since most tests were duplicated with HardlinkDedupTestsBase. * Add helper functions and methods to factorize the code. * Sort imports * Replace 415 lines of tests with 205 lines: 2x smaller. 
--- Lib/test/test_compileall.py | 612 ++++++++++++------------------------ 1 file changed, 199 insertions(+), 413 deletions(-) diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index d5b615e9b80c7c..a277b638c5d917 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -1,18 +1,19 @@ -import sys import compileall +import contextlib +import filecmp import importlib.util -import test.test_importlib.util +import io +import itertools import os import pathlib import py_compile import shutil import struct +import sys import tempfile +import test.test_importlib.util import time import unittest -import io -import filecmp -import itertools from unittest import mock, skipUnless try: @@ -28,6 +29,24 @@ from .test_py_compile import SourceDateEpochTestMeta +def get_pyc(script, opt): + if not opt: + # Replace None and 0 with '' + opt = '' + return importlib.util.cache_from_source(script, optimization=opt) + + +def get_pycs(script): + return [get_pyc(script, opt) for opt in (0, 1, 2)] + + +def is_hardlink(filename1, filename2): + """Returns True if two files have the same inode (hardlink)""" + inode1 = os.stat(filename1).st_ino + inode2 = os.stat(filename2).st_ino + return inode1 == inode2 + + class CompileallTestsBase: def setUp(self): @@ -75,12 +94,6 @@ def recreation_check(self, metadata): compileall.compile_dir(self.directory, force=False, quiet=True) self.assertTrue(*self.timestamp_metadata()) - def is_hardlink(self, filename1, filename2): - """Returns True if two files have the same inode (hardlink)""" - inode1 = os.stat(filename1).st_ino - inode2 = os.stat(filename2).st_ino - return inode1 == inode2 - def test_mtime(self): # Test a change in mtime leads to a new .pyc. 
self.recreation_check(struct.pack('<4sll', importlib.util.MAGIC_NUMBER, @@ -368,196 +381,6 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) - def test_hardlink_deduplication_bad_args(self): - # Bad arguments combination, hardlink deduplication make sense - # only for more than one optimization level - with self.assertRaises(ValueError): - compileall.compile_dir(self.directory, quiet=True, optimize=0, - hardlink_dupes=True) - - def test_hardlink_deduplication_same_bytecode(self): - # 'a = 0' produces the same bytecode for all optimization levels - path = os.path.join(self.directory, "test", "same") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "test_same_bytecode", - "a = 0") - - opt_combinations = ((0, 1, 2), (1, 2), (0, 2)) - - for opt_combination in opt_combinations: - with self.subTest(opt_combination=opt_combination): - - pycs = {} - for opt_level in opt_combination: - # We need this because importlib.util.cache_from_source - # produces different results when called with - # optimization=0 and without optimization - optimization_kwarg = {"optimization": opt_level} if opt_level > 0 else {} - pycs[opt_level] = importlib.util.cache_from_source( - simple_script, **optimization_kwarg - ) - - compileall.compile_dir( - path, quiet=True, optimize=opt_combination, - hardlink_dupes=True - ) - - # All three files should have the same inode (hardlinks) - for pair in itertools.combinations(opt_combination, 2): - self.assertTrue(self.is_hardlink(pycs[pair[0]], pycs[pair[1]])) - - for pyc_file in pycs.values(): - os.unlink(pyc_file) - - compileall.compile_dir( - path, quiet=True, optimize=opt_combination, - hardlink_dupes=False - ) - - # Deduplication disabled, all pyc files should have different inodes - for pair in itertools.combinations(opt_combination, 2): - self.assertFalse(self.is_hardlink(pycs[pair[0]], pycs[pair[1]])) - - for pyc_file in pycs.values(): - 
os.unlink(pyc_file) - - def test_hardlink_deduplication_different_bytecode_all_opt(self): - # "'''string'''\nassert 1" produces a different bytecode for - # all optimization levels - path = os.path.join(self.directory, "test", "different_all") - os.makedirs(path) - - simple_script = script_helper.make_script( - path, "test_different_bytecode", "'''string'''\nassert 1" - ) - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=True) - - # No hardlinks, bytecodes are different - self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=False) - - # Disabling hardlink deduplication makes no difference - self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - - def test_hardlink_deduplication_different_bytecode_one_hardlink(self): - # "'''string'''\na = 1" produces the same bytecode only - # for level 0 and 1 - path = os.path.join(self.directory, "test", "different_one") - os.makedirs(path) - - simple_script = script_helper.make_script( - path, "test_different_bytecode", "'''string'''\na = 1" - ) - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=True) - - # Only level 0 and 1 has the same inode, level 2 produces - # a different bytecode - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) - 
self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=False) - - # Deduplication disabled, no hardlinks - self.assertFalse(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - - def test_hardlink_deduplication_recompilation(self): - path = os.path.join(self.directory, "test", "module_change") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "module_change", - "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=True) - - # All three levels have the same inode - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) - - previous_inode = os.stat(pyc_opt0).st_ino - - # Change of the module content - simple_script = script_helper.make_script(path, "module_change", - "print(0)") - - # Recompilation without -o 1 - compileall.compile_dir(path, force=True, quiet=True, optimize=[0, 2], - hardlink_dupes=True) - - # opt-1.pyc should have the same inode as before and others should not - self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt2)) - self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) - # opt-1.pyc and opt-2.pyc have different content - self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) - - def test_hardlink_deduplication_import(self): - path = os.path.join(self.directory, "test", "module_import") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "module", "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = 
importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - compileall.compile_dir(path, quiet=True, optimize=[0, 1, 2], - hardlink_dupes=True) - - # All three levels have the same inode - self.assertTrue(self.is_hardlink(pyc_opt0, pyc_opt1)) - self.assertTrue(self.is_hardlink(pyc_opt1, pyc_opt2)) - - previous_inode = os.stat(pyc_opt0).st_ino - - # Change of the module content - simple_script = script_helper.make_script(path, "module", "print(0)") - - # Import the module in Python - script_helper.assert_python_ok( - "-O", "-c", "import module", __isolated=False, PYTHONPATH=path - ) - - # Only opt-1.pyc is changed - self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) - self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) - self.assertFalse(self.is_hardlink(pyc_opt1, pyc_opt2)) - # opt-1.pyc and opt-2.pyc have different content - self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) - class CompileallTestsWithSourceEpoch(CompileallTestsBase, unittest.TestCase, @@ -1023,238 +846,201 @@ def test_ignore_symlink_destination(self): self.assertTrue(os.path.isfile(allowed_bc)) self.assertFalse(os.path.isfile(prohibited_bc)) - def test_hardlink_deduplication_bad_args(self): + def test_hardlink_bad_args(self): # Bad arguments combination, hardlink deduplication make sense # only for more than one optimization level - self.assertRunNotOK(self.directory, "-o 1", "--hardlink_dupes") - - def test_hardlink_deduplication_same_bytecode_all_opt(self): - # 'a = 0' produces the same bytecode for all optimization levels - path = os.path.join(self.directory, "test", "same_all") - os.makedirs(path) + self.assertRunNotOK(self.directory, "-o 1", "--hardlink-dupes") + + def test_hardlink(self): + # 'a = 0' code produces the same bytecode for the 3 optimization + # levels. All three .pyc files must have the same inode (hardlinks). 
+ # + # If deduplication is disabled, all pyc files must have different + # inodes. + for dedup in (True, False): + with tempfile.TemporaryDirectory() as path: + with self.subTest(dedup=dedup): + script = script_helper.make_script(path, "script", "a = 0") + pycs = get_pycs(script) + + args = ["-q", "-o 0", "-o 1", "-o 2"] + if dedup: + args.append("--hardlink-dupes") + self.assertRunOK(path, *args) + + self.assertEqual(is_hardlink(pycs[0], pycs[1]), dedup) + self.assertEqual(is_hardlink(pycs[1], pycs[2]), dedup) + self.assertEqual(is_hardlink(pycs[0], pycs[2]), dedup) - simple_script = script_helper.make_script(path, "test_same_bytecode", - "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", - "--hardlink-dupes") - - # All three files should have the same inode (hardlinks) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") - - # Deduplication disabled, all pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - def test_hardlink_deduplication_same_bytecode_some_opt(self): - # 'a = 0' produces the same bytecode for all optimization levels - # only two levels of optimization [0, 1] tested - path = os.path.join(self.directory, "test", "same_some") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "test_same_bytecode", - "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - 
self.assertRunOK(path, "-q", "-o 0", "-o 2", "--hardlink-dupes") - - # Both files should have the same inode (hardlink) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) - - for pyc_file in {pyc_opt0, pyc_opt2}: - os.unlink(pyc_file) - - self.assertRunOK(path, "-q", "-o 0", "-o 2") - - # Deduplication disabled, both pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) - - def test_hardlink_deduplication_same_bytecode_some_opt_2(self): - # 'a = 0' produces the same bytecode for all optimization levels - path = os.path.join(self.directory, "test", "same_some_2") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "test_same_bytecode", - "a = 0") - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - self.assertRunOK(path, "-q", "-o 1", "-o 2", "--hardlink-dupes") - - # Both files should have the same inode (hardlinks) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - for pyc_file in {pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - - self.assertRunOK(path, "-q", "-o 1", "-o 2") - - # Deduplication disabled, all pyc files should have different inodes - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - def test_hardlink_deduplication_different_bytecode_all_opt(self): - # "'''string'''\nassert 1" produces a different bytecode for - # all optimization levels - path = os.path.join(self.directory, "test", "different_all") - os.makedirs(path) - - simple_script = script_helper.make_script(path, - "test_different_bytecode", - "'''string'''\nassert 1") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", - 
"--hardlink-dupes") - - # No hardlinks, bytecodes are different - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) +class CommandLineTestsWithSourceEpoch(CommandLineTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): + pass - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") +class CommandLineTestsNoSourceEpoch(CommandLineTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=False): + pass - # Disabling hardlink deduplication makes no difference - self.assertNotEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - def test_hardlink_deduplication_different_bytecode_one_hardlink(self): - # "'''string'''\na = 1" produces the same bytecode only - # for level 0 and 1 - path = os.path.join(self.directory, "test", "different_one") - os.makedirs(path) - simple_script = script_helper.make_script( - path, "test_different_bytecode", "'''string'''\na = 1" - ) - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) +class HardlinkDedupTestsBase: + # Test hardlink_dupes parameter of compileall.compile_dir() - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2", - "--hardlink-dupes") + def setUp(self): + self.path = None - # Only level 0 and 1 has the same inode, level 2 produces - # a different bytecode - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) + @contextlib.contextmanager + def temporary_directory(self): + with tempfile.TemporaryDirectory() as path: + self.path = path + yield path + self.path 
= None - for pyc_file in {pyc_opt0, pyc_opt1, pyc_opt2}: - os.unlink(pyc_file) + def make_script(self, code, name="script"): + return script_helper.make_script(self.path, name, code) - self.assertRunOK(path, "-q", "-o 0", "-o 1", "-o 2") + def compile_dir(self, *, dedup=True, optimize=(0, 1, 2), force=False): + compileall.compile_dir(self.path, quiet=True, optimize=optimize, + hardlink_dupes=dedup, force=force) + def test_bad_args(self): + # Bad arguments combination, hardlink deduplication makes sense + # only for more than one optimization level + with self.assertRaises(ValueError): + with self.temporary_directory(): + self.make_script("pass") + compileall.compile_dir(self.path, quiet=True, optimize=0, + hardlink_dupes=True) + + def create_code(self, docstring=False, assertion=False): + lines = [] + if docstring: + lines.append("'module docstring'") + lines.append('x = 1') + if assertion: + lines.append("assert x == 1") + return '\n'.join(lines) + + def iter_codes(self): + for docstring in (False, True): + for assertion in (False, True): + code = self.create_code(docstring=docstring, assertion=assertion) + yield (code, docstring, assertion) + + def test_disabled(self): # Deduplication disabled, no hardlinks
os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - previous_inode = os.stat(pyc_opt0).st_ino - - # Change of the module content - simple_script = script_helper.make_script(path, "module_change", - "print(0)") - - # Recompilation without -o 1 - self.assertRunOK(path, "-f", "-q", "-o 0", "-o 2", "--hardlink-dupes") - - # opt-1.pyc should have the same inode as before and others should not - self.assertEqual(previous_inode, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt2).st_ino) - self.assertNotEqual(previous_inode, os.stat(pyc_opt2).st_ino) - # opt-1.pyc and opt-2.pyc have different content - self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) - - def test_hardlink_deduplication_import(self): - path = os.path.join(self.directory, "test", "module_import") - os.makedirs(path) - - simple_script = script_helper.make_script(path, "module", "a = 0") - pyc_opt0 = importlib.util.cache_from_source(simple_script) - pyc_opt1 = importlib.util.cache_from_source(simple_script, - optimization=1) - pyc_opt2 = importlib.util.cache_from_source(simple_script, - optimization=2) - - self.assertRunOK(path, "-f", "-q", "-o 0", "-o 1", "-o 2", - "--hardlink-dupes") - - # All three levels have the same inode - self.assertEqual(os.stat(pyc_opt0).st_ino, os.stat(pyc_opt1).st_ino) - self.assertEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - - previous_inode = os.stat(pyc_opt0).st_ino - - # Change of the module content - simple_script = script_helper.make_script(path, "module", "print(0)") - - # Import the module in Python - script_helper.assert_python_ok( - "-O", "-c", "import module", __isolated=False, PYTHONPATH=path - ) - - # Only opt-1.pyc is changed - self.assertEqual(previous_inode, os.stat(pyc_opt0).st_ino) - self.assertEqual(previous_inode, os.stat(pyc_opt2).st_ino) - self.assertNotEqual(os.stat(pyc_opt1).st_ino, os.stat(pyc_opt2).st_ino) - # opt-1.pyc and opt-2.pyc have 
different content - self.assertFalse(filecmp.cmp(pyc_opt1, pyc_opt2, shallow=True)) - - -class CommandLineTestsWithSourceEpoch(CommandLineTestsBase, - unittest.TestCase, - metaclass=SourceDateEpochTestMeta, - source_date_epoch=True): + for code, docstring, assertion in self.iter_codes(): + with self.subTest(docstring=docstring, assertion=assertion): + with self.temporary_directory(): + script = self.make_script(code) + pycs = get_pycs(script) + self.compile_dir(dedup=False) + self.assertFalse(is_hardlink(pycs[0], pycs[1])) + self.assertFalse(is_hardlink(pycs[0], pycs[2])) + self.assertFalse(is_hardlink(pycs[1], pycs[2])) + + def check_hardlinks(self, script, docstring=False, assertion=False): + pycs = get_pycs(script) + self.assertEqual(is_hardlink(pycs[0], pycs[1]), + not assertion) + self.assertEqual(is_hardlink(pycs[0], pycs[2]), + not assertion and not docstring) + self.assertEqual(is_hardlink(pycs[1], pycs[2]), + not docstring) + + def test_hardlink(self): + # Test deduplication on all combinations + for code, docstring, assertion in self.iter_codes(): + with self.subTest(docstring=docstring, assertion=assertion): + with self.temporary_directory(): + script = self.make_script(code) + self.compile_dir() + self.check_hardlinks(script, docstring, assertion) + + def test_only_two_levels(self): + # Don't build the 3 optimization levels, but only 2 + for opts in ((0, 1), (1, 2), (0, 2)): + with self.subTest(opts=opts): + with self.temporary_directory(): + # code with no dostring and no assertion: + # same bytecode for all optimization levels + script = self.make_script(self.create_code()) + self.compile_dir(optimize=opts) + pyc1 = get_pyc(script, opts[0]) + pyc2 = get_pyc(script, opts[1]) + self.assertTrue(is_hardlink(pyc1, pyc2)) + + def test_recompilation(self): + # Test compile_dir() when pyc files already exists and the script + # content changed + with self.temporary_directory(): + script = self.make_script("a = 0") + self.compile_dir() + # All three levels 
have the same inode + self.check_hardlinks(script) + + pycs = get_pycs(script) + inode = os.stat(pycs[0]).st_ino + + # Change of the module content + script = self.make_script("print(0)") + + # Recompilation without -o 1 + self.compile_dir(optimize=[0, 2], force=True) + + # opt-1.pyc should have the same inode as before and others should not + self.assertEqual(inode, os.stat(pycs[1]).st_ino) + self.assertTrue(is_hardlink(pycs[0], pycs[2])) + self.assertNotEqual(inode, os.stat(pycs[2]).st_ino) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=True)) + + def test_import(self): + # Test that import updates a single pyc file when pyc files already + # exists and the script content changed + with self.temporary_directory(): + script = self.make_script(self.create_code(), name="module") + self.compile_dir() + # All three levels have the same inode + self.check_hardlinks(script) + + pycs = get_pycs(script) + inode = os.stat(pycs[0]).st_ino + + # Change of the module content + script = self.make_script("print(0)", name="module") + + # Import the module in Python with -O (optimization level 1) + script_helper.assert_python_ok( + "-O", "-c", "import module", __isolated=False, PYTHONPATH=self.path + ) + + # Only opt-1.pyc is changed + self.assertEqual(inode, os.stat(pycs[0]).st_ino) + self.assertEqual(inode, os.stat(pycs[2]).st_ino) + self.assertFalse(is_hardlink(pycs[1], pycs[2])) + # opt-1.pyc and opt-2.pyc have different content + self.assertFalse(filecmp.cmp(pycs[1], pycs[2], shallow=True)) + + +class HardlinkDedupTestsWithSourceEpoch(HardlinkDedupTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + source_date_epoch=True): pass -class CommandLineTestsNoSourceEpoch(CommandLineTestsBase, - unittest.TestCase, - metaclass=SourceDateEpochTestMeta, - source_date_epoch=False): +class HardlinkDedupTestsNoSourceEpoch(HardlinkDedupTestsBase, + unittest.TestCase, + metaclass=SourceDateEpochTestMeta, + 
source_date_epoch=False): pass - if __name__ == "__main__": unittest.main() From b0063618535c5c4aeade1213af84c5d7b67de307 Mon Sep 17 00:00:00 2001 From: Lumir Balhar Date: Thu, 14 May 2020 14:50:09 +0200 Subject: [PATCH 11/13] Updated Whatsnew --- Doc/whatsnew/3.9.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index cefaf5715d4143..abfd8f85fc47a6 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -245,6 +245,16 @@ that schedules a shutdown for the default executor that waits on the Added :class:`asyncio.PidfdChildWatcher`, a Linux-specific child watcher implementation that polls process file descriptors. (:issue:`38692`) +compileall +---------- + +Added new possibility to use hardlinks for duplicated ``.pyc`` files: *hardlink_dupes* parameter and --hardlink-dupes command line option. +(Contributed by Lumír 'Frenzy' Balhar in :issue:`40495`.) + +Added new options for path manipulation in resulting ``.pyc`` files: *stripdir*, *prependdir*, *limit_sl_dest* parameters and -s, -p, -e command line options. +Added the possibility to specify the option for an optimization level multiple times. +(Contributed by Lumír 'Frenzy' Balhar in :issue:`38112`.) 
+ concurrent.futures ------------------ From 45259b27f6bf66d7f31095cc10e0453090a3b52d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2020 15:12:38 +0200 Subject: [PATCH 12/13] Update Lib/compileall.py --- Lib/compileall.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/compileall.py b/Lib/compileall.py index f52fa786447f56..8f9ee2bcfdd52b 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -183,7 +183,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if hardlink_dupes: raise ValueError("Hardlinking of duplicated bytecode makes sense " - "only for more than one optimization level.") + "only for more than one optimization level") if rx is not None: mo = rx.search(fullname) From 7e92096fe6a6c675dbacff88c3e8e44596d8ce66 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2020 15:35:47 +0200 Subject: [PATCH 13/13] Remove duplicated optimization levels --- Lib/compileall.py | 12 ++++++++---- Lib/test/test_compileall.py | 23 ++++++++++++++++++++--- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/Lib/compileall.py b/Lib/compileall.py index 8f9ee2bcfdd52b..fe7f450c55e1c5 100644 --- a/Lib/compileall.py +++ b/Lib/compileall.py @@ -181,9 +181,13 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if isinstance(optimize, int): optimize = [optimize] - if hardlink_dupes: - raise ValueError("Hardlinking of duplicated bytecode makes sense " - "only for more than one optimization level") + # Use set() to remove duplicates. + # Use sorted() to create pyc files in a deterministic order. 
+ optimize = sorted(set(optimize)) + + if hardlink_dupes and len(optimize) < 2: + raise ValueError("Hardlinking of duplicated bytecode makes sense " + "only for more than one optimization level") if rx is not None: mo = rx.search(fullname) @@ -229,7 +233,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0, if not quiet: print('Compiling {!r}...'.format(fullname)) try: - for index, opt_level in enumerate(sorted(optimize)): + for index, opt_level in enumerate(optimize): cfile = opt_cfiles[opt_level] ok = py_compile.compile(fullname, cfile, dfile, True, optimize=opt_level, diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py index a277b638c5d917..b4061b79357b87 100644 --- a/Lib/test/test_compileall.py +++ b/Lib/test/test_compileall.py @@ -911,11 +911,16 @@ def compile_dir(self, *, dedup=True, optimize=(0, 1, 2), force=False): def test_bad_args(self): # Bad arguments combination, hardlink deduplication make sense # only for more than one optimization level - with self.assertRaises(ValueError): - with self.temporary_directory(): - self.make_script("pass") + with self.temporary_directory(): + self.make_script("pass") + with self.assertRaises(ValueError): compileall.compile_dir(self.path, quiet=True, optimize=0, hardlink_dupes=True) + with self.assertRaises(ValueError): + # same optimization level specified twice: + # compile_dir() removes duplicates + compileall.compile_dir(self.path, quiet=True, optimize=[0, 0], + hardlink_dupes=True) def create_code(self, docstring=False, assertion=False): lines = [] @@ -975,6 +980,18 @@ def test_only_two_levels(self): pyc2 = get_pyc(script, opts[1]) self.assertTrue(is_hardlink(pyc1, pyc2)) + def test_duplicated_levels(self): + # compile_dir() must not fail if optimize contains duplicated + # optimization levels and/or if optimization levels are not sorted. 
+ with self.temporary_directory(): + # code with no docstring and no assertion: + # same bytecode for all optimization levels + script = self.make_script(self.create_code()) + self.compile_dir(optimize=[1, 0, 1, 0]) + pyc1 = get_pyc(script, 0) + pyc2 = get_pyc(script, 1) + self.assertTrue(is_hardlink(pyc1, pyc2)) + def test_recompilation(self): # Test compile_dir() when pyc files already exists and the script # content changed pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy