From 00b93c411bf3c6cbfc3d4a40e6dfa5df6889591b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 30 Jun 2019 16:46:11 -0700 Subject: [PATCH 1/7] bpo-37424: Have subprocess use killpg for some timeout kills. When shell=True the user is often launching a process tree, if we kill only the shell and it hasn't set itself up as a pgrp leader, the grandchild processes still run and may have our output handles open. Leading us to wait forever when we should be timing out. This Adds a fractional timeout on the cleanup communicate phase _and_ will use killpg on the child process group _if_ it is different from our own as it should be when start_new_session=True was used. --- Lib/subprocess.py | 28 ++++++++++++++++++++++++++-- Lib/test/test_subprocess.py | 24 ++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 5bbeba47a37432..d1a36277dcbd48 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -491,7 +491,13 @@ def run(*popenargs, stdout, stderr = process.communicate(input, timeout=timeout) except TimeoutExpired: process.kill() - stdout, stderr = process.communicate() + # The timeout here is to avoid waiting around _forever_ if + # the kill failed to deal with all processes holding the + # output file handles open. Otherwise we could hang until + # those processes terminate before continuing with our timeout. + # See https://bugs.python.org/issue37424. + stdout, stderr = process.communicate( + timeout=_get_cleanup_timeout(timeout)) raise TimeoutExpired(process.args, timeout, output=stdout, stderr=stderr) except: # Including KeyboardInterrupt, communicate handled that. @@ -505,6 +511,11 @@ def run(*popenargs, return CompletedProcess(process.args, retcode, stdout, stderr) +def _get_cleanup_timeout(timeout): # For test injection. + if timeout is not None: + return timeout/20 + + def list2cmdline(seq): """ Translate a sequence of arguments into a command line @@ -743,6 +754,11 @@ def __init__(self, args, bufsize=-1, executable=None, if not isinstance(bufsize, int): raise TypeError("bufsize must be an integer") + # If True, kill/terminate/send_signal will send the signal to + # os.getpgid(self.pid) on platforms with that concept IF the + # group is not our own process group. + self.__signal_process_group = shell and hasattr(os, 'getpgid') + if _mswindows: if preexec_fn is not None: raise ValueError("preexec_fn is not supported on Windows " @@ -1910,7 +1926,15 @@ def send_signal(self, sig): """Send a signal to the process.""" # Skip signalling a process that we know has already died. if self.returncode is None: - os.kill(self.pid, sig) + if self.__signal_process_group: + pgid = os.getpgid(self.pid) + if pgid == os.getpgid(os.getpid()): + # Never killpg our own process group. + os.kill(self.pid, sig) + else: + os.killpg(pgid, sig) + else: + os.kill(self.pid, sig) def terminate(self): """Terminate the process with SIGTERM diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 6b8acb258ee36e..ebe785013bafce 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -11,6 +11,7 @@ import errno import tempfile import time +import traceback import selectors import sysconfig import select @@ -1564,6 +1565,29 @@ def test_stderr_with_capture_output_arg(self): self.assertIn('stderr', c.exception.args[0]) self.assertIn('capture_output', c.exception.args[0]) + @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") + @mock.patch("subprocess._get_cleanup_timeout") + def test_run_with_shell_timeout_and_capture_output( + self, mock_get_cleanup_timeout): + """Reproduce https://bugs.python.org/issue37424.""" + # This test is about ensuring that the cleanup_timeout was not + # needed, that the grandchild process holding the output handles + # open actually died. Thus we force a high cleanup time that'll + # obviously fail our timing test. + mock_get_cleanup_timeout.return_value = 3.1415926 + before_secs = time.monotonic() + try: + subprocess.run('sleep 4', shell=True, timeout=0.1, + capture_output=True, start_new_session=True) + except subprocess.TimeoutExpired as exc: + after_secs = time.monotonic() + stacks = traceback.format_exc() # assertRaises doesn't give this. + else: + self.fail("TimeoutExpired not raised.") + self.assertLess(after_secs - before_secs, 2, + msg="TimeoutExpired was delayed! Bad traceback:\n```\n" + f"{stacks}```") + @unittest.skipIf(mswindows, "POSIX specific tests") class POSIXProcessTestCase(BaseTestCase): From be807f6781547cbfcaec45cb580993391ad47484 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 30 Jun 2019 16:53:54 -0700 Subject: [PATCH 2/7] Adds a test for the no session scenario (common). --- Lib/test/test_subprocess.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index ebe785013bafce..72a95bbd4b843e 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1567,9 +1567,9 @@ def test_stderr_with_capture_output_arg(self): @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") @mock.patch("subprocess._get_cleanup_timeout") - def test_run_with_shell_timeout_and_capture_output( + def test_run_with_shell_timeout_and_capture_output_explicit_session( self, mock_get_cleanup_timeout): - """Reproduce https://bugs.python.org/issue37424.""" + """Test from https://bugs.python.org/issue37424 with a session.""" # This test is about ensuring that the cleanup_timeout was not # needed, that the grandchild process holding the output handles # open actually died. Thus we force a high cleanup time that'll @@ -1588,6 +1588,22 @@ def test_run_with_shell_timeout_and_capture_output( msg="TimeoutExpired was delayed! Bad traceback:\n```\n" f"{stacks}```") + @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") + def test_run_with_shell_timeout_and_capture_output(self): + """Output capturing after a timeout mustn't hang forever on open filehandles.""" + before_secs = time.monotonic() + try: + subprocess.run('sleep 3', shell=True, timeout=0.1, + capture_output=True) # New session unspecified. + except subprocess.TimeoutExpired as exc: + after_secs = time.monotonic() + stacks = traceback.format_exc() # assertRaises doesn't give this. + else: + self.fail("TimeoutExpired not raised.") + self.assertLess(after_secs - before_secs, 1.5, + msg="TimeoutExpired was delayed! Bad traceback:\n```\n" + f"{stacks}```") + @unittest.skipIf(mswindows, "POSIX specific tests") class POSIXProcessTestCase(BaseTestCase): From ef8406a16f3a260841dc160a773dfd141262325f Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 4 Jul 2019 11:39:32 -0700 Subject: [PATCH 3/7] Simplify this a lot, do not use magic process groups. Do not have a post-timeout sub-timeout for output collection, we've already got all of that output within POSIX's _communicate() method as it is. --- Lib/subprocess.py | 47 +++++++++++++++++++++---------------- Lib/test/test_subprocess.py | 8 ++----- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/Lib/subprocess.py b/Lib/subprocess.py index d1a36277dcbd48..187fcdec4f197b 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -489,17 +489,20 @@ def run(*popenargs, with Popen(*popenargs, **kwargs) as process: try: stdout, stderr = process.communicate(input, timeout=timeout) - except TimeoutExpired: + except TimeoutExpired as exc: process.kill() - # The timeout here is to avoid waiting around _forever_ if - # the kill failed to deal with all processes holding the - # output file handles open. Otherwise we could hang until - # those processes terminate before continuing with our timeout. - # See https://bugs.python.org/issue37424. - stdout, stderr = process.communicate( - timeout=_get_cleanup_timeout(timeout)) - raise TimeoutExpired(process.args, timeout, output=stdout, - stderr=stderr) + if _mswindows: + # Windows accumulates the output in a single blocking + # read() call run on child threads, with the timeout + # being done in a join() on those threads. communicate() + # _after_ kill() is required to collect that and add it + # to the exception. + exc.stdout, exc.stderr = process.communicate() + else: + # POSIX _communicate already populated the output so + # far into the TimeourExpired exception. + process.wait() + raise except: # Including KeyboardInterrupt, communicate handled that. process.kill() # We don't call process.wait() as .__exit__ does that for us. @@ -511,11 +514,6 @@ def run(*popenargs, return CompletedProcess(process.args, retcode, stdout, stderr) -def _get_cleanup_timeout(timeout): # For test injection. - if timeout is not None: - return timeout/20 - - def list2cmdline(seq): """ Translate a sequence of arguments into a command line @@ -1066,12 +1064,16 @@ def _remaining_time(self, endtime): return endtime - _time() - def _check_timeout(self, endtime, orig_timeout): + def _check_timeout(self, endtime, orig_timeout, stdout_seq, stderr_seq, + skip_check_and_raise=False): """Convenience for checking if a timeout has expired.""" if endtime is None: return - if _time() > endtime: - raise TimeoutExpired(self.args, orig_timeout) + if skip_check_and_raise or _time() > endtime: + raise TimeoutExpired( + self.args, orig_timeout, + output=b''.join(stdout_seq) if stdout_seq else None, + stderr=b''.join(stderr_seq) if stderr_seq else None) def wait(self, timeout=None): @@ -1859,10 +1861,15 @@ def _communicate(self, input, endtime, orig_timeout): while selector.get_map(): timeout = self._remaining_time(endtime) if timeout is not None and timeout < 0: - raise TimeoutExpired(self.args, orig_timeout) + self._check_timeout(endtime, orig_timeout, + stdout, stderr, + skip_check_and_raise=True) + raise RuntimeError( # Impossible :) + '_check_timeout(..., skip_check_and_raise=True) ' + 'failed to raise TimeoutExpired.') ready = selector.select(timeout) - self._check_timeout(endtime, orig_timeout) + self._check_timeout(endtime, orig_timeout, stdout, stderr) # XXX Rewrite these to use non-blocking I/O on the file # objects; they are no longer using C stdio! diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 72a95bbd4b843e..563c5a42efc12c 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1566,15 +1566,11 @@ def test_stderr_with_capture_output_arg(self): self.assertIn('capture_output', c.exception.args[0]) @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") - @mock.patch("subprocess._get_cleanup_timeout") - def test_run_with_shell_timeout_and_capture_output_explicit_session( - self, mock_get_cleanup_timeout): + def test_run_with_shell_timeout_and_capture_output_explicit_session(self): """Test from https://bugs.python.org/issue37424 with a session.""" # This test is about ensuring that the cleanup_timeout was not # needed, that the grandchild process holding the output handles - # open actually died. Thus we force a high cleanup time that'll - # obviously fail our timing test. - mock_get_cleanup_timeout.return_value = 3.1415926 + # open actually died. before_secs = time.monotonic() try: subprocess.run('sleep 4', shell=True, timeout=0.1, From 871065226fb42c19ae7166a688a02510bbff32d9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 4 Jul 2019 12:08:54 -0700 Subject: [PATCH 4/7] Remove the magic shell pgrp detection killpg logic. that belongs in its own PR if at all. --- Lib/subprocess.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 187fcdec4f197b..52a89a6972da5d 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -752,11 +752,6 @@ def __init__(self, args, bufsize=-1, executable=None, if not isinstance(bufsize, int): raise TypeError("bufsize must be an integer") - # If True, kill/terminate/send_signal will send the signal to - # os.getpgid(self.pid) on platforms with that concept IF the - # group is not our own process group. - self.__signal_process_group = shell and hasattr(os, 'getpgid') - if _mswindows: if preexec_fn is not None: raise ValueError("preexec_fn is not supported on Windows " @@ -1933,15 +1928,7 @@ def send_signal(self, sig): """Send a signal to the process.""" # Skip signalling a process that we know has already died. if self.returncode is None: - if self.__signal_process_group: - pgid = os.getpgid(self.pid) - if pgid == os.getpgid(os.getpid()): - # Never killpg our own process group. - os.kill(self.pid, sig) - else: - os.killpg(pgid, sig) - else: - os.kill(self.pid, sig) + os.kill(self.pid, sig) def terminate(self): """Terminate the process with SIGTERM From f897d0bca60d8940944e13748210752e60bbd633 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 4 Jul 2019 12:11:05 -0700 Subject: [PATCH 5/7] Remove session pgrp logic related test. it wasn't actually testing what it described as it never checked the grandchild process explicitly. redo this better in a later PR. --- Lib/test/test_subprocess.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 563c5a42efc12c..8c2bc2de095fd6 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1565,25 +1565,6 @@ def test_stderr_with_capture_output_arg(self): self.assertIn('stderr', c.exception.args[0]) self.assertIn('capture_output', c.exception.args[0]) - @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") - def test_run_with_shell_timeout_and_capture_output_explicit_session(self): - """Test from https://bugs.python.org/issue37424 with a session.""" - # This test is about ensuring that the cleanup_timeout was not - # needed, that the grandchild process holding the output handles - # open actually died. - before_secs = time.monotonic() - try: - subprocess.run('sleep 4', shell=True, timeout=0.1, - capture_output=True, start_new_session=True) - except subprocess.TimeoutExpired as exc: - after_secs = time.monotonic() - stacks = traceback.format_exc() # assertRaises doesn't give this. - else: - self.fail("TimeoutExpired not raised.") - self.assertLess(after_secs - before_secs, 2, - msg="TimeoutExpired was delayed! Bad traceback:\n```\n" - f"{stacks}```") - @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") def test_run_with_shell_timeout_and_capture_output(self): """Output capturing after a timeout mustn't hang forever on open filehandles.""" From 814d81e859726d205517724d4d68dfe7c4e2f1e9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 4 Jul 2019 13:00:23 -0700 Subject: [PATCH 6/7] blurb --- .../next/Library/2019-07-04-13-00-20.bpo-37424.0i1MR-.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2019-07-04-13-00-20.bpo-37424.0i1MR-.rst diff --git a/Misc/NEWS.d/next/Library/2019-07-04-13-00-20.bpo-37424.0i1MR-.rst b/Misc/NEWS.d/next/Library/2019-07-04-13-00-20.bpo-37424.0i1MR-.rst new file mode 100644 index 00000000000000..b98a17e241e077 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-04-13-00-20.bpo-37424.0i1MR-.rst @@ -0,0 +1,5 @@ +Fixes a possible hang when using a timeout on `subprocess.run()` while +capturing output. If the child process spawned its own children or +otherwise connected its stdout or stderr handles with another process, we +could hang after the timeout was reached and our child was killed when +attempting to read final output from the pipes. From 38df3341567767e2a8111349a1852382058ef0c0 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 10 Sep 2019 09:46:29 -0700 Subject: [PATCH 7/7] Typo in comment, add a test comment. --- Lib/subprocess.py | 2 +- Lib/test/test_subprocess.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/subprocess.py b/Lib/subprocess.py index 52a89a6972da5d..760fe8d8ecf8f6 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -500,7 +500,7 @@ def run(*popenargs, exc.stdout, exc.stderr = process.communicate() else: # POSIX _communicate already populated the output so - # far into the TimeourExpired exception. + # far into the TimeoutExpired exception. process.wait() raise except: # Including KeyboardInterrupt, communicate handled that. diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 8c2bc2de095fd6..1345d0dd208b58 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -1565,6 +1565,10 @@ def test_stderr_with_capture_output_arg(self): self.assertIn('stderr', c.exception.args[0]) self.assertIn('capture_output', c.exception.args[0]) + # This test _might_ wind up a bit fragile on loaded build+test machines + # as it depends on the timing with wide enough margins for normal situations + # but does assert that it happened "soon enough" to believe the right thing + # happened. @unittest.skipIf(mswindows, "requires posix like 'sleep' shell command") def test_run_with_shell_timeout_and_capture_output(self): """Output capturing after a timeout mustn't hang forever on open filehandles.""" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy