From 2115c244d73fe2e54f533440b9118f8c3c4a7725 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Sun, 20 Jul 2025 08:19:01 +0200 Subject: [PATCH 01/11] draft: impl lazy input consumption in mp.Pool.imap(_unordered) --- Lib/multiprocessing/pool.py | 109 ++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 24 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index f979890170b1a1..a0b50a53745368 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -390,21 +390,57 @@ def _guarded_task_generation(self, result_job, func, iterable): i = -1 for i, x in enumerate(iterable): yield (result_job, i, func, (x,), {}) + + except Exception as e: + yield (result_job, i+1, _helper_reraises_exception, (e,), {}) + + def _guarded_task_generation_lazy(self, result_job, func, iterable, + lazy_task_gen_helper): + '''Provides a generator of tasks for imap and imap_unordered with + appropriate handling for iterables which throw exceptions during + iteration.''' + if not lazy_task_gen_helper.feature_enabled: + yield from self._guarded_task_generation(result_job, func, iterable) + return + + try: + i = -1 + enumerated_iter = iter(enumerate(iterable)) + thread = threading.current_thread() + max_generated_tasks = self._processes + lazy_task_gen_helper.buffersize + + while thread._state == RUN: + with lazy_task_gen_helper.iterator_cond: + if lazy_task_gen_helper.not_finished_tasks >= max_generated_tasks: + continue # wait for some task to be (picked up and) finished + + try: + i, x = enumerated_iter.__next__() + except StopIteration: + break + + yield (result_job, i, func, (x,), {}) + lazy_task_gen_helper.tasks_generated += 1 + except Exception as e: yield (result_job, i+1, _helper_reraises_exception, (e,), {}) - def imap(self, func, iterable, chunksize=1): + def imap(self, func, iterable, chunksize=1, buffersize=None): ''' Equivalent of `map()` -- can be MUCH slower than `Pool.map()`. 
''' self._check_running() if chunksize == 1: - result = IMapIterator(self) + result = IMapIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation(result._job, func, iterable), - result._set_length - )) + self._guarded_task_generation_lazy(result._job, + func, + iterable, + result._lazy_task_gen_helper), + result._set_length, + ) + ) return result else: if chunksize < 1: @@ -412,42 +448,50 @@ def imap(self, func, iterable, chunksize=1): "Chunksize must be 1+, not {0:n}".format( chunksize)) task_batches = Pool._get_tasks(func, iterable, chunksize) - result = IMapIterator(self) + result = IMapIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation(result._job, - mapstar, - task_batches), - result._set_length - )) + self._guarded_task_generation_lazy(result._job, + mapstar, + task_batches, + result._lazy_task_gen_helper), + result._set_length, + ) + ) return (item for chunk in result for item in chunk) - def imap_unordered(self, func, iterable, chunksize=1): + def imap_unordered(self, func, iterable, chunksize=1, buffersize=None): ''' Like `imap()` method but ordering of results is arbitrary. 
''' self._check_running() if chunksize == 1: - result = IMapUnorderedIterator(self) + result = IMapUnorderedIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation(result._job, func, iterable), - result._set_length - )) + self._guarded_task_generation_lazy(result._job, + func, + iterable, + result._lazy_task_gen_helper), + result._set_length, + ) + ) return result else: if chunksize < 1: raise ValueError( "Chunksize must be 1+, not {0!r}".format(chunksize)) task_batches = Pool._get_tasks(func, iterable, chunksize) - result = IMapUnorderedIterator(self) + result = IMapUnorderedIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation(result._job, - mapstar, - task_batches), - result._set_length - )) + self._guarded_task_generation_lazy(result._job, + mapstar, + task_batches, + result._lazy_task_gen_helper), + result._set_length, + ) + ) return (item for chunk in result for item in chunk) def apply_async(self, func, args=(), kwds={}, callback=None, @@ -835,8 +879,7 @@ def _set(self, i, success_result): # class IMapIterator(object): - - def __init__(self, pool): + def __init__(self, pool, buffersize): self._pool = pool self._cond = threading.Condition(threading.Lock()) self._job = next(job_counter) @@ -846,6 +889,7 @@ def __init__(self, pool): self._length = None self._unsorted = {} self._cache[self._job] = self + self._lazy_task_gen_helper = _LazyTaskGenHelper(buffersize, self._cond) def __iter__(self): return self @@ -866,6 +910,7 @@ def next(self, timeout=None): self._pool = None raise StopIteration from None raise TimeoutError from None + self._lazy_task_gen_helper.tasks_finished += 1 success, value = item if success: @@ -914,6 +959,22 @@ def _set(self, i, obj): del self._cache[self._job] self._pool = None +# +# Class to store stats for lazy task generation and share them +# between the main thread and `_guarded_task_generation()` thread. 
+# +class _LazyTaskGenHelper(object): + def __init__(self, buffersize, iterator_cond): + self.feature_enabled = buffersize is not None + self.buffersize = buffersize + self.tasks_generated = 0 + self.tasks_finished = 0 + self.iterator_cond = iterator_cond + + @property + def not_finished_tasks(self): + return self.tasks_generated - self.tasks_finished + # # # From ec37be8b63b095a3be871841ae3b78bc7db852c4 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Sun, 20 Jul 2025 14:41:16 +0200 Subject: [PATCH 02/11] Use semaphore to synchronize threads Using `threading.Semaphore` makes it easier to cap the number of concurrently ran tasks. It also makes it possible to remove busy wait in child thread by waiting for semaphore. Also I've updated code to use the backpressure pattern - the new tasks are scheduled as soon as the user consumes the old ones. --- Lib/multiprocessing/pool.py | 112 ++++++++++++++++-------------------- 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index a0b50a53745368..abdd512980c849 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -14,6 +14,7 @@ # import collections +import functools import itertools import os import queue @@ -395,32 +396,20 @@ def _guarded_task_generation(self, result_job, func, iterable): yield (result_job, i+1, _helper_reraises_exception, (e,), {}) def _guarded_task_generation_lazy(self, result_job, func, iterable, - lazy_task_gen_helper): - '''Provides a generator of tasks for imap and imap_unordered with + backpressure_sema): + """Provides a generator of tasks for imap and imap_unordered with appropriate handling for iterables which throw exceptions during - iteration.''' - if not lazy_task_gen_helper.feature_enabled: - yield from self._guarded_task_generation(result_job, func, iterable) - return - + iteration.""" try: i = -1 enumerated_iter = iter(enumerate(iterable)) - thread = threading.current_thread() - 
max_generated_tasks = self._processes + lazy_task_gen_helper.buffersize - - while thread._state == RUN: - with lazy_task_gen_helper.iterator_cond: - if lazy_task_gen_helper.not_finished_tasks >= max_generated_tasks: - continue # wait for some task to be (picked up and) finished - + while True: + backpressure_sema.acquire() try: - i, x = enumerated_iter.__next__() + i, x = next(enumerated_iter) except StopIteration: break - yield (result_job, i, func, (x,), {}) - lazy_task_gen_helper.tasks_generated += 1 except Exception as e: yield (result_job, i+1, _helper_reraises_exception, (e,), {}) @@ -430,31 +419,32 @@ def imap(self, func, iterable, chunksize=1, buffersize=None): Equivalent of `map()` -- can be MUCH slower than `Pool.map()`. ''' self._check_running() + if chunksize < 1: + raise ValueError("Chunksize must be 1+, not {0:n}".format(chunksize)) + + result = IMapIterator(self, buffersize) + + if result._backpressure_sema is None: + task_generation = self._guarded_task_generation + else: + task_generation = functools.partial( + self._guarded_task_generation_lazy, + backpressure_sema=result._backpressure_sema, + ) + if chunksize == 1: - result = IMapIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation_lazy(result._job, - func, - iterable, - result._lazy_task_gen_helper), + task_generation(result._job, func, iterable), result._set_length, ) ) return result else: - if chunksize < 1: - raise ValueError( - "Chunksize must be 1+, not {0:n}".format( - chunksize)) task_batches = Pool._get_tasks(func, iterable, chunksize) - result = IMapIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation_lazy(result._job, - mapstar, - task_batches, - result._lazy_task_gen_helper), + task_generation(result._job, mapstar, task_batches), result._set_length, ) ) @@ -465,30 +455,34 @@ def imap_unordered(self, func, iterable, chunksize=1, buffersize=None): Like `imap()` method but ordering of results is arbitrary. 
''' self._check_running() + if chunksize < 1: + raise ValueError( + "Chunksize must be 1+, not {0!r}".format(chunksize) + ) + + result = IMapUnorderedIterator(self, buffersize) + + if result._backpressure_sema is None: + task_generation = self._guarded_task_generation + else: + task_generation = functools.partial( + self._guarded_task_generation_lazy, + backpressure_sema=result._backpressure_sema, + ) + if chunksize == 1: - result = IMapUnorderedIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation_lazy(result._job, - func, - iterable, - result._lazy_task_gen_helper), + task_generation(result._job, func, iterable), result._set_length, ) ) return result else: - if chunksize < 1: - raise ValueError( - "Chunksize must be 1+, not {0!r}".format(chunksize)) task_batches = Pool._get_tasks(func, iterable, chunksize) - result = IMapUnorderedIterator(self, buffersize) self._taskqueue.put( ( - self._guarded_task_generation_lazy(result._job, - mapstar, - task_batches, - result._lazy_task_gen_helper), + task_generation(result._job, mapstar, task_batches), result._set_length, ) ) @@ -889,7 +883,13 @@ def __init__(self, pool, buffersize): self._length = None self._unsorted = {} self._cache[self._job] = self - self._lazy_task_gen_helper = _LazyTaskGenHelper(buffersize, self._cond) + + if buffersize is None: + self._backpressure_sema = None + else: + self._backpressure_sema = threading.Semaphore( + value=self._pool._processes + buffersize + ) def __iter__(self): return self @@ -910,7 +910,9 @@ def next(self, timeout=None): self._pool = None raise StopIteration from None raise TimeoutError from None - self._lazy_task_gen_helper.tasks_finished += 1 + + if self._backpressure_sema: + self._backpressure_sema.release() success, value = item if success: @@ -959,22 +961,6 @@ def _set(self, i, obj): del self._cache[self._job] self._pool = None -# -# Class to store stats for lazy task generation and share them -# between the main thread and 
`_guarded_task_generation()` thread. -# -class _LazyTaskGenHelper(object): - def __init__(self, buffersize, iterator_cond): - self.feature_enabled = buffersize is not None - self.buffersize = buffersize - self.tasks_generated = 0 - self.tasks_finished = 0 - self.iterator_cond = iterator_cond - - @property - def not_finished_tasks(self): - return self.tasks_generated - self.tasks_finished - # # # From 6bbe0f273c35d471cc85b99e44645d5d89546819 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Mon, 21 Jul 2025 23:39:42 +0200 Subject: [PATCH 03/11] Update buffersize behavior to match concurrent.futures.Executor behavior This new behavior allow smaller real concurrency number than number of running processes. Previously, it was not allowed since we implicitly incremented buffersize by `self._processes`. --- Lib/multiprocessing/pool.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index abdd512980c849..9aaaaf55dab594 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -27,7 +27,7 @@ # If threading is available then ThreadPool should be provided. Therefore # we avoid top-level imports which are liable to fail on some systems. from . import util -from . import get_context, TimeoutError +from . 
import TimeoutError, get_context from .connection import wait # @@ -421,6 +421,11 @@ def imap(self, func, iterable, chunksize=1, buffersize=None): self._check_running() if chunksize < 1: raise ValueError("Chunksize must be 1+, not {0:n}".format(chunksize)) + if buffersize is not None: + if not isinstance(buffersize, int): + raise TypeError("buffersize must be an integer or None") + if buffersize < 1: + raise ValueError("buffersize must be None or > 0") result = IMapIterator(self, buffersize) @@ -459,6 +464,11 @@ def imap_unordered(self, func, iterable, chunksize=1, buffersize=None): raise ValueError( "Chunksize must be 1+, not {0!r}".format(chunksize) ) + if buffersize is not None: + if not isinstance(buffersize, int): + raise TypeError("buffersize must be an integer or None") + if buffersize < 1: + raise ValueError("buffersize must be None or > 0") result = IMapUnorderedIterator(self, buffersize) @@ -887,9 +897,7 @@ def __init__(self, pool, buffersize): if buffersize is None: self._backpressure_sema = None else: - self._backpressure_sema = threading.Semaphore( - value=self._pool._processes + buffersize - ) + self._backpressure_sema = threading.Semaphore(buffersize) def __iter__(self): return self @@ -911,7 +919,7 @@ def next(self, timeout=None): raise StopIteration from None raise TimeoutError from None - if self._backpressure_sema: + if self._backpressure_sema is not None: self._backpressure_sema.release() success, value = item From 3065a96d052cec90f6305cf762170a16f74f753f Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 01:10:05 +0200 Subject: [PATCH 04/11] Release all `buffersize_lock` obj from the parent thread when terminate --- Lib/multiprocessing/pool.py | 104 ++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 53 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index 9aaaaf55dab594..b8caac82e00b0c 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -191,6 
+191,11 @@ def __init__(self, processes=None, initializer=None, initargs=(), self._ctx = context or get_context() self._setup_queues() self._taskqueue = queue.SimpleQueue() + # The _taskqueue_buffersize_semaphores exist to allow calling .release() + # on every active semaphore when the pool is terminating to let task_handler + # wake up to stop. It's a dict so that each iterator object can efficiently + # deregister its semaphore when iterator finishes. + self._taskqueue_buffersize_semaphores = {} # The _change_notifier queue exist to wake up self._handle_workers() # when the cache (self._cache) is empty or when there is a change in # the _state variable of the thread that runs _handle_workers. @@ -257,7 +262,8 @@ def __init__(self, processes=None, initializer=None, initargs=(), self, self._terminate_pool, args=(self._taskqueue, self._inqueue, self._outqueue, self._pool, self._change_notifier, self._worker_handler, self._task_handler, - self._result_handler, self._cache), + self._result_handler, self._cache, + self._taskqueue_buffersize_semaphores), exitpriority=15 ) self._state = RUN @@ -383,33 +389,27 @@ def starmap_async(self, func, iterable, chunksize=None, callback=None, return self._map_async(func, iterable, starmapstar, chunksize, callback, error_callback) - def _guarded_task_generation(self, result_job, func, iterable): + def _guarded_task_generation(self, result_job, func, iterable, + buffersize_sema=None): '''Provides a generator of tasks for imap and imap_unordered with appropriate handling for iterables which throw exceptions during iteration.''' try: i = -1 - for i, x in enumerate(iterable): - yield (result_job, i, func, (x,), {}) - except Exception as e: - yield (result_job, i+1, _helper_reraises_exception, (e,), {}) + if buffersize_sema is None: + for i, x in enumerate(iterable): + yield (result_job, i, func, (x,), {}) - def _guarded_task_generation_lazy(self, result_job, func, iterable, - backpressure_sema): - """Provides a generator of tasks for 
imap and imap_unordered with - appropriate handling for iterables which throw exceptions during - iteration.""" - try: - i = -1 - enumerated_iter = iter(enumerate(iterable)) - while True: - backpressure_sema.acquire() - try: - i, x = next(enumerated_iter) - except StopIteration: - break - yield (result_job, i, func, (x,), {}) + else: + enumerated_iter = iter(enumerate(iterable)) + while True: + buffersize_sema.acquire() + try: + i, x = next(enumerated_iter) + except StopIteration: + break + yield (result_job, i, func, (x,), {}) except Exception as e: yield (result_job, i+1, _helper_reraises_exception, (e,), {}) @@ -428,19 +428,11 @@ def imap(self, func, iterable, chunksize=1, buffersize=None): raise ValueError("buffersize must be None or > 0") result = IMapIterator(self, buffersize) - - if result._backpressure_sema is None: - task_generation = self._guarded_task_generation - else: - task_generation = functools.partial( - self._guarded_task_generation_lazy, - backpressure_sema=result._backpressure_sema, - ) - if chunksize == 1: self._taskqueue.put( ( - task_generation(result._job, func, iterable), + self._guarded_task_generation(result._job, func, iterable, + result._buffersize_sema), result._set_length, ) ) @@ -449,7 +441,8 @@ def imap(self, func, iterable, chunksize=1, buffersize=None): task_batches = Pool._get_tasks(func, iterable, chunksize) self._taskqueue.put( ( - task_generation(result._job, mapstar, task_batches), + self._guarded_task_generation(result._job, mapstar, task_batches, + result._buffersize_sema), result._set_length, ) ) @@ -471,19 +464,11 @@ def imap_unordered(self, func, iterable, chunksize=1, buffersize=None): raise ValueError("buffersize must be None or > 0") result = IMapUnorderedIterator(self, buffersize) - - if result._backpressure_sema is None: - task_generation = self._guarded_task_generation - else: - task_generation = functools.partial( - self._guarded_task_generation_lazy, - backpressure_sema=result._backpressure_sema, - ) - if 
chunksize == 1: self._taskqueue.put( ( - task_generation(result._job, func, iterable), + self._guarded_task_generation(result._job, func, iterable, + result._buffersize_sema), result._set_length, ) ) @@ -492,7 +477,8 @@ def imap_unordered(self, func, iterable, chunksize=1, buffersize=None): task_batches = Pool._get_tasks(func, iterable, chunksize) self._taskqueue.put( ( - task_generation(result._job, mapstar, task_batches), + self._guarded_task_generation(result._job, mapstar, task_batches, + result._buffersize_sema), result._set_length, ) ) @@ -727,7 +713,8 @@ def _help_stuff_finish(inqueue, task_handler, size): @classmethod def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, change_notifier, - worker_handler, task_handler, result_handler, cache): + worker_handler, task_handler, result_handler, cache, + taskqueue_buffersize_semaphores): # this is guaranteed to only be called once util.debug('finalizing pool') @@ -738,6 +725,10 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, change_notifier, change_notifier.put(None) task_handler._state = TERMINATE + # Release all semaphores to wake up task_handler to stop. 
+ for job_id, sema in tuple(taskqueue_buffersize_semaphores.items()): + taskqueue_buffersize_semaphores.pop(job_id) + sema.release() util.debug('helping task handler/workers to finish') cls._help_stuff_finish(inqueue, task_handler, len(pool)) @@ -893,11 +884,13 @@ def __init__(self, pool, buffersize): self._length = None self._unsorted = {} self._cache[self._job] = self - if buffersize is None: - self._backpressure_sema = None + self._buffersize_sema = None else: - self._backpressure_sema = threading.Semaphore(buffersize) + self._buffersize_sema = threading.Semaphore(buffersize) + self._pool._taskqueue_buffersize_semaphores[self] = ( + self._buffersize_sema + ) def __iter__(self): return self @@ -908,25 +901,30 @@ def next(self, timeout=None): item = self._items.popleft() except IndexError: if self._index == self._length: - self._pool = None - raise StopIteration from None + self._stop_iterator() self._cond.wait(timeout) try: item = self._items.popleft() except IndexError: if self._index == self._length: - self._pool = None - raise StopIteration from None + self._stop_iterator() raise TimeoutError from None - if self._backpressure_sema is not None: - self._backpressure_sema.release() + if self._buffersize_sema is not None: + self._buffersize_sema.release() success, value = item if success: return value raise value + def _stop_iterator(self): + if self._pool is not None: + # could be deleted in previous `.next()` calls + self._pool._taskqueue_buffersize_semaphores.pop(self._job) + self._pool = None + raise StopIteration from None + __next__ = next # XXX def _set(self, i, obj): From ef5f59c081527b776ca646a9ded559f809235c3d Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 01:34:03 +0200 Subject: [PATCH 05/11] Add 2 basic `ThreadPool.imap()` tests w/ and w/o buffersize --- Lib/test/_test_multiprocessing.py | 58 +++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/Lib/test/_test_multiprocessing.py 
b/Lib/test/_test_multiprocessing.py index a1259ff1d63d18..414c90725c4391 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -2929,6 +2929,64 @@ def test_imap(self): self.assertEqual(next(it), i*i) self.assertRaises(StopIteration, it.__next__) + def test_imap_inf_iterable_with_slow_task(self): + if self.TYPE in ("processes", "manager"): + self.skipTest("test not appropriate for {}".format(self.TYPE)) + + processes = 4 + p = self.Pool(processes) + + tasks_started_later = 2 + last_produced_task_arg = Value("i") + + def produce_args(): + for arg in range(1, processes + tasks_started_later + 1): + last_produced_task_arg.value = arg + yield arg + + it = p.imap(functools.partial(sqr, wait=0.2), produce_args()) + + next(it) + time.sleep(0.2) + # `iterable` should've been advanced only up by `processes` times, + # but in fact advances further (by `>=processes+1`). + # In this case, it advances to the maximum value. + self.assertGreater(last_produced_task_arg.value, processes + 1) + + p.terminate() + p.join() + + def test_imap_inf_iterable_with_slow_task_and_buffersize(self): + if self.TYPE in ("processes", "manager"): + self.skipTest("test not appropriate for {}".format(self.TYPE)) + + processes = 4 + p = self.Pool(processes) + + tasks_started_later = 2 + last_produced_task_arg = Value("i") + + def produce_args(): + for arg in range(1, processes + tasks_started_later + 1): + last_produced_task_arg.value = arg + yield arg + + it = p.imap( + functools.partial(sqr, wait=0.2), + produce_args(), + buffersize=processes, + ) + + time.sleep(0.2) + self.assertEqual(last_produced_task_arg.value, processes) + + next(it) + time.sleep(0.2) + self.assertEqual(last_produced_task_arg.value, processes + 1) + + p.terminate() + p.join() + def test_imap_handle_iterable_exception(self): if self.TYPE == 'manager': self.skipTest('test not appropriate for {}'.format(self.TYPE)) From bb85e0e07a7fb09cc9a2df5b600463ec0d073895 Mon Sep 17 00:00:00 2001 From: 
Oleksandr Baltian Date: Tue, 22 Jul 2025 01:37:56 +0200 Subject: [PATCH 06/11] Fix accidental swap in imports --- Lib/multiprocessing/pool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index b8caac82e00b0c..836846d00c3ebc 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -27,7 +27,7 @@ # If threading is available then ThreadPool should be provided. Therefore # we avoid top-level imports which are liable to fail on some systems. from . import util -from . import TimeoutError, get_context +from . import get_context, TimeoutError from .connection import wait # From 35e93a9e336f2ce994da67b56478c12b5e741f37 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 01:41:48 +0200 Subject: [PATCH 07/11] clear Pool._taskqueue_buffersize_semaphores safely --- Lib/multiprocessing/pool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index 836846d00c3ebc..79148f835b4b2c 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -727,7 +727,7 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, change_notifier, task_handler._state = TERMINATE # Release all semaphores to wake up task_handler to stop. 
for job_id, sema in tuple(taskqueue_buffersize_semaphores.items()): - taskqueue_buffersize_semaphores.pop(job_id) + taskqueue_buffersize_semaphores.pop(job_id, None) sema.release() util.debug('helping task handler/workers to finish') @@ -920,8 +920,8 @@ def next(self, timeout=None): def _stop_iterator(self): if self._pool is not None: - # could be deleted in previous `.next()` calls - self._pool._taskqueue_buffersize_semaphores.pop(self._job) + # `self._pool` could be set to `None` in previous `.next()` calls + self._pool._taskqueue_buffersize_semaphores.pop(self._job, None) self._pool = None raise StopIteration from None From 5b8db624bd533fe8d626e61254cbd364fc628b48 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 01:51:23 +0200 Subject: [PATCH 08/11] Slightly optimize Pool._taskqueue_buffersize_semaphores terminate --- Lib/multiprocessing/pool.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index 79148f835b4b2c..36951cfae9ed31 100644 --- a/Lib/multiprocessing/pool.py +++ b/Lib/multiprocessing/pool.py @@ -726,9 +726,10 @@ def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, change_notifier, task_handler._state = TERMINATE # Release all semaphores to wake up task_handler to stop. 
- for job_id, sema in tuple(taskqueue_buffersize_semaphores.items()): - taskqueue_buffersize_semaphores.pop(job_id, None) - sema.release() + for job_id in tuple(taskqueue_buffersize_semaphores.keys()): + sema = taskqueue_buffersize_semaphores.pop(job_id, None) + if sema is not None: + sema.release() util.debug('helping task handler/workers to finish') cls._help_stuff_finish(inqueue, task_handler, len(pool)) From dd6977576378cfb376bc66a3866f21fd6dc153e7 Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 01:55:01 +0200 Subject: [PATCH 09/11] Rename `Pool.imap()` buffersize-related tests --- Lib/test/_test_multiprocessing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 414c90725c4391..8eebe100fc3adf 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -2929,7 +2929,7 @@ def test_imap(self): self.assertEqual(next(it), i*i) self.assertRaises(StopIteration, it.__next__) - def test_imap_inf_iterable_with_slow_task(self): + def test_imap_fast_iterable_with_slow_task(self): if self.TYPE in ("processes", "manager"): self.skipTest("test not appropriate for {}".format(self.TYPE)) @@ -2956,7 +2956,7 @@ def produce_args(): p.terminate() p.join() - def test_imap_inf_iterable_with_slow_task_and_buffersize(self): + def test_imap_fast_iterable_with_slow_task_and_buffersize(self): if self.TYPE in ("processes", "manager"): self.skipTest("test not appropriate for {}".format(self.TYPE)) From db7a8a5e43799e5912495b61d78454e7413fbb8f Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 08:05:41 +0200 Subject: [PATCH 10/11] Fix typo in `IMapIterator.__init__()` --- Lib/multiprocessing/pool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/multiprocessing/pool.py b/Lib/multiprocessing/pool.py index 36951cfae9ed31..caaf9497b1f169 100644 --- a/Lib/multiprocessing/pool.py +++ 
b/Lib/multiprocessing/pool.py @@ -889,7 +889,7 @@ def __init__(self, pool, buffersize): self._buffersize_sema = None else: self._buffersize_sema = threading.Semaphore(buffersize) - self._pool._taskqueue_buffersize_semaphores[self] = ( + self._pool._taskqueue_buffersize_semaphores[self._job] = ( self._buffersize_sema ) From 0ed9a0f3a83d40ffc19221c739c52df500a5b1be Mon Sep 17 00:00:00 2001 From: Oleksandr Baltian Date: Tue, 22 Jul 2025 09:06:09 +0200 Subject: [PATCH 11/11] Add tests for buffersize combinations with other kwargs --- Lib/test/_test_multiprocessing.py | 79 ++++++++++++++++++++++++------- 1 file changed, 62 insertions(+), 17 deletions(-) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 8eebe100fc3adf..0c67f625643b1b 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -2916,21 +2916,45 @@ def test_async_timeout(self): p.join() def test_imap(self): - it = self.pool.imap(sqr, list(range(10))) - self.assertEqual(list(it), list(map(sqr, list(range(10))))) - - it = self.pool.imap(sqr, list(range(10))) - for i in range(10): - self.assertEqual(next(it), i*i) - self.assertRaises(StopIteration, it.__next__) + optimal_buffersize = 4 # `self.pool` size + buffersize_variants = [ + {"buffersize": None}, + {"buffersize": 1}, + {"buffersize": optimal_buffersize}, + {"buffersize": optimal_buffersize * 2}, + ] - it = self.pool.imap(sqr, list(range(1000)), chunksize=100) - for i in range(1000): - self.assertEqual(next(it), i*i) - self.assertRaises(StopIteration, it.__next__) + for kwargs in ({}, *buffersize_variants): + with self.subTest(**kwargs): + iterable = range(10) + if self.TYPE != "threads": + iterable = list(iterable) + it = self.pool.imap(sqr, iterable, **kwargs) + self.assertEqual(list(it), list(map(sqr, list(range(10))))) + + iterable = range(10) + if self.TYPE != "threads": + iterable = list(iterable) + it = self.pool.imap(sqr, iterable, **kwargs) + for i in range(10): + 
self.assertEqual(next(it), i * i) + self.assertRaises(StopIteration, it.__next__) + + for kwargs in ( + {"chunksize": 100}, + {"chunksize": 100, "buffersize": optimal_buffersize}, + ): + with self.subTest(**kwargs): + iterable = range(1000) + if self.TYPE != "threads": + iterable = list(iterable) + it = self.pool.imap(sqr, iterable, **kwargs) + for i in range(1000): + self.assertEqual(next(it), i * i) + self.assertRaises(StopIteration, it.__next__) def test_imap_fast_iterable_with_slow_task(self): - if self.TYPE in ("processes", "manager"): + if self.TYPE != "threads": self.skipTest("test not appropriate for {}".format(self.TYPE)) processes = 4 @@ -2957,7 +2981,7 @@ def produce_args(): p.join() def test_imap_fast_iterable_with_slow_task_and_buffersize(self): - if self.TYPE in ("processes", "manager"): + if self.TYPE != "threads": self.skipTest("test not appropriate for {}".format(self.TYPE)) processes = 4 @@ -3014,11 +3038,32 @@ def test_imap_handle_iterable_exception(self): self.assertRaises(SayWhenError, it.__next__) def test_imap_unordered(self): - it = self.pool.imap_unordered(sqr, list(range(10))) - self.assertEqual(sorted(it), list(map(sqr, list(range(10))))) + optimal_buffersize = 4 # `self.pool` size + buffersize_variants = [ + {"buffersize": None}, + {"buffersize": 1}, + {"buffersize": optimal_buffersize}, + {"buffersize": optimal_buffersize * 2}, + ] - it = self.pool.imap_unordered(sqr, list(range(1000)), chunksize=100) - self.assertEqual(sorted(it), list(map(sqr, list(range(1000))))) + for kwargs in ({}, *buffersize_variants): + with self.subTest(**kwargs): + iterable = range(10) + if self.TYPE != "threads": + iterable = list(iterable) + it = self.pool.imap_unordered(sqr, iterable, **kwargs) + self.assertEqual(sorted(it), list(map(sqr, list(range(10))))) + + for kwargs in ( + {"chunksize": 100}, + {"chunksize": 100, "buffersize": optimal_buffersize}, + ): + with self.subTest(**kwargs): + iterable = range(1000) + if self.TYPE != "threads": + iterable = 
list(iterable) + it = self.pool.imap_unordered(sqr, iterable, **kwargs) + self.assertEqual(sorted(it), list(map(sqr, list(range(1000))))) def test_imap_unordered_handle_iterable_exception(self): if self.TYPE == 'manager': pFad - Phonifier reborn
