
gh-64192: Make imap()/imap_unordered() in multiprocessing.pool actually lazy #136871

Open

Wants to merge 21 commits into base: main

Changes from 1 commit

Commits (21)
d8e8a02
draft: impl lazy input consumption in mp.Pool.imap(_unordered)
obaltian Jul 20, 2025
f6f423c
Use semaphore to synchronize threads
obaltian Jul 20, 2025
937862d
Update buffersize behavior to match concurrent.futures.Executor behavior
obaltian Jul 21, 2025
b6f6caa
Release all `buffersize_lock` obj from the parent thread when terminate
obaltian Jul 21, 2025
3bafd5d
Add 2 basic `ThreadPool.imap()` tests w/ and w/o buffersize
obaltian Jul 21, 2025
e43232b
Fix accidental swap in imports
obaltian Jul 21, 2025
dd416e0
clear Pool._taskqueue_buffersize_semaphores safely
obaltian Jul 21, 2025
99f5a8c
Slightly optimize Pool._taskqueue_buffersize_semaphores terminate
obaltian Jul 21, 2025
2a53398
Rename `Pool.imap()` buffersize-related tests
obaltian Jul 21, 2025
f8878eb
Fix typo in `IMapIterator.__init__()`
obaltian Jul 22, 2025
2ca51e3
Add tests for buffersize combinations with other kwargs
obaltian Jul 22, 2025
bf27d5d
Remove if-branch in `_terminate_pool`
obaltian Jul 27, 2025
508c765
Add more edge-case tests for `imap` and `imap_unordered`
obaltian Jul 27, 2025
dff1167
Split inf iterable test for `imap` and `imap_unordered`
obaltian Jul 27, 2025
94cc0b9
Add doc for `buffersize` argument of `imap` and `imap_unordered`
obaltian Jul 27, 2025
816fb6c
Add *versionadded* for `imap_unordered`
obaltian Jul 28, 2025
88cc10a
Remove ambiguity in `buffersize` description.
obaltian Jul 28, 2025
05e3b24
Set *versionadded* as next in docs
obaltian Jul 28, 2025
503982f
Add whatsnew entry
obaltian Jul 28, 2025
b92cad9
Address agreed comments on code formatting/minor refactoring
obaltian Jul 28, 2025
02ebc6a
Remove `imap` and `imap_unordered` body code duplication
obaltian Jul 28, 2025
Use semaphore to synchronize threads
Using `threading.Semaphore` makes it easier to cap the number of
concurrently running tasks. It also removes the busy wait in the
task-generation thread, which now blocks on the semaphore instead.

I've also updated the code to follow the backpressure pattern: new
tasks are scheduled as soon as the user consumes the old ones.
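
As an illustration only (not part of the patch), a minimal self-contained sketch of the backpressure pattern described above: the producer acquires a semaphore before generating each task, and the consumer releases it after taking a result, so at most `capacity` tasks are ever in flight. The names (`produce_tasks`, `consume_results`, `capacity`) are hypothetical.

import queue
import threading

def produce_tasks(items, task_queue, backpressure_sema):
    # Block until the consumer has made room, then emit the next task.
    for item in items:
        backpressure_sema.acquire()
        task_queue.put(item)
    task_queue.put(None)  # sentinel: no more tasks

def consume_results(task_queue, backpressure_sema):
    while (item := task_queue.get()) is not None:
        print("processed", item)
        backpressure_sema.release()  # allow one more task to be generated

capacity = 4  # analogous to processes + buffersize in the patch
sema = threading.Semaphore(capacity)
tasks = queue.SimpleQueue()

producer = threading.Thread(target=produce_tasks, args=(range(10), tasks, sema))
producer.start()
consume_results(tasks, sema)
producer.join()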
obaltian committed Jul 27, 2025
commit f6f423cad66cec8901a51f47ea8570b01a525310
Lib/multiprocessing/pool.py: 112 changes (49 additions, 63 deletions)
@@ -14,6 +14,7 @@
 #

 import collections
+import functools
 import itertools
 import os
 import queue
@@ -395,32 +396,20 @@ def _guarded_task_generation(self, result_job, func, iterable):
             yield (result_job, i+1, _helper_reraises_exception, (e,), {})

     def _guarded_task_generation_lazy(self, result_job, func, iterable,
-                                      lazy_task_gen_helper):
-        '''Provides a generator of tasks for imap and imap_unordered with
+                                      backpressure_sema):
+        """Provides a generator of tasks for imap and imap_unordered with
         appropriate handling for iterables which throw exceptions during
-        iteration.'''
-        if not lazy_task_gen_helper.feature_enabled:
-            yield from self._guarded_task_generation(result_job, func, iterable)
-            return
-
+        iteration."""
         try:
             i = -1
             enumerated_iter = iter(enumerate(iterable))
-            thread = threading.current_thread()
-            max_generated_tasks = self._processes + lazy_task_gen_helper.buffersize
-
-            while thread._state == RUN:
-                with lazy_task_gen_helper.iterator_cond:
-                    if lazy_task_gen_helper.not_finished_tasks >= max_generated_tasks:
-                        continue  # wait for some task to be (picked up and) finished
-
+            while True:
+                backpressure_sema.acquire()
                 try:
-                    i, x = enumerated_iter.__next__()
+                    i, x = next(enumerated_iter)
                 except StopIteration:
                     break
-
                 yield (result_job, i, func, (x,), {})
-                lazy_task_gen_helper.tasks_generated += 1

         except Exception as e:
             yield (result_job, i+1, _helper_reraises_exception, (e,), {})
@@ -430,31 +419,32 @@ def imap(self, func, iterable, chunksize=1, buffersize=None):
         Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.
         '''
         self._check_running()
+        if chunksize < 1:
+            raise ValueError("Chunksize must be 1+, not {0:n}".format(chunksize))
+
+        result = IMapIterator(self, buffersize)
+
+        if result._backpressure_sema is None:
+            task_generation = self._guarded_task_generation
+        else:
+            task_generation = functools.partial(
+                self._guarded_task_generation_lazy,
+                backpressure_sema=result._backpressure_sema,
+            )
+
         if chunksize == 1:
-            result = IMapIterator(self, buffersize)
             self._taskqueue.put(
                 (
-                    self._guarded_task_generation_lazy(result._job,
-                                                       func,
-                                                       iterable,
-                                                       result._lazy_task_gen_helper),
+                    task_generation(result._job, func, iterable),
                     result._set_length,
                 )
             )
             return result
         else:
-            if chunksize < 1:
-                raise ValueError(
-                    "Chunksize must be 1+, not {0:n}".format(
-                        chunksize))
             task_batches = Pool._get_tasks(func, iterable, chunksize)
-            result = IMapIterator(self, buffersize)
             self._taskqueue.put(
                 (
-                    self._guarded_task_generation_lazy(result._job,
-                                                       mapstar,
-                                                       task_batches,
-                                                       result._lazy_task_gen_helper),
+                    task_generation(result._job, mapstar, task_batches),
                     result._set_length,
                 )
             )
Expand All @@ -465,30 +455,34 @@ def imap_unordered(self, func, iterable, chunksize=1, buffersize=None):
Like `imap()` method but ordering of results is arbitrary.
'''
self._check_running()
if chunksize < 1:
raise ValueError(
"Chunksize must be 1+, not {0!r}".format(chunksize)
)

result = IMapUnorderedIterator(self, buffersize)

if result._backpressure_sema is None:
task_generation = self._guarded_task_generation
else:
task_generation = functools.partial(
self._guarded_task_generation_lazy,
backpressure_sema=result._backpressure_sema,
)

if chunksize == 1:
result = IMapUnorderedIterator(self, buffersize)
self._taskqueue.put(
(
self._guarded_task_generation_lazy(result._job,
func,
iterable,
result._lazy_task_gen_helper),
task_generation(result._job, func, iterable),
result._set_length,
)
)
return result
else:
if chunksize < 1:
raise ValueError(
"Chunksize must be 1+, not {0!r}".format(chunksize))
task_batches = Pool._get_tasks(func, iterable, chunksize)
result = IMapUnorderedIterator(self, buffersize)
self._taskqueue.put(
(
self._guarded_task_generation_lazy(result._job,
mapstar,
task_batches,
result._lazy_task_gen_helper),
task_generation(result._job, mapstar, task_batches),
result._set_length,
)
)
@@ -889,7 +883,13 @@ def __init__(self, pool, buffersize):
         self._length = None
         self._unsorted = {}
         self._cache[self._job] = self
-        self._lazy_task_gen_helper = _LazyTaskGenHelper(buffersize, self._cond)
+
+        if buffersize is None:
+            self._backpressure_sema = None
+        else:
+            self._backpressure_sema = threading.Semaphore(
+                value=self._pool._processes + buffersize
+            )

     def __iter__(self):
         return self
@@ -910,7 +910,9 @@ def next(self, timeout=None):
                         self._pool = None
                         raise StopIteration from None
                     raise TimeoutError from None
-        self._lazy_task_gen_helper.tasks_finished += 1
+
+        if self._backpressure_sema:
+            self._backpressure_sema.release()

         success, value = item
         if success:
@@ -959,22 +961,6 @@ def _set(self, i, obj):
                 del self._cache[self._job]
                 self._pool = None

-#
-# Class to store stats for lazy task generation and share them
-# between the main thread and `_guarded_task_generation()` thread.
-#
-class _LazyTaskGenHelper(object):
-    def __init__(self, buffersize, iterator_cond):
-        self.feature_enabled = buffersize is not None
-        self.buffersize = buffersize
-        self.tasks_generated = 0
-        self.tasks_finished = 0
-        self.iterator_cond = iterator_cond
-
-    @property
-    def not_finished_tasks(self):
-        return self.tasks_generated - self.tasks_finished
-
 #
 #
 #
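
For reference, a hedged usage sketch assuming the full patch in this PR is applied: with `buffersize` set, `imap()` pulls at most `processes + buffersize` input items ahead of consumption, so even an infinite iterable can be passed. The `square` and `naturals` helpers below are illustrative only.

import itertools
from multiprocessing import Pool

def square(x):
    return x * x

def naturals():
    # Infinite input: without lazy consumption, imap() would try to
    # exhaust this iterable up front and never return.
    return itertools.count(1)

if __name__ == "__main__":
    with Pool(processes=4) as pool:
        # With buffersize=2, at most processes + buffersize = 6 input
        # items are fetched from the iterable before results are consumed.
        results = pool.imap(square, naturals(), buffersize=2)
        for _ in range(10):
            print(next(results))
        # Clean shutdown with a still-blocked generator thread relies on
        # the later commits in this PR that release the semaphore on
        # terminate.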