@@ -390,64 +390,108 @@ def _guarded_task_generation(self, result_job, func, iterable):
390
390
i = - 1
391
391
for i , x in enumerate (iterable ):
392
392
yield (result_job , i , func , (x ,), {})
393
+
394
+ except Exception as e :
395
+ yield (result_job , i + 1 , _helper_reraises_exception , (e ,), {})
396
+
397
def _guarded_task_generation_lazy(self, result_job, func, iterable,
                                  lazy_task_gen_helper):
    '''Provides a generator of tasks for imap and imap_unordered with
    appropriate handling for iterables which throw exceptions during
    iteration.

    When `lazy_task_gen_helper.feature_enabled` is false, all the work
    is delegated to `_guarded_task_generation()`.  Otherwise `iterable`
    is consumed lazily: the next item is pulled only while
    `lazy_task_gen_helper.not_finished_tasks` is below
    `self._processes + lazy_task_gen_helper.buffersize`.

    Every yielded task is a tuple
    `(result_job, index, callable, args, kwds)`.  If an exception is
    raised while tasks are being generated, one final task is yielded
    that calls `_helper_reraises_exception` with that exception, using
    the index following the last one successfully read.
    '''
    import time  # local import keeps this method self-contained

    if not lazy_task_gen_helper.feature_enabled:
        yield from self._guarded_task_generation(result_job, func, iterable)
        return

    # How long to back off while the buffer is full.  Without a pause the
    # loop below would spin at full speed, re-acquiring `iterator_cond`
    # over and over.
    # NOTE(review): polling is used instead of `iterator_cond.wait()`
    # because nothing visible here notifies the condition when
    # `tasks_finished` changes -- confirm before switching to wait().
    poll_interval = 0.001

    # Bound up front so the handler below can always compute `i + 1`.
    i = -1
    try:
        # enumerate() already returns an iterator; no extra iter() needed.
        enumerated_iter = enumerate(iterable)
        thread = threading.current_thread()
        max_generated_tasks = self._processes + lazy_task_gen_helper.buffersize

        # Keep producing only while the current thread is marked as RUN.
        while thread._state == RUN:
            with lazy_task_gen_helper.iterator_cond:
                buffer_full = (lazy_task_gen_helper.not_finished_tasks
                               >= max_generated_tasks)
            if buffer_full:
                # Sleep with the lock released, then re-check both the
                # thread state and the backlog.
                time.sleep(poll_interval)
                continue

            try:
                i, x = next(enumerated_iter)
            except StopIteration:
                break

            yield (result_job, i, func, (x,), {})
            # Counted only once the consumer of this generator asks for
            # the next task, i.e. after the task above was handed over.
            lazy_task_gen_helper.tasks_generated += 1

    except Exception as e:
        yield (result_job, i + 1, _helper_reraises_exception, (e,), {})
395
427
396
def imap(self, func, iterable, chunksize=1, buffersize=None):
    '''
    Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.

    `chunksize` must be at least 1.  With a value other than 1 the
    input is split into batches via `Pool._get_tasks()` and the
    returned generator flattens the per-batch results.

    `buffersize` is forwarded to the result iterator.  With `None`
    (the default) task generation is not throttled.  Otherwise tasks
    (or batches of tasks, when `chunksize` is not 1) are generated
    lazily, keeping at most `processes + buffersize` generated tasks
    outstanding.  A negative `buffersize` is rejected, since it could
    shrink that limit to zero or below and stall task generation.

    Returns an `IMapIterator` when `chunksize == 1`, otherwise a
    generator over the individual results.

    Raises `ValueError` for a negative `buffersize` or a `chunksize`
    below 1.
    '''
    self._check_running()
    # Fail fast, before a result iterator is created for the job.
    if buffersize is not None and buffersize < 0:
        raise ValueError(
            "buffersize must be None or >= 0, not {0!r}".format(
                buffersize))
    if chunksize == 1:
        result = IMapIterator(self, buffersize)
        self._taskqueue.put(
            (
                self._guarded_task_generation_lazy(result._job,
                                                   func,
                                                   iterable,
                                                   result._lazy_task_gen_helper),
                result._set_length,
            )
        )
        return result
    else:
        if chunksize < 1:
            raise ValueError(
                "Chunksize must be 1+, not {0:n}".format(
                    chunksize))
        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = IMapIterator(self, buffersize)
        self._taskqueue.put(
            (
                self._guarded_task_generation_lazy(result._job,
                                                   mapstar,
                                                   task_batches,
                                                   result._lazy_task_gen_helper),
                result._set_length,
            )
        )
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in result for item in chunk)
424
462
425
def imap_unordered(self, func, iterable, chunksize=1, buffersize=None):
    '''
    Like `imap()` method but ordering of results is arbitrary.

    `chunksize` must be at least 1.  With a value other than 1 the
    input is split into batches via `Pool._get_tasks()` and the
    returned generator flattens the per-batch results.

    `buffersize` is forwarded to the result iterator.  With `None`
    (the default) task generation is not throttled.  Otherwise tasks
    (or batches of tasks, when `chunksize` is not 1) are generated
    lazily, keeping at most `processes + buffersize` generated tasks
    outstanding.  A negative `buffersize` is rejected, since it could
    shrink that limit to zero or below and stall task generation.

    Returns an `IMapUnorderedIterator` when `chunksize == 1`,
    otherwise a generator over the individual results.

    Raises `ValueError` for a negative `buffersize` or a `chunksize`
    below 1.
    '''
    self._check_running()
    # Fail fast, before a result iterator is created for the job.
    if buffersize is not None and buffersize < 0:
        raise ValueError(
            "buffersize must be None or >= 0, not {0!r}".format(
                buffersize))
    if chunksize == 1:
        result = IMapUnorderedIterator(self, buffersize)
        self._taskqueue.put(
            (
                self._guarded_task_generation_lazy(result._job,
                                                   func,
                                                   iterable,
                                                   result._lazy_task_gen_helper),
                result._set_length,
            )
        )
        return result
    else:
        if chunksize < 1:
            raise ValueError(
                "Chunksize must be 1+, not {0!r}".format(chunksize))
        task_batches = Pool._get_tasks(func, iterable, chunksize)
        result = IMapUnorderedIterator(self, buffersize)
        self._taskqueue.put(
            (
                self._guarded_task_generation_lazy(result._job,
                                                   mapstar,
                                                   task_batches,
                                                   result._lazy_task_gen_helper),
                result._set_length,
            )
        )
        # Each result is a whole batch; hand the items out one by one.
        return (item for chunk in result for item in chunk)
452
496
453
497
def apply_async (self , func , args = (), kwds = {}, callback = None ,
@@ -835,8 +879,7 @@ def _set(self, i, success_result):
835
879
#
836
880
837
881
class IMapIterator (object ):
838
-
839
- def __init__ (self , pool ):
882
+ def __init__ (self , pool , buffersize ):
840
883
self ._pool = pool
841
884
self ._cond = threading .Condition (threading .Lock ())
842
885
self ._job = next (job_counter )
@@ -846,6 +889,7 @@ def __init__(self, pool):
846
889
self ._length = None
847
890
self ._unsorted = {}
848
891
self ._cache [self ._job ] = self
892
+ self ._lazy_task_gen_helper = _LazyTaskGenHelper (buffersize , self ._cond )
849
893
850
894
def __iter__ (self ):
851
895
return self
@@ -866,6 +910,7 @@ def next(self, timeout=None):
866
910
self ._pool = None
867
911
raise StopIteration from None
868
912
raise TimeoutError from None
913
+ self ._lazy_task_gen_helper .tasks_finished += 1
869
914
870
915
success , value = item
871
916
if success :
@@ -914,6 +959,22 @@ def _set(self, i, obj):
914
959
del self ._cache [self ._job ]
915
960
self ._pool = None
916
961
962
+ #
963
+ # Class to store stats for lazy task generation and share them
964
+ # between the main thread and the thread running `_guarded_task_generation_lazy()`.
965
+ #
966
class _LazyTaskGenHelper:
    '''Counters and settings used for lazy task generation.

    One instance holds the requested buffer size, whether lazy
    generation is switched on at all, how many tasks were generated and
    finished so far, and the condition object guarding reads of the
    backlog.
    '''

    def __init__(self, buffersize, iterator_cond):
        # Lazy generation is requested by passing any buffersize but None.
        lazy_requested = buffersize is not None
        self.feature_enabled = lazy_requested
        self.buffersize = buffersize
        # Both counters start from zero and are bumped by the users of
        # this helper.
        self.tasks_generated, self.tasks_finished = 0, 0
        self.iterator_cond = iterator_cond

    @property
    def not_finished_tasks(self):
        '''Number of tasks generated so far minus the finished ones.'''
        backlog = self.tasks_generated - self.tasks_finished
        return backlog
977
+
917
978
#
918
979
#
919
980
#
0 commit comments