ndarray/parallel/impl_par_methods.rs
use crate::AssignElem;
use crate::{Array, ArrayBase, DataMut, Dimension, IntoNdProducer, NdProducer, Zip};

use super::send_producer::SendProducer;
use crate::parallel::par::ParallelSplits;
use crate::parallel::prelude::*;

use crate::partial::Partial;

/// # Parallel methods
///
/// These methods require crate feature `rayon`.
impl<A, S, D> ArrayBase<S, D>
where
    S: DataMut<Elem = A>,
    D: Dimension,
    A: Send + Sync,
{
    /// Parallel version of `map_inplace`.
    ///
    /// Modify the array in place by calling `f` by mutable reference on each element.
    ///
    /// Elements are visited in arbitrary order.
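    ///
    /// ## Examples
    ///
    /// A minimal usage sketch: increment every element in parallel.
    ///
    /// ```rust
    /// use ndarray::Array2;
    ///
    /// let mut a = Array2::<f64>::zeros((128, 128));
    /// a.par_map_inplace(|x| *x += 1.);
    /// assert!(a.iter().all(|&x| x == 1.));
    /// ```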
    pub fn par_map_inplace<F>(&mut self, f: F)
    where F: Fn(&mut A) + Sync + Send
    {
        self.view_mut().into_par_iter().for_each(f)
    }

    /// Parallel version of `mapv_inplace`.
    ///
    /// Modify the array in place by calling `f` by **v**alue on each element.
    /// The array is updated with the new values.
    ///
    /// Elements are visited in arbitrary order.
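    ///
    /// ## Examples
    ///
    /// A minimal usage sketch: square every element in parallel.
    ///
    /// ```rust
    /// use ndarray::Array2;
    ///
    /// let mut a = Array2::from_elem((64, 64), 2.);
    /// a.par_mapv_inplace(|x| x * x);
    /// assert!(a.iter().all(|&x| x == 4.));
    /// ```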
    pub fn par_mapv_inplace<F>(&mut self, f: F)
    where
        F: Fn(A) -> A + Sync + Send,
        A: Clone,
    {
        self.view_mut()
            .into_par_iter()
            .for_each(move |x| *x = f(x.clone()))
    }
}

// Zip

// Cap on how many times the parallel collect may split its input; this keeps the
// chunks of the output (and the per-chunk bookkeeping) from becoming too small.
const COLLECT_MAX_SPLITS: usize = 10;

macro_rules! zip_impl {
    ($([$notlast:ident $($p:ident)*],)+) => {
        $(
        #[allow(non_snake_case)]
        impl<D, $($p),*> Zip<($($p,)*), D>
        where $($p::Item: Send,)*
              $($p: Send,)*
              D: Dimension,
              $($p: NdProducer<Dim = D>,)*
        {
            /// The `par_for_each` method for `Zip`.
            ///
            /// This is a shorthand for using `.into_par_iter().for_each()` on
            /// `Zip`.
            ///
            /// Requires crate feature `rayon`.
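            ///
            /// ## Examples
            ///
            /// A minimal usage sketch: walk two arrays in lock step, writing into the first.
            ///
            /// ```rust
            /// use ndarray::{Array2, Zip};
            ///
            /// let mut a = Array2::<f64>::zeros((16, 16));
            /// let b = Array2::from_elem((16, 16), 3.);
            ///
            /// Zip::from(&mut a).and(&b).par_for_each(|a, &b| {
            ///     *a = 2. * b;
            /// });
            /// assert!(a.iter().all(|&x| x == 6.));
            /// ```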
            pub fn par_for_each<F>(self, function: F)
            where F: Fn($($p::Item),*) + Sync + Send
            {
                self.into_par_iter().for_each(move |($($p,)*)| function($($p),*))
            }

            expand_if!(@bool [$notlast]

            /// Map and collect the results into a new array, which has the same size as the
            /// inputs.
            ///
            /// If all inputs are c-order, or all are f-order, that layout is preserved in
            /// the output.
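            ///
            /// ## Examples
            ///
            /// A minimal usage sketch: elementwise sum of two arrays into a new array.
            ///
            /// ```rust
            /// use ndarray::{Array2, Zip};
            ///
            /// let a = Array2::from_elem((32, 32), 2.);
            /// let b = Array2::from_elem((32, 32), 3.);
            ///
            /// let sum = Zip::from(&a).and(&b).par_map_collect(|&a, &b| a + b);
            /// assert_eq!(sum, Array2::from_elem((32, 32), 5.));
            /// ```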
            pub fn par_map_collect<R>(self, f: impl Fn($($p::Item,)*) -> R + Sync + Send)
                -> Array<R, D>
            where R: Send
            {
                let mut output = self.uninitialized_for_current_layout::<R>();
                let total_len = output.len();

                // Create a parallel iterator that produces chunks of the zip with the output
                // array. It's crucial that both parts split in the same way, so that the
                // corresponding chunks of the output stay contiguous.
                //
                // Use a raw view so that we can alias the output data here and in the
                // partial result.
                let splits = unsafe {
                    ParallelSplits {
                        iter: self.and(SendProducer::new(output.raw_view_mut().cast::<R>())),
                        // Keep it from splitting the Zip down too small
                        max_splits: COLLECT_MAX_SPLITS,
                    }
                };

                let collect_result = splits.map(move |zip| {
                    // Apply the mapping function to this chunk of the zip; the returned
                    // partial result tracks the contiguous slice of elements written so far,
                    // so that they can be dropped if a panic unwinds mid-collect.
                    unsafe {
                        zip.collect_with_partial(&f)
                    }
                })
                .reduce(Partial::stub, Partial::try_merge);

                if std::mem::needs_drop::<R>() {
                    debug_assert_eq!(total_len, collect_result.len,
                        "collect len is not correct, expected {}", total_len);
                    assert!(collect_result.len == total_len,
                        "Collect: Expected number of writes not completed");
                }

                // The collect result is complete here; release its ownership of the elements
                // and transfer them to the output array.
                collect_result.release_ownership();
                unsafe {
                    output.assume_init()
                }
            }

            /// Map and assign the results into the producer `into`, which should have the same
            /// size as the other inputs.
            ///
            /// The producer should have assignable items as dictated by the `AssignElem` trait,
            /// for example `&mut R`.
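            ///
            /// ## Examples
            ///
            /// A minimal usage sketch: double each element of `a` into a preallocated output.
            ///
            /// ```rust
            /// use ndarray::{Array2, Zip};
            ///
            /// let a = Array2::from_elem((16, 16), 3.);
            /// let mut out = Array2::<f64>::zeros((16, 16));
            ///
            /// Zip::from(&a).par_map_assign_into(&mut out, |&a| a * 2.);
            /// assert_eq!(out, Array2::from_elem((16, 16), 6.));
            /// ```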
            pub fn par_map_assign_into<R, Q>(self, into: Q, f: impl Fn($($p::Item,)*) -> R + Sync + Send)
            where Q: IntoNdProducer<Dim = D>,
                  Q::Item: AssignElem<R> + Send,
                  Q::Output: Send,
            {
                self.and(into)
                    .par_for_each(move |$($p,)* output_| {
                        output_.assign_elem(f($($p),*));
                    });
            }

            /// Parallel version of `fold`.
            ///
            /// Splits the producer into multiple tasks which each accumulate a single value
            /// using the `fold` closure. Those tasks are executed in parallel and their results
            /// are then combined into a single value using the `reduce` closure.
            ///
            /// The `identity` closure provides the initial values for each of the tasks and
            /// for the final reduction.
            ///
            /// This is a shorthand for calling `self.into_par_iter().fold(...).reduce(...)`.
            ///
            /// Note that it is often more efficient to parallelize not per element but over
            /// larger chunks of an array, such as generalized rows, processing each chunk
            /// with a sequential variant of the accumulation.
            /// For example, sum each row sequentially within a parallel task, taking advantage
            /// of locality and vectorization within each task, and then reduce the row sums
            /// to the sum of the matrix.
            ///
            /// Also note that the splitting of the producer into multiple tasks is _not_
            /// deterministic, which needs to be considered when analyzing the accuracy of
            /// such an operation (e.g. the order of floating-point summation).
            ///
            /// ## Examples
            ///
            /// ```rust
            /// use ndarray::{Array, Zip};
            ///
            /// let a = Array::<usize, _>::ones((128, 1024));
            /// let b = Array::<usize, _>::ones(128);
            ///
            /// let weighted_sum = Zip::from(a.rows()).and(&b).par_fold(
            ///     || 0,
            ///     |sum, row, factor| sum + row.sum() * factor,
            ///     |sum, other_sum| sum + other_sum,
            /// );
            ///
            /// assert_eq!(weighted_sum, a.len());
            /// ```
            pub fn par_fold<ID, F, R, T>(self, identity: ID, fold: F, reduce: R) -> T
            where
                ID: Fn() -> T + Send + Sync + Clone,
                F: Fn(T, $($p::Item),*) -> T + Send + Sync,
                R: Fn(T, T) -> T + Send + Sync,
                T: Send,
            {
                self.into_par_iter()
                    .fold(identity.clone(), move |accumulator, ($($p,)*)| {
                        fold(accumulator, $($p),*)
                    })
                    .reduce(identity, reduce)
            }

            );
        }
        )+
    };
}

zip_impl! {
    [true P1],
    [true P1 P2],
    [true P1 P2 P3],
    [true P1 P2 P3 P4],
    [true P1 P2 P3 P4 P5],
    [false P1 P2 P3 P4 P5 P6],
}