// ndarray/parallel/impl_par_methods.rs
use crate::AssignElem;
use crate::{Array, ArrayBase, DataMut, Dimension, IntoNdProducer, NdProducer, Zip};

use super::send_producer::SendProducer;
use crate::parallel::par::ParallelSplits;
use crate::parallel::prelude::*;

use crate::partial::Partial;
9
10/// # Parallel methods
11///
12/// These methods require crate feature `rayon`.
13impl<A, S, D> ArrayBase<S, D>
14where
15    S: DataMut<Elem = A>,
16    D: Dimension,
17    A: Send + Sync,
18{
19    /// Parallel version of `map_inplace`.
20    ///
21    /// Modify the array in place by calling `f` by mutable reference on each element.
22    ///
23    /// Elements are visited in arbitrary order.
24    pub fn par_map_inplace<F>(&mut self, f: F)
25    where F: Fn(&mut A) + Sync + Send
26    {
27        self.view_mut().into_par_iter().for_each(f)
28    }
29
30    /// Parallel version of `mapv_inplace`.
31    ///
32    /// Modify the array in place by calling `f` by **v**alue on each element.
33    /// The array is updated with the new values.
34    ///
35    /// Elements are visited in arbitrary order.
36    pub fn par_mapv_inplace<F>(&mut self, f: F)
37    where
38        F: Fn(A) -> A + Sync + Send,
39        A: Clone,
40    {
41        self.view_mut()
42            .into_par_iter()
43            .for_each(move |x| *x = f(x.clone()))
44    }
45}
46
// Zip

// Upper bound on how many times `ParallelSplits` may split a `Zip` during a
// parallel collect (see `par_map_collect` below).  Bounding the splits keeps
// the per-task chunks from becoming too small to be worth the scheduling
// overhead.
const COLLECT_MAX_SPLITS: usize = 10;
50
// Generates the parallel method impls for `Zip` at every arity listed in the
// invocation below.  Each `[$notlast P1 .. Pn]` group expands to one
// `impl Zip<(P1, .., Pn), D>`.  The `$notlast` flag gates (via `expand_if!`)
// the methods that call `self.and(..)` to append one more producer — they are
// omitted at the maximum arity, presumably because `Zip` cannot hold another
// producer there (TODO confirm against the `Zip::and` definition).
macro_rules! zip_impl {
    ($([$notlast:ident $($p:ident)*],)+) => {
        $(
        #[allow(non_snake_case)]
        impl<D, $($p),*> Zip<($($p,)*), D>
            where $($p::Item : Send , )*
                  $($p : Send , )*
                  D: Dimension,
                  $($p: NdProducer<Dim=D> ,)*
        {
            /// The `par_for_each` method for `Zip`.
            ///
            /// This is a shorthand for using `.into_par_iter().for_each()` on
            /// `Zip`.
            ///
            /// Requires crate feature `rayon`.
            pub fn par_for_each<F>(self, function: F)
                where F: Fn($($p::Item),*) + Sync + Send
            {
                self.into_par_iter().for_each(move |($($p,)*)| function($($p),*))
            }

            expand_if!(@bool [$notlast]

            /// Map and collect the results into a new array, which has the same size as the
            /// inputs.
            ///
            /// If all inputs are c- or f-order respectively, that is preserved in the output.
            pub fn par_map_collect<R>(self, f: impl Fn($($p::Item,)* ) -> R + Sync + Send)
                -> Array<R, D>
                where R: Send
            {
                // Allocate uninitialized output storage matching the zip's layout.
                let mut output = self.uninitialized_for_current_layout::<R>();
                let total_len = output.len();

                // Create a parallel iterator that produces chunks of the zip with the output
                // array.  It's crucial that both parts split in the same way, and in a way
                // so that the chunks of the output are still contig.
                //
                // Use a raw view so that we can alias the output data here and in the partial
                // result.
                let splits = unsafe {
                    ParallelSplits {
                        iter: self.and(SendProducer::new(output.raw_view_mut().cast::<R>())),
                        // Keep it from splitting the Zip down too small
                        max_splits: COLLECT_MAX_SPLITS,
                    }
                };

                // Each task writes its chunk and reports the initialized span as a
                // `Partial`; merging adjacent partials tracks how much of the output
                // has been written in case a panic unwinds mid-collect.
                let collect_result = splits.map(move |zip| {
                    // Apply the mapping function on this chunk of the zip
                    // Create a partial result for the contiguous slice of data being written to
                    unsafe {
                        zip.collect_with_partial(&f)
                    }
                })
                .reduce(Partial::stub, Partial::try_merge);

                // Only types with drop glue need the completeness check here: an
                // incomplete write count would mean `assume_init` later runs drops
                // on uninitialized elements.
                if std::mem::needs_drop::<R>() {
                    debug_assert_eq!(total_len, collect_result.len,
                        "collect len is not correct, expected {}", total_len);
                    assert!(collect_result.len == total_len,
                        "Collect: Expected number of writes not completed");
                }

                // Here the collect result is complete, and we release its ownership and transfer
                // it to the output array.
                collect_result.release_ownership();
                unsafe {
                    output.assume_init()
                }
            }

            /// Map and assign the results into the producer `into`, which should have the same
            /// size as the other inputs.
            ///
            /// The producer should have assignable items as dictated by the `AssignElem` trait,
            /// for example `&mut R`.
            pub fn par_map_assign_into<R, Q>(self, into: Q, f: impl Fn($($p::Item,)* ) -> R + Sync + Send)
                where Q: IntoNdProducer<Dim=D>,
                      Q::Item: AssignElem<R> + Send,
                      Q::Output: Send,
            {
                // Append `into` as one more producer and write each mapped value
                // through `AssignElem`.
                self.and(into)
                    .par_for_each(move |$($p, )* output_| {
                        output_.assign_elem(f($($p ),*));
                    });
            }

            /// Parallel version of `fold`.
            ///
            /// Splits the producer in multiple tasks which each accumulate a single value
            /// using the `fold` closure. Those tasks are executed in parallel and their results
            /// are then combined to a single value using the `reduce` closure.
            ///
            /// The `identity` closure provides the initial values for each of the tasks and
            /// for the final reduction.
            ///
            /// This is a shorthand for calling `self.into_par_iter().fold(...).reduce(...)`.
            ///
            /// Note that it is often more efficient to parallelize not per-element but rather
            /// based on larger chunks of an array like generalized rows and operating on each chunk
            /// using a sequential variant of the accumulation.
            /// For example, sum each row sequentially and in parallel, taking advantage of locality
            /// and vectorization within each task, and then reduce their sums to the sum of the matrix.
            ///
            /// Also note that the splitting of the producer into multiple tasks is _not_ deterministic
            /// which needs to be considered when the accuracy of such an operation is analyzed.
            ///
            /// ## Examples
            ///
            /// ```rust
            /// use ndarray::{Array, Zip};
            ///
            /// let a = Array::<usize, _>::ones((128, 1024));
            /// let b = Array::<usize, _>::ones(128);
            ///
            /// let weighted_sum = Zip::from(a.rows()).and(&b).par_fold(
            ///     || 0,
            ///     |sum, row, factor| sum + row.sum() * factor,
            ///     |sum, other_sum| sum + other_sum,
            /// );
            ///
            /// assert_eq!(weighted_sum, a.len());
            /// ```
            pub fn par_fold<ID, F, R, T>(self, identity: ID, fold: F, reduce: R) -> T
            where
                ID: Fn() -> T + Send + Sync + Clone,
                F: Fn(T, $($p::Item),*) -> T + Send + Sync,
                R: Fn(T, T) -> T + Send + Sync,
                T: Send
            {
                // `identity` is cloned because rayon needs it both as the per-task
                // seed for `fold` and as the identity for the final `reduce`.
                self.into_par_iter()
                    .fold(identity.clone(), move |accumulator, ($($p,)*)| {
                        fold(accumulator, $($p),*)
                    })
                    .reduce(identity, reduce)
            }

            );
        }
        )+
    };
}
195
// Instantiate the parallel `Zip` methods for arities 1 through 6.  The leading
// `true`/`false` is the macro's `$notlast` flag: at the final arity (6) it is
// `false`, so the methods that need to append one more producer with
// `self.and(..)` are not generated there.
zip_impl! {
    [true P1],
    [true P1 P2],
    [true P1 P2 P3],
    [true P1 P2 P3 P4],
    [true P1 P2 P3 P4 P5],
    [false P1 P2 P3 P4 P5 P6],
}