diff --git a/Cargo.toml b/Cargo.toml index 8de0064b4..81da3bd0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ num-traits = "0.2" num-complex = "0.2" itertools = { version = "0.7.0", default-features = false } +rayon = { version = "1.0.3", optional = true } + # Use via the `blas` crate feature! cblas-sys = { version = "0.1.4", optional = true, default-features = false } blas-src = { version = "0.2.0", optional = true, default-features = false } @@ -57,7 +59,7 @@ test-blas-openblas-sys = ["blas"] test = ["test-blas-openblas-sys"] # This feature is used for docs -docs = ["serde-1"] +docs = ["serde-1", "rayon"] [profile.release] [profile.bench] diff --git a/README.rst b/README.rst index 3e8959a9a..1fa147a62 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,11 @@ your `Cargo.toml`. - Optional, compatible with Rust stable - Enables serialization support for serde 1.0 +- ``rayon`` + + - Optional, compatible with Rust stable + - Enables parallel iterators, parallelized methods and ``par_azip!``. + - ``blas`` - Optional and experimental, compatible with Rust stable diff --git a/benches/par_rayon.rs b/benches/par_rayon.rs new file mode 100644 index 000000000..e207a65aa --- /dev/null +++ b/benches/par_rayon.rs @@ -0,0 +1,157 @@ +#![cfg(feature="rayon")] +#![feature(test)] + +extern crate rayon; + +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; + +extern crate test; +use test::Bencher; + +use ndarray::Zip; + +const EXP_N: usize = 256; +const ADDN: usize = 512; + +use std::cmp::max; + +fn set_threads() { + // Consider setting a fixed number of threads here, for example to avoid + // oversubscribing on hyperthreaded cores. + // let n = 4; + // let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global(); +} + +#[bench] +fn map_exp_regular(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((EXP_N, EXP_N)); + a.swap_axes(0, 1); + bench.iter(|| { + a.mapv_inplace(|x| x.exp()); + }); +} + +#[bench] +fn rayon_exp_regular(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((EXP_N, EXP_N)); + a.swap_axes(0, 1); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = x.exp()); + }); +} + +const FASTEXP: usize = EXP_N; + +#[inline] +fn fastexp(x: f64) -> f64 { + let x = 1. + x/1024.; + x.powi(1024) +} + +#[bench] +fn map_fastexp_regular(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.mapv_inplace(|x| fastexp(x)) + }); +} + +#[bench] +fn rayon_fastexp_regular(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x)); + }); +} + +#[bench] +fn map_fastexp_cut(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + let mut a = a.slice_mut(s![.., ..-1]); + bench.iter(|| { + a.mapv_inplace(|x| fastexp(x)) + }); +} + +#[bench] +fn rayon_fastexp_cut(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + let mut a = a.slice_mut(s![.., ..-1]); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x)); + }); +} + +#[bench] +fn map_fastexp_by_axis(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + for mut sheet in a.axis_iter_mut(Axis(0)) { + sheet.mapv_inplace(fastexp) + } + }); +} + +#[bench] +fn rayon_fastexp_by_axis(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.axis_iter_mut(Axis(0)).into_par_iter() + .for_each(|mut sheet| sheet.mapv_inplace(fastexp)); + }); +} + +#[bench] +fn rayon_fastexp_zip(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + Zip::from(&mut a).into_par_iter().for_each(|(elt, )| *elt = fastexp(*elt)); + }); +} + +#[bench] +fn add(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((ADDN, ADDN)); + let b = Array2::::zeros((ADDN, ADDN)); + let c = Array2::::zeros((ADDN, ADDN)); + let d = Array2::::zeros((ADDN, ADDN)); + bench.iter(|| { + azip!(mut a, b, c, d in { + *a += b.exp() + c.exp() + d.exp(); + }); + }); +} + +#[bench] +fn rayon_add(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((ADDN, ADDN)); + let b = Array2::::zeros((ADDN, ADDN)); + let c = Array2::::zeros((ADDN, ADDN)); + let d = Array2::::zeros((ADDN, ADDN)); + bench.iter(|| { + par_azip!(mut a, b, c, d in { + *a += b.exp() + c.exp() + d.exp(); + }); + }); +} diff --git a/scripts/all-tests.sh b/scripts/all-tests.sh index e2e00b9d6..5ad7306ab 100755 --- a/scripts/all-tests.sh +++ b/scripts/all-tests.sh @@ -11,7 +11,6 @@ cargo test --verbose --no-default-features cargo test --release --verbose --no-default-features cargo build --verbose --features "$FEATURES" cargo test --verbose --features "$FEATURES" -cargo test --manifest-path=parallel/Cargo.toml --verbose cargo test --manifest-path=serialization-tests/Cargo.toml --verbose cargo test --manifest-path=blas-tests/Cargo.toml --verbose CARGO_TARGET_DIR=target/ cargo test --manifest-path=numeric-tests/Cargo.toml --verbose diff --git a/src/lib.rs b/src/lib.rs index c56df3d02..1c884e0a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,11 +55,8 @@ //! needs matching memory layout to be efficient (with some exceptions). //! + Efficient floating point matrix multiplication even for very large //! matrices; can optionally use BLAS to improve it further. -//! + See also the [`ndarray-parallel`] crate for integration with rayon. //! - **Requires Rust 1.30** //! -//! [`ndarray-parallel`]: https://docs.rs/ndarray-parallel -//! //! ## Crate Feature Flags //! //! The following crate feature flags are available. They are configured in your @@ -68,6 +65,9 @@ //! - `serde-1` //! - Optional, compatible with Rust stable //! - Enables serialization support for serde 1.0 +//! - `rayon` +//! - Optional, compatible with Rust stable +//! - Enables parallel iterators, parallelized methods and [`par_azip!`]. //! - `blas` //! - Optional and experimental, compatible with Rust stable //! - Enable transparent BLAS support for matrix multiplication. @@ -87,6 +87,9 @@ #[cfg(feature = "serde-1")] extern crate serde; +#[cfg(feature="rayon")] +extern crate rayon; + #[cfg(feature="blas")] extern crate cblas_sys; #[cfg(feature="blas")] @@ -1333,6 +1336,10 @@ impl ArrayBase } +// parallel methods +#[cfg(feature="rayon")] +pub mod parallel; + mod impl_1d; mod impl_2d; mod impl_dyn; diff --git a/src/parallel/impl_par_methods.rs b/src/parallel/impl_par_methods.rs new file mode 100644 index 000000000..462eb8cc8 --- /dev/null +++ b/src/parallel/impl_par_methods.rs @@ -0,0 +1,85 @@ + +use { + Dimension, + NdProducer, + Zip, + ArrayBase, + DataMut, +}; + +use parallel::prelude::*; + + +/// # Parallel methods +/// +/// These methods require crate feature `rayon`. +impl ArrayBase + where S: DataMut, + D: Dimension, + A: Send + Sync, +{ + /// Parallel version of `map_inplace`. + /// + /// Modify the array in place by calling `f` by mutable reference on each element. + /// + /// Elements are visited in arbitrary order. + pub fn par_map_inplace(&mut self, f: F) + where F: Fn(&mut A) + Sync + Send + { + self.view_mut().into_par_iter().for_each(f) + } + + /// Parallel version of `mapv_inplace`. + /// + /// Modify the array in place by calling `f` by **v**alue on each element. + /// The array is updated with the new values. + /// + /// Elements are visited in arbitrary order. + pub fn par_mapv_inplace(&mut self, f: F) + where F: Fn(A) -> A + Sync + Send, + A: Clone, + { + self.view_mut().into_par_iter() + .for_each(move |x| *x = f(x.clone())) + } +} + + + + +// Zip + +macro_rules! zip_impl { + ($([$($p:ident)*],)+) => { + $( + #[allow(non_snake_case)] + impl Zip<($($p,)*), D> + where $($p::Item : Send , )* + $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* + { + /// The `par_apply` method for `Zip`. + /// + /// This is a shorthand for using `.into_par_iter().for_each()` on + /// `Zip`. + /// + /// Requires crate feature `rayon`. + pub fn par_apply(self, function: F) + where F: Fn($($p::Item),*) + Sync + Send + { + self.into_par_iter().for_each(move |($($p,)*)| function($($p),*)) + } + } + )+ + } +} + +zip_impl!{ + [P1], + [P1 P2], + [P1 P2 P3], + [P1 P2 P3 P4], + [P1 P2 P3 P4 P5], + [P1 P2 P3 P4 P5 P6], +} diff --git a/src/parallel/into_impls.rs b/src/parallel/into_impls.rs new file mode 100644 index 000000000..4d38939fd --- /dev/null +++ b/src/parallel/into_impls.rs @@ -0,0 +1,54 @@ +use {Array, ArcArray, Dimension, ArrayView, ArrayViewMut}; + +use super::prelude::IntoParallelIterator; +use super::Parallel; + +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a Array + where D: Dimension, + A: Sync +{ + type Item = &'a A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view().into_par_iter() + } +} + +// This is allowed: goes through `.view()` +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a ArcArray + where D: Dimension, + A: Sync +{ + type Item = &'a A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view().into_par_iter() + } +} + +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a mut Array + where D: Dimension, + A: Sync + Send +{ + type Item = &'a mut A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view_mut().into_par_iter() + } +} + +// This is allowed: goes through `.view_mut()`, which is unique access +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a mut ArcArray + where D: Dimension, + A: Sync + Send + Clone, +{ + type Item = &'a mut A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view_mut().into_par_iter() + } +} diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs new file mode 100644 index 000000000..dfed8a636 --- /dev/null +++ b/src/parallel/mod.rs @@ -0,0 +1,122 @@ +//! Parallelization features for ndarray. +//! +//! Parallelization features are based on the crate [rayon] and its parallel +//! iterators. Ndarray implements the parallel iterable traits for arrays +//! and array views, for some of its iterators and for [Zip]. +//! There are also directly parallelized methods on arrays and on [Zip]. +//! +//! This requires the crate feature `rayon` to be enabled. +//! +//! The following types implement parallel iterators, accessed using these +//! methods: +//! +//! - [`Array`], [`ArcArray`]: `.par_iter()` and `.par_iter_mut()` +//! - [`ArrayView`](ArrayView): `.into_par_iter()` +//! - [`ArrayViewMut`](ArrayViewMut): `.into_par_iter()` +//! - [`AxisIter`](iter::AxisIter), [`AxisIterMut`](iter::AxisIterMut): `.into_par_iter()` +//! - [`Zip`] `.into_par_iter()` +//! +//! The following other parallelized methods exist: +//! +//! - [`ArrayBase::par_map_inplace()`] +//! - [`ArrayBase::par_mapv_inplace()`] +//! - [`Zip::par_apply()`] (all arities) +//! +//! Note that you can use the parallel iterator for [Zip] to access all other +//! rayon parallel iterator methods. +//! +//! Only the axis iterators are indexed parallel iterators, the rest are all +//! “unindexed”. Use ndarray’s [Zip] for lock step parallel iteration of +//! multiple arrays or producers at a time. +//! +//! # Examples +//! +//! ## Arrays and array views +//! +//! Compute the exponential of each element in an array, parallelized. +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array2; +//! use ndarray::parallel::prelude::*; +//! +//! fn main() { +//! let mut a = Array2::::zeros((128, 128)); +//! +//! // Parallel versions of regular array methods +//! a.par_map_inplace(|x| *x = x.exp()); +//! a.par_mapv_inplace(f64::exp); +//! +//! // You can also use the parallel iterator directly +//! a.par_iter_mut().for_each(|x| *x = x.exp()); +//! } +//! ``` +//! +//! ## Axis iterators +//! +//! Use the parallel `.axis_iter()` to compute the sum of each row. +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array; +//! use ndarray::Axis; +//! use ndarray::parallel::prelude::*; +//! +//! fn main() { +//! let a = Array::linspace(0., 63., 64).into_shape((4, 16)).unwrap(); +//! let mut sums = Vec::new(); +//! a.axis_iter(Axis(0)) +//! .into_par_iter() +//! .map(|row| row.sum()) +//! .collect_into_vec(&mut sums); +//! +//! assert_eq!(sums, [120., 376., 632., 888.]); +//! } +//! ``` +//! +//! ## Zip +//! +//! Use zip for lock step function application across several arrays +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array3; +//! use ndarray::Zip; +//! +//! type Array3f64 = Array3; +//! +//! fn main() { +//! const N: usize = 128; +//! let a = Array3f64::from_elem((N, N, N), 1.); +//! let b = Array3f64::from_elem(a.dim(), 2.); +//! let mut c = Array3f64::zeros(a.dim()); +//! +//! Zip::from(&mut c) +//! .and(&a) +//! .and(&b) +//! .par_apply(|c, &a, &b| { +//! *c += a - b; +//! }); +//! } +//! ``` + + +/// Into- traits for creating parallelized iterators and/or using [`par_azip!`] +pub mod prelude { + #[doc(no_inline)] + pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator, + IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator}; + + pub use super::par_azip; +} + +pub use self::par::Parallel; +pub use par_azip; + +mod par; +mod impl_par_methods; +mod into_impls; +mod zipmacro; diff --git a/src/parallel/par.rs b/src/parallel/par.rs new file mode 100644 index 000000000..c5d2da0bf --- /dev/null +++ b/src/parallel/par.rs @@ -0,0 +1,278 @@ + +use rayon::iter::ParallelIterator; +use rayon::prelude::IntoParallelIterator; +use rayon::iter::IndexedParallelIterator; +use rayon::iter::plumbing::{Consumer, UnindexedConsumer}; +use rayon::iter::plumbing::bridge; +use rayon::iter::plumbing::ProducerCallback; +use rayon::iter::plumbing::Producer; +use rayon::iter::plumbing::UnindexedProducer; +use rayon::iter::plumbing::bridge_unindexed; +use rayon::iter::plumbing::Folder; + +use iter::AxisIter; +use iter::AxisIterMut; +use {Dimension}; +use {ArrayView, ArrayViewMut}; + +/// Parallel iterator wrapper. +#[derive(Copy, Clone, Debug)] +pub struct Parallel { + iter: I, +} + +/// Parallel producer wrapper. +#[derive(Copy, Clone, Debug)] +struct ParallelProducer(I); + +macro_rules! par_iter_wrapper { + // thread_bounds are either Sync or Send + Sync + ($iter_name:ident, [$($thread_bounds:tt)*]) => { + /// Requires crate feature `rayon`. + impl<'a, A, D> IntoParallelIterator for $iter_name<'a, A, D> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = ::Item; + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + impl<'a, A, D> ParallelIterator for Parallel<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$iter_name<'a, A, D> as Iterator>::Item; + fn drive_unindexed(self, consumer: C) -> C::Result + where C: UnindexedConsumer + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.iter.len()) + } + } + + impl<'a, A, D> IndexedParallelIterator for Parallel<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + fn with_producer(self, callback: Cb) -> Cb::Output + where Cb: ProducerCallback + { + callback.callback(ParallelProducer(self.iter)) + } + + fn len(&self) -> usize { + ExactSizeIterator::len(&self.iter) + } + + fn drive(self, consumer: C) -> C::Result + where C: Consumer + { + bridge(self, consumer) + } + } + + impl<'a, A, D> IntoIterator for ParallelProducer<$iter_name<'a, A, D>> + where D: Dimension, + { + type IntoIter = $iter_name<'a, A, D>; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0 + } + } + + // This is the real magic, I guess + impl<'a, A, D> Producer for ParallelProducer<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type IntoIter = $iter_name<'a, A, D>; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0 + } + + fn split_at(self, i: usize) -> (Self, Self) { + let (a, b) = self.0.split_at(i); + (ParallelProducer(a), ParallelProducer(b)) + } + } + + } +} + + +par_iter_wrapper!(AxisIter, [Sync]); +par_iter_wrapper!(AxisIterMut, [Send + Sync]); + + + +macro_rules! par_iter_view_wrapper { + // thread_bounds are either Sync or Send + Sync + ($view_name:ident, [$($thread_bounds:tt)*]) => { + /// Requires crate feature `rayon`. + impl<'a, A, D> IntoParallelIterator for $view_name<'a, A, D> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = ::Item; + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + + impl<'a, A, D> ParallelIterator for Parallel<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + fn drive_unindexed(self, consumer: C) -> C::Result + where C: UnindexedConsumer + { + bridge_unindexed(ParallelProducer(self.iter), consumer) + } + + fn opt_len(&self) -> Option { + None + } + } + + impl<'a, A, D> UnindexedProducer for ParallelProducer<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + fn split(self) -> (Self, Option) { + if self.0.len() <= 1 { + return (self, None) + } + let array = self.0; + let max_axis = array.max_stride_axis(); + let mid = array.len_of(max_axis) / 2; + let (a, b) = array.split_at(max_axis, mid); + (ParallelProducer(a), Some(ParallelProducer(b))) + } + + fn fold_with(self, folder: F) -> F + where F: Folder, + { + self.into_iter().fold(folder, move |f, elt| f.consume(elt)) + } + } + + impl<'a, A, D> IntoIterator for ParallelProducer<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + type IntoIter = <$view_name<'a, A, D> as IntoIterator>::IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } + } + + } +} + +par_iter_view_wrapper!(ArrayView, [Sync]); +par_iter_view_wrapper!(ArrayViewMut, [Sync + Send]); + + +use {Zip, NdProducer, FoldWhile}; + +macro_rules! zip_impl { + ($([$($p:ident)*],)+) => { + $( + /// Requires crate feature `rayon`. + #[allow(non_snake_case)] + impl IntoParallelIterator for Zip<($($p,)*), D> + where $($p::Item : Send , )* + $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* + { + type Item = ($($p::Item ,)*); + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + #[allow(non_snake_case)] + impl ParallelIterator for Parallel> + where $($p::Item : Send , )* + $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* + { + type Item = ($($p::Item ,)*); + + fn drive_unindexed(self, consumer: Cons) -> Cons::Result + where Cons: UnindexedConsumer + { + bridge_unindexed(ParallelProducer(self.iter), consumer) + } + + fn opt_len(&self) -> Option { + None + } + } + + #[allow(non_snake_case)] + impl UnindexedProducer for ParallelProducer> + where $($p : Send , )* + $($p::Item : Send , )* + D: Dimension, + $($p: NdProducer ,)* + { + type Item = ($($p::Item ,)*); + + fn split(self) -> (Self, Option) { + if self.0.size() <= 1 { + return (self, None) + } + let (a, b) = self.0.split(); + (ParallelProducer(a), Some(ParallelProducer(b))) + } + + fn fold_with(self, folder: Fold) -> Fold + where Fold: Folder, + { + self.0.fold_while(folder, |mut folder, $($p),*| { + folder = folder.consume(($($p ,)*)); + if folder.full() { + FoldWhile::Done(folder) + } else { + FoldWhile::Continue(folder) + } + }).into_inner() + } + } + )+ + } +} + +zip_impl!{ + [P1], + [P1 P2], + [P1 P2 P3], + [P1 P2 P3 P4], + [P1 P2 P3 P4 P5], + [P1 P2 P3 P4 P5 P6], +} diff --git a/src/parallel/zipmacro.rs b/src/parallel/zipmacro.rs new file mode 100644 index 000000000..a61ae88d4 --- /dev/null +++ b/src/parallel/zipmacro.rs @@ -0,0 +1,106 @@ +// Copyright 2017 bluss and ndarray developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_export] +/// Parallelized array zip macro: lock step function application across several +/// arrays and producers. +/// +/// This is a version of the [`azip`] macro that requires the crate feature +/// `rayon` to be enabled. +/// +/// This example: +/// +/// ```rust,ignore +/// par_azip!(mut a, b, c in { *a = b + c }) +/// ``` +/// +/// Is equivalent to: +/// +/// ```rust,ignore +/// Zip::from(&mut a).and(&b).and(&c).par_apply(|a, &b, &c| { +/// *a = b + c; +/// }); +/// ``` +/// +/// **Panics** if any of the arrays are not of the same shape. +/// +/// ## Examples +/// +/// ```rust +/// extern crate ndarray; +/// +/// use ndarray::Array2; +/// use ndarray::parallel::par_azip; +/// +/// type M = Array2; +/// +/// fn main() { +/// let mut a = M::zeros((16, 16)); +/// let b = M::from_elem(a.dim(), 1.); +/// let c = M::from_elem(a.dim(), 2.); +/// +/// // Compute a simple ternary operation: +/// // elementwise addition of b and c, stored in a +/// +/// par_azip!(mut a, b, c in { *a = b + c }); +/// +/// assert_eq!(a, &b + &c); +/// } +/// ``` +macro_rules! par_azip { + // Build Zip Rule (index) + (@parse [index => $a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { + $crate::par_azip!(@finish ($crate::Zip::indexed($a)) [$($aa,)*] $t1 in $t2) + }; + // Build Zip Rule (no index) + (@parse [$a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { + $crate::par_azip!(@finish ($crate::Zip::from($a)) [$($aa,)*] $t1 in $t2) + }; + // Build Finish Rule (both) + (@finish ($z:expr) [$($aa:expr,)*] [$($p:pat,)+] in { $($t:tt)*}) => { + use $crate::parallel::prelude::*; + #[allow(unused_mut)] + ($z) + $( + .and($aa) + )* + .par_apply(|$($p),+| { + $($t)* + }) + }; + // parsing stack: [expressions] [patterns] (one per operand) + // index uses empty [] -- must be first + (@parse [] [] index $i:pat, $($t:tt)*) => { + $crate::par_azip!(@parse [index =>] [$i,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident ($e:expr) $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* mut $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* &mut $x,] [$($pats)* mut $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] , $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)*] [$($pats)*] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident ($e:expr) $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* &$x,] [$($pats)* $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident ($e:expr) $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* &$x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident $($t:tt)*) => { + $crate::par_azip!(@parse [$($exprs)* &$x,] [$($pats)* &$x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $($t:tt)*) => { }; + ($($t:tt)*) => { + $crate::par_azip!(@parse [] [] $($t)*); + } +} diff --git a/src/zip/mod.rs b/src/zip/mod.rs index 366a85c0c..877c97a20 100644 --- a/src/zip/mod.rs +++ b/src/zip/mod.rs @@ -708,7 +708,10 @@ macro_rules! map_impl { ($([$notlast:ident $($p:ident)*],)+) => { $( #[allow(non_snake_case)] - impl),*> Zip<($($p,)*), D> { + impl Zip<($($p,)*), D> + where D: Dimension, + $($p: NdProducer ,)* + { /// Apply a function to all elements of the input arrays, /// visiting elements in lock step. pub fn apply(mut self, mut function: F) diff --git a/tests/par_azip.rs b/tests/par_azip.rs new file mode 100644 index 000000000..93caee62e --- /dev/null +++ b/tests/par_azip.rs @@ -0,0 +1,68 @@ +#![cfg(feature="rayon")] + +#[macro_use] +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; +use itertools::{enumerate}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[test] +fn test_par_azip1() { + let mut a = Array::zeros(62); + let b = Array::from_elem(62, 42); + par_azip!(mut a in { *a = 42 }); + assert_eq!(a, b); +} + +#[test] +fn test_par_azip2() { + let mut a = Array::zeros((5, 7)); + let b = Array::from_shape_fn(a.dim(), |(i, j)| 1. / (i + 2*j) as f32); + par_azip!(mut a, b in { *a = b; }); + assert_eq!(a, b); +} + +#[test] +fn test_par_azip3() { + let mut a = [0.; 32]; + let mut b = [0.; 32]; + let mut c = [0.; 32]; + for (i, elt) in enumerate(&mut b) { + *elt = i as f32; + } + + par_azip!(mut a (&mut a[..]), b (&b[..]), mut c (&mut c[..]) in { + *a += b / 10.; + *c = a.sin(); + }); + let res = Array::linspace(0., 3.1, 32).mapv_into(f32::sin); + assert!(res.all_close(&ArrayView::from(&c), 1e-4)); +} + +#[should_panic] +#[test] +fn test_zip_dim_mismatch_1() { + let mut a = Array::zeros((5, 7)); + let mut d = a.raw_dim(); + d[0] += 1; + let b = Array::from_shape_fn(d, |(i, j)| 1. / (i + 2*j) as f32); + par_azip!(mut a, b in { *a = b; }); +} + +#[test] +fn test_indices_1() { + let mut a1 = Array::default(12); + for (i, elt) in a1.indexed_iter_mut() { + *elt = i; + } + + let count = AtomicUsize::new(0); + par_azip!(index i, elt (&a1) in { + count.fetch_add(1, Ordering::SeqCst); + assert_eq!(elt, i); + }); + assert_eq!(count.load(Ordering::SeqCst), a1.len()); +} diff --git a/tests/par_rayon.rs b/tests/par_rayon.rs new file mode 100644 index 000000000..3f457c46f --- /dev/null +++ b/tests/par_rayon.rs @@ -0,0 +1,55 @@ +#![cfg(feature="rayon")] + +extern crate rayon; + +#[macro_use] +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; + +const M: usize = 1024 * 10; +const N: usize = 100; + +#[test] +fn test_axis_iter() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + assert_eq!(a.axis_iter(Axis(0)).len(), M); + let s: f64 = a.axis_iter(Axis(0)).into_par_iter().map(|x| x.sum()).sum(); + println!("{:?}", a.slice(s![..10, ..5])); + assert_eq!(s, a.sum()); +} + +#[test] +fn test_axis_iter_mut() { + let mut a = Array::linspace(0., 1.0f64, M * N).into_shape((M, N)).unwrap(); + let b = a.mapv(|x| x.exp()); + a.axis_iter_mut(Axis(0)).into_par_iter().for_each(|mut v| v.mapv_inplace(|x| x.exp())); + println!("{:?}", a.slice(s![..10, ..5])); + assert!(a.all_close(&b, 0.001)); +} + +#[test] +fn test_regular_iter() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + let s: f64 = a.view().into_par_iter().map(|&x| x).sum(); + println!("{:?}", a.slice(s![..10, ..5])); + assert_eq!(s, a.sum()); +} + +#[test] +fn test_regular_iter_collect() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + let v = a.view().into_par_iter().map(|&x| x).collect::>(); + assert_eq!(v.len(), a.len()); +} diff --git a/tests/par_zip.rs b/tests/par_zip.rs new file mode 100644 index 000000000..9cba9888c --- /dev/null +++ b/tests/par_zip.rs @@ -0,0 +1,83 @@ +#![cfg(feature="rayon")] + +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; + +use ndarray::Zip; + +const M: usize = 1024 * 10; +const N: usize = 100; + +#[test] +fn test_zip_1() { + let mut a = Array2::::zeros((M, N)); + + Zip::from(&mut a) + .par_apply(|x| { + *x = x.exp() + }); +} + +#[test] +fn test_zip_index_1() { + let mut a = Array2::default((10, 10)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_2() { + let mut a = Array2::default((M, N)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_3() { + let mut a = Array::default((1, 2, 1, 2, 3)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_4() { + let mut a = Array2::zeros((M, N)); + let mut b = Array2::zeros((M, N)); + + Zip::indexed(&mut a) + .and(&mut b) + .par_apply(|(i, j), x, y| { + *x = i; + *y = j; + }); + + for ((i, _), elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } + for ((_, j), elt) in b.indexed_iter() { + assert_eq!(*elt, j); + } +}