From ce457262c2209922da16c000ebe3cf5adcd7c7ee Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 18:55:14 +0100 Subject: [PATCH 01/24] FEAT: Add ndarray::parallel module and optional feature rayon Put the parallel module down in the crate file, to fix inherent method order. The order in docs is important and is decided by the order of the modules in the main crate file. --- Cargo.toml | 2 + src/lib.rs | 7 + src/parallel/ext_traits.rs | 83 +++++++++++ src/parallel/into_impls.rs | 50 +++++++ src/parallel/into_traits.rs | 42 ++++++ src/parallel/mod.rs | 126 +++++++++++++++++ src/parallel/par.rs | 270 ++++++++++++++++++++++++++++++++++++ src/parallel/zipmacro.rs | 104 ++++++++++++++ 8 files changed, 684 insertions(+) create mode 100644 src/parallel/ext_traits.rs create mode 100644 src/parallel/into_impls.rs create mode 100644 src/parallel/into_traits.rs create mode 100644 src/parallel/mod.rs create mode 100644 src/parallel/par.rs create mode 100644 src/parallel/zipmacro.rs diff --git a/Cargo.toml b/Cargo.toml index 8de0064b4..6246b78df 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,8 @@ num-traits = "0.2" num-complex = "0.2" itertools = { version = "0.7.0", default-features = false } +rayon = { version = "1.0.3", optional = true } + # Use via the `blas` crate feature! cblas-sys = { version = "0.1.4", optional = true, default-features = false } blas-src = { version = "0.2.0", optional = true, default-features = false } diff --git a/src/lib.rs b/src/lib.rs index c56df3d02..4e872e0ba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -87,6 +87,9 @@ #[cfg(feature = "serde-1")] extern crate serde; +#[cfg(feature="rayon")] +extern crate rayon; + #[cfg(feature="blas")] extern crate cblas_sys; #[cfg(feature="blas")] @@ -1350,6 +1353,10 @@ mod impl_views; // Array raw view methods mod impl_raw_views; +// parallel methods +#[cfg(feature="rayon")] +pub mod parallel; + /// A contiguous array shape of n dimensions. /// /// Either c- or f- memory ordered (*c* a.k.a *row major* is the default). diff --git a/src/parallel/ext_traits.rs b/src/parallel/ext_traits.rs new file mode 100644 index 000000000..b649cf82c --- /dev/null +++ b/src/parallel/ext_traits.rs @@ -0,0 +1,83 @@ + +use { + Dimension, + NdProducer, + Zip, + ArrayBase, + DataMut, +}; + +use parallel::prelude::*; + +// Arrays + +/// Parallel versions of `map_inplace` and `mapv_inplace`. +pub trait ParMap { + type Item; + fn par_map_inplace(&mut self, f: F) + where F: Fn(&mut Self::Item) + Sync + Send; + fn par_mapv_inplace(&mut self, f: F) + where F: Fn(Self::Item) -> Self::Item + Sync + Send, + Self::Item: Clone; +} + +impl ParMap for ArrayBase + where S: DataMut, + D: Dimension, + A: Send + Sync, +{ + type Item = A; + fn par_map_inplace(&mut self, f: F) + where F: Fn(&mut Self::Item) + Sync + Send + { + self.view_mut().into_par_iter().for_each(f) + } + fn par_mapv_inplace(&mut self, f: F) + where F: Fn(Self::Item) -> Self::Item + Sync + Send, + Self::Item: Clone + { + self.view_mut().into_par_iter() + .for_each(move |x| *x = f(x.clone())) + } +} + + + + +// Zip + +macro_rules! zip_impl { + ($([$name:ident $($p:ident)*],)+) => { + $( + /// The `par_apply` method for `Zip`. + /// + /// This is a shorthand for using `.into_par_iter().for_each()` on + /// `Zip`. + pub trait $name<$($p),*> { + fn par_apply(self, function: F) + where F: Fn($($p),*) + Sync + Send; + } + + #[allow(non_snake_case)] + impl),*> $name<$($p::Item),*> for Zip<($($p,)*), Dim> + where $($p::Item : Send , )* + $($p : Send , )* + { + fn par_apply(self, function: F) + where F: Fn($($p::Item),*) + Sync + Send + { + self.into_par_iter().for_each(move |($($p,)*)| function($($p),*)) + } + } + )+ + } +} + +zip_impl!{ + [ParApply1 P1], + [ParApply2 P1 P2], + [ParApply3 P1 P2 P3], + [ParApply4 P1 P2 P3 P4], + [ParApply5 P1 P2 P3 P4 P5], + [ParApply6 P1 P2 P3 P4 P5 P6], +} diff --git a/src/parallel/into_impls.rs b/src/parallel/into_impls.rs new file mode 100644 index 000000000..02e3f5f8c --- /dev/null +++ b/src/parallel/into_impls.rs @@ -0,0 +1,50 @@ +use {Array, RcArray, Dimension, ArrayView, ArrayViewMut}; + +use super::NdarrayIntoParallelIterator; +use super::Parallel; + +impl<'a, A, D> NdarrayIntoParallelIterator for &'a Array + where D: Dimension, + A: Sync +{ + type Item = &'a A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view().into_par_iter() + } +} + +// This is allowed: goes through `.view()` +impl<'a, A, D> NdarrayIntoParallelIterator for &'a RcArray + where D: Dimension, + A: Sync +{ + type Item = &'a A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view().into_par_iter() + } +} + +impl<'a, A, D> NdarrayIntoParallelIterator for &'a mut Array + where D: Dimension, + A: Sync + Send +{ + type Item = &'a mut A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view_mut().into_par_iter() + } +} + +// This is allowed: goes through `.view_mut()`, which is unique access +impl<'a, A, D> NdarrayIntoParallelIterator for &'a mut RcArray + where D: Dimension, + A: Sync + Send + Clone, +{ + type Item = &'a mut A; + type Iter = Parallel>; + fn into_par_iter(self) -> Self::Iter { + self.view_mut().into_par_iter() + } +} diff --git a/src/parallel/into_traits.rs b/src/parallel/into_traits.rs new file mode 100644 index 000000000..9abc630ba --- /dev/null +++ b/src/parallel/into_traits.rs @@ -0,0 +1,42 @@ + +use rayon::iter::ParallelIterator; + +pub trait NdarrayIntoParallelIterator { + type Iter: ParallelIterator; + type Item: Send; + fn into_par_iter(self) -> Self::Iter; +} + +pub trait NdarrayIntoParallelRefIterator<'x> { + type Iter: ParallelIterator; + type Item: Send + 'x; + fn par_iter(&'x self) -> Self::Iter; +} + +pub trait NdarrayIntoParallelRefMutIterator<'x> { + type Iter: ParallelIterator; + type Item: Send + 'x; + fn par_iter_mut(&'x mut self) -> Self::Iter; +} + +impl<'data, I: 'data + ?Sized> NdarrayIntoParallelRefIterator<'data> for I + where &'data I: NdarrayIntoParallelIterator +{ + type Iter = <&'data I as NdarrayIntoParallelIterator>::Iter; + type Item = <&'data I as NdarrayIntoParallelIterator>::Item; + + fn par_iter(&'data self) -> Self::Iter { + self.into_par_iter() + } +} + +impl<'data, I: 'data + ?Sized> NdarrayIntoParallelRefMutIterator<'data> for I + where &'data mut I: NdarrayIntoParallelIterator +{ + type Iter = <&'data mut I as NdarrayIntoParallelIterator>::Iter; + type Item = <&'data mut I as NdarrayIntoParallelIterator>::Item; + + fn par_iter_mut(&'data mut self) -> Self::Iter { + self.into_par_iter() + } +} diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs new file mode 100644 index 000000000..f9e95e1ee --- /dev/null +++ b/src/parallel/mod.rs @@ -0,0 +1,126 @@ +//! Parallelization features for ndarray. +//! +//! The array views and references to owned arrays all implement +//! `NdarrayIntoParallelIterator` (*); the default parallel iterators (each element +//! by reference or mutable reference) have no ordering guarantee in their +//! parallel implementations. +//! +//! `.axis_iter()` and `.axis_iter_mut()` also have parallel counterparts, +//! and their parallel iterators are indexed (and thus ordered) and exact length. +//! +//! `Zip` also implements `NdarrayIntoParallelIterator`, and there is an +//! extension trait so that it can use a method `.par_apply` directly. +//! +//! (*) This regime of a custom trait instead of rayon’s own is since we +//! use this intermediate ndarray-parallel crate. +//! +//! # Examples +//! +//! +//! ## Arrays and array views +//! +//! Compute the exponential of each element in an array, parallelized. +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array2; +//! use ndarray::parallel::prelude::*; +//! +//! fn main() { +//! let mut a = Array2::::zeros((128, 128)); +//! +//! // Parallel versions of regular array methods (ParMap trait) +//! a.par_map_inplace(|x| *x = x.exp()); +//! a.par_mapv_inplace(f64::exp); +//! +//! // You can also use the parallel iterator directly +//! a.par_iter_mut().for_each(|x| *x = x.exp()); +//! } +//! ``` +//! +//! ## Axis iterators +//! +//! Use the parallel `.axis_iter()` to compute the sum of each row. +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array; +//! use ndarray::Axis; +//! use ndarray::parallel::prelude::*; +//! +//! fn main() { +//! let a = Array::linspace(0., 63., 64).into_shape((4, 16)).unwrap(); +//! let mut sums = Vec::new(); +//! a.axis_iter(Axis(0)) +//! .into_par_iter() +//! .map(|row| row.sum()) +//! .collect_into_vec(&mut sums); +//! +//! assert_eq!(sums, [120., 376., 632., 888.]); +//! } +//! ``` +//! +//! ## Zip +//! +//! Use zip for lock step function application across several arrays +//! +//! ``` +//! extern crate ndarray; +//! +//! use ndarray::Array3; +//! use ndarray::Zip; +//! use ndarray::parallel::prelude::*; +//! +//! type Array3f64 = Array3; +//! +//! fn main() { +//! const N: usize = 128; +//! let a = Array3f64::from_elem((N, N, N), 1.); +//! let b = Array3f64::from_elem(a.dim(), 2.); +//! let mut c = Array3f64::zeros(a.dim()); +//! +//! Zip::from(&mut c) +//! .and(&a) +//! .and(&b) +//! .par_apply(|c, &a, &b| { +//! *c += a - b; +//! }); +//! } +//! ``` + + +/// Into- traits for creating parallelized iterators. +pub mod prelude { + // happy and insane; ignorance is bluss + pub use super::NdarrayIntoParallelIterator; + pub use super::NdarrayIntoParallelRefIterator; + pub use super::NdarrayIntoParallelRefMutIterator; + + #[doc(no_inline)] + pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator}; + + pub use super::ext_traits::{ + ParApply1, + ParApply2, + ParApply3, + ParApply4, + ParApply5, + ParApply6, + }; + pub use super::ext_traits::ParMap; +} + +pub use self::par::Parallel; +pub use self::into_traits::{ + NdarrayIntoParallelIterator, + NdarrayIntoParallelRefIterator, + NdarrayIntoParallelRefMutIterator, +}; + +mod par; +mod ext_traits; +mod into_traits; +mod into_impls; +mod zipmacro; diff --git a/src/parallel/par.rs b/src/parallel/par.rs new file mode 100644 index 000000000..b7bcf4d5e --- /dev/null +++ b/src/parallel/par.rs @@ -0,0 +1,270 @@ + +use rayon::iter::ParallelIterator; +use rayon::iter::IndexedParallelIterator; +use rayon::iter::plumbing::{Consumer, UnindexedConsumer}; +use rayon::iter::plumbing::bridge; +use rayon::iter::plumbing::ProducerCallback; +use rayon::iter::plumbing::Producer; +use rayon::iter::plumbing::UnindexedProducer; +use rayon::iter::plumbing::bridge_unindexed; +use rayon::iter::plumbing::Folder; + +use iter::AxisIter; +use iter::AxisIterMut; +use {Dimension}; +use {ArrayView, ArrayViewMut}; + +use super::NdarrayIntoParallelIterator; + +/// Parallel iterator wrapper. +#[derive(Copy, Clone, Debug)] +pub struct Parallel { + iter: I, +} + +/// Parallel producer wrapper. +#[derive(Copy, Clone, Debug)] +struct ParallelProducer(I); + +macro_rules! par_iter_wrapper { + // thread_bounds are either Sync or Send + Sync + ($iter_name:ident, [$($thread_bounds:tt)*]) => { + impl<'a, A, D> NdarrayIntoParallelIterator for $iter_name<'a, A, D> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = ::Item; + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + impl<'a, A, D> ParallelIterator for Parallel<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$iter_name<'a, A, D> as Iterator>::Item; + fn drive_unindexed(self, consumer: C) -> C::Result + where C: UnindexedConsumer + { + bridge(self, consumer) + } + + fn opt_len(&self) -> Option { + Some(self.iter.len()) + } + } + + impl<'a, A, D> IndexedParallelIterator for Parallel<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + fn with_producer(self, callback: Cb) -> Cb::Output + where Cb: ProducerCallback + { + callback.callback(ParallelProducer(self.iter)) + } + + fn len(&self) -> usize { + ExactSizeIterator::len(&self.iter) + } + + fn drive(self, consumer: C) -> C::Result + where C: Consumer + { + bridge(self, consumer) + } + } + + impl<'a, A, D> IntoIterator for ParallelProducer<$iter_name<'a, A, D>> + where D: Dimension, + { + type IntoIter = $iter_name<'a, A, D>; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0 + } + } + + // This is the real magic, I guess + impl<'a, A, D> Producer for ParallelProducer<$iter_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type IntoIter = $iter_name<'a, A, D>; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + self.0 + } + + fn split_at(self, i: usize) -> (Self, Self) { + let (a, b) = self.0.split_at(i); + (ParallelProducer(a), ParallelProducer(b)) + } + } + + } +} + + +par_iter_wrapper!(AxisIter, [Sync]); +par_iter_wrapper!(AxisIterMut, [Send + Sync]); + + + +macro_rules! par_iter_view_wrapper { + // thread_bounds are either Sync or Send + Sync + ($view_name:ident, [$($thread_bounds:tt)*]) => { + impl<'a, A, D> NdarrayIntoParallelIterator for $view_name<'a, A, D> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = ::Item; + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + + impl<'a, A, D> ParallelIterator for Parallel<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + fn drive_unindexed(self, consumer: C) -> C::Result + where C: UnindexedConsumer + { + bridge_unindexed(ParallelProducer(self.iter), consumer) + } + + fn opt_len(&self) -> Option { + None + } + } + + impl<'a, A, D> UnindexedProducer for ParallelProducer<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + fn split(self) -> (Self, Option) { + if self.0.len() <= 1 { + return (self, None) + } + let array = self.0; + let max_axis = array.max_stride_axis(); + let mid = array.len_of(max_axis) / 2; + let (a, b) = array.split_at(max_axis, mid); + (ParallelProducer(a), Some(ParallelProducer(b))) + } + + fn fold_with(self, folder: F) -> F + where F: Folder, + { + self.into_iter().fold(folder, move |f, elt| f.consume(elt)) + } + } + + impl<'a, A, D> IntoIterator for ParallelProducer<$view_name<'a, A, D>> + where D: Dimension, + A: $($thread_bounds)*, + { + type Item = <$view_name<'a, A, D> as IntoIterator>::Item; + type IntoIter = <$view_name<'a, A, D> as IntoIterator>::IntoIter; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } + } + + } +} + +par_iter_view_wrapper!(ArrayView, [Sync]); +par_iter_view_wrapper!(ArrayViewMut, [Sync + Send]); + + +use {Zip, NdProducer, FoldWhile}; + +macro_rules! zip_impl { + ($([$($p:ident)*],)+) => { + $( + #[allow(non_snake_case)] + impl),*> NdarrayIntoParallelIterator for Zip<($($p,)*), Dim> + where $($p::Item : Send , )* + $($p : Send , )* + { + type Item = ($($p::Item ,)*); + type Iter = Parallel; + fn into_par_iter(self) -> Self::Iter { + Parallel { + iter: self, + } + } + } + + #[allow(non_snake_case)] + impl),*> ParallelIterator for Parallel> + where $($p::Item : Send , )* + $($p : Send , )* + { + type Item = ($($p::Item ,)*); + + fn drive_unindexed(self, consumer: Cons) -> Cons::Result + where Cons: UnindexedConsumer + { + bridge_unindexed(ParallelProducer(self.iter), consumer) + } + + fn opt_len(&self) -> Option { + None + } + } + + #[allow(non_snake_case)] + impl),*> UnindexedProducer for ParallelProducer> + where $($p : Send , )* + $($p::Item : Send , )* + { + type Item = ($($p::Item ,)*); + + fn split(self) -> (Self, Option) { + if self.0.size() <= 1 { + return (self, None) + } + let (a, b) = self.0.split(); + (ParallelProducer(a), Some(ParallelProducer(b))) + } + + fn fold_with(self, folder: Fold) -> Fold + where Fold: Folder, + { + self.0.fold_while(folder, |mut folder, $($p),*| { + folder = folder.consume(($($p ,)*)); + if folder.full() { + FoldWhile::Done(folder) + } else { + FoldWhile::Continue(folder) + } + }).into_inner() + } + } + )+ + } +} + +zip_impl!{ + [P1], + [P1 P2], + [P1 P2 P3], + [P1 P2 P3 P4], + [P1 P2 P3 P4 P5], + [P1 P2 P3 P4 P5 P6], +} diff --git a/src/parallel/zipmacro.rs b/src/parallel/zipmacro.rs new file mode 100644 index 000000000..1c69b1d9d --- /dev/null +++ b/src/parallel/zipmacro.rs @@ -0,0 +1,104 @@ +// Copyright 2017 bluss and ndarray developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#[macro_export] +/// Parallel version of the `azip!` macro. +/// +/// See the `azip!` documentation for more details. +/// +/// This example: +/// +/// ```rust,ignore +/// par_azip!(mut a, b, c in { *a = b + c }) +/// ``` +/// +/// Is equivalent to: +/// +/// ```rust,ignore +/// Zip::from(&mut a).and(&b).and(&c).par_apply(|a, &b, &c| { +/// *a = b + c; +/// }); +/// ``` +/// +/// **Panics** if any of the arrays are not of the same shape. +/// +/// ## Examples +/// +/// ```rust +/// #[macro_use(par_azip)] +/// extern crate ndarray; +/// +/// use ndarray::Array2; +/// +/// type M = Array2; +/// +/// fn main() { +/// let mut a = M::zeros((16, 16)); +/// let b = M::from_elem(a.dim(), 1.); +/// let c = M::from_elem(a.dim(), 2.); +/// +/// // Compute a simple ternary operation: +/// // elementwise addition of b and c, stored in a +/// +/// par_azip!(mut a, b, c in { *a = b + c }); +/// +/// assert_eq!(a, &b + &c); +/// } +/// ``` +macro_rules! par_azip { + // Build Zip Rule (index) + (@parse [index => $a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { + par_azip!(@finish ($crate::Zip::indexed($a)) [$($aa,)*] $t1 in $t2) + }; + // Build Zip Rule (no index) + (@parse [$a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { + par_azip!(@finish ($crate::Zip::from($a)) [$($aa,)*] $t1 in $t2) + }; + // Build Finish Rule (both) + (@finish ($z:expr) [$($aa:expr,)*] [$($p:pat,)+] in { $($t:tt)*}) => { + use $crate::parallel::prelude::*; + #[allow(unused_mut)] + ($z) + $( + .and($aa) + )* + .par_apply(|$($p),+| { + $($t)* + }) + }; + // parsing stack: [expressions] [patterns] (one per operand) + // index uses empty [] -- must be first + (@parse [] [] index $i:pat, $($t:tt)*) => { + par_azip!(@parse [index =>] [$i,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident ($e:expr) $($t:tt)*) => { + par_azip!(@parse [$($exprs)* $e,] [$($pats)* mut $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident $($t:tt)*) => { + par_azip!(@parse [$($exprs)* &mut $x,] [$($pats)* mut $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] , $($t:tt)*) => { + par_azip!(@parse [$($exprs)*] [$($pats)*] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident ($e:expr) $($t:tt)*) => { + par_azip!(@parse [$($exprs)* $e,] [$($pats)* $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident $($t:tt)*) => { + par_azip!(@parse [$($exprs)* &$x,] [$($pats)* $x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident ($e:expr) $($t:tt)*) => { + par_azip!(@parse [$($exprs)* $e,] [$($pats)* &$x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident $($t:tt)*) => { + par_azip!(@parse [$($exprs)* &$x,] [$($pats)* &$x,] $($t)*); + }; + (@parse [$($exprs:tt)*] [$($pats:tt)*] $($t:tt)*) => { }; + ($($t:tt)*) => { + par_azip!(@parse [] [] $($t)*); + } +} From f59969a460e2c2e2caaf1d5dc3037e7469b23623 Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:01:21 +0100 Subject: [PATCH 02/24] FEAT: Parallel, make the extension array and zip methods inherent --- src/parallel/ext_traits.rs | 44 +++++++++++++++----------------------- src/parallel/mod.rs | 10 --------- 2 files changed, 17 insertions(+), 37 deletions(-) diff --git a/src/parallel/ext_traits.rs b/src/parallel/ext_traits.rs index b649cf82c..d10b560ce 100644 --- a/src/parallel/ext_traits.rs +++ b/src/parallel/ext_traits.rs @@ -11,30 +11,23 @@ use parallel::prelude::*; // Arrays -/// Parallel versions of `map_inplace` and `mapv_inplace`. -pub trait ParMap { - type Item; - fn par_map_inplace(&mut self, f: F) - where F: Fn(&mut Self::Item) + Sync + Send; - fn par_mapv_inplace(&mut self, f: F) - where F: Fn(Self::Item) -> Self::Item + Sync + Send, - Self::Item: Clone; -} -impl ParMap for ArrayBase +impl ArrayBase where S: DataMut, D: Dimension, A: Send + Sync, { - type Item = A; - fn par_map_inplace(&mut self, f: F) - where F: Fn(&mut Self::Item) + Sync + Send + /// Parallel version of `map_inplace` + pub fn par_map_inplace(&mut self, f: F) + where F: Fn(&mut A) + Sync + Send { self.view_mut().into_par_iter().for_each(f) } - fn par_mapv_inplace(&mut self, f: F) - where F: Fn(Self::Item) -> Self::Item + Sync + Send, - Self::Item: Clone + + /// Parallel version of `mapv_inplace`. + pub fn par_mapv_inplace(&mut self, f: F) + where F: Fn(A) -> A + Sync + Send, + A: Clone, { self.view_mut().into_par_iter() .for_each(move |x| *x = f(x.clone())) @@ -49,21 +42,18 @@ impl ParMap for ArrayBase macro_rules! zip_impl { ($([$name:ident $($p:ident)*],)+) => { $( - /// The `par_apply` method for `Zip`. - /// - /// This is a shorthand for using `.into_par_iter().for_each()` on - /// `Zip`. - pub trait $name<$($p),*> { - fn par_apply(self, function: F) - where F: Fn($($p),*) + Sync + Send; - } - #[allow(non_snake_case)] - impl),*> $name<$($p::Item),*> for Zip<($($p,)*), Dim> + impl),*> Zip<($($p,)*), Dim> where $($p::Item : Send , )* $($p : Send , )* { - fn par_apply(self, function: F) + /// The `par_apply` method for `Zip`. + /// + /// This is a shorthand for using `.into_par_iter().for_each()` on + /// `Zip`. + /// + /// Requires crate feature `rayon`. + pub fn par_apply(self, function: F) where F: Fn($($p::Item),*) + Sync + Send { self.into_par_iter().for_each(move |($($p,)*)| function($($p),*)) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index f9e95e1ee..def0da6e3 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -100,16 +100,6 @@ pub mod prelude { #[doc(no_inline)] pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator}; - - pub use super::ext_traits::{ - ParApply1, - ParApply2, - ParApply3, - ParApply4, - ParApply5, - ParApply6, - }; - pub use super::ext_traits::ParMap; } pub use self::par::Parallel; From a4d60e78121423d17f4a798d677445dfdebc4994 Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:08:56 +0100 Subject: [PATCH 03/24] FEAT: Switch parallel to rayon's IntoParallelIterator traits --- src/parallel/into_impls.rs | 10 ++++----- src/parallel/into_traits.rs | 42 ------------------------------------- src/parallel/mod.rs | 13 ++---------- src/parallel/par.rs | 9 ++++---- 4 files changed, 11 insertions(+), 63 deletions(-) delete mode 100644 src/parallel/into_traits.rs diff --git a/src/parallel/into_impls.rs b/src/parallel/into_impls.rs index 02e3f5f8c..72f8d5b0b 100644 --- a/src/parallel/into_impls.rs +++ b/src/parallel/into_impls.rs @@ -1,9 +1,9 @@ use {Array, RcArray, Dimension, ArrayView, ArrayViewMut}; -use super::NdarrayIntoParallelIterator; +use super::prelude::IntoParallelIterator; use super::Parallel; -impl<'a, A, D> NdarrayIntoParallelIterator for &'a Array +impl<'a, A, D> IntoParallelIterator for &'a Array where D: Dimension, A: Sync { @@ -15,7 +15,7 @@ impl<'a, A, D> NdarrayIntoParallelIterator for &'a Array } // This is allowed: goes through `.view()` -impl<'a, A, D> NdarrayIntoParallelIterator for &'a RcArray +impl<'a, A, D> IntoParallelIterator for &'a RcArray where D: Dimension, A: Sync { @@ -26,7 +26,7 @@ impl<'a, A, D> NdarrayIntoParallelIterator for &'a RcArray } } -impl<'a, A, D> NdarrayIntoParallelIterator for &'a mut Array +impl<'a, A, D> IntoParallelIterator for &'a mut Array where D: Dimension, A: Sync + Send { @@ -38,7 +38,7 @@ impl<'a, A, D> NdarrayIntoParallelIterator for &'a mut Array } // This is allowed: goes through `.view_mut()`, which is unique access -impl<'a, A, D> NdarrayIntoParallelIterator for &'a mut RcArray +impl<'a, A, D> IntoParallelIterator for &'a mut RcArray where D: Dimension, A: Sync + Send + Clone, { diff --git a/src/parallel/into_traits.rs b/src/parallel/into_traits.rs deleted file mode 100644 index 9abc630ba..000000000 --- a/src/parallel/into_traits.rs +++ /dev/null @@ -1,42 +0,0 @@ - -use rayon::iter::ParallelIterator; - -pub trait NdarrayIntoParallelIterator { - type Iter: ParallelIterator; - type Item: Send; - fn into_par_iter(self) -> Self::Iter; -} - -pub trait NdarrayIntoParallelRefIterator<'x> { - type Iter: ParallelIterator; - type Item: Send + 'x; - fn par_iter(&'x self) -> Self::Iter; -} - -pub trait NdarrayIntoParallelRefMutIterator<'x> { - type Iter: ParallelIterator; - type Item: Send + 'x; - fn par_iter_mut(&'x mut self) -> Self::Iter; -} - -impl<'data, I: 'data + ?Sized> NdarrayIntoParallelRefIterator<'data> for I - where &'data I: NdarrayIntoParallelIterator -{ - type Iter = <&'data I as NdarrayIntoParallelIterator>::Iter; - type Item = <&'data I as NdarrayIntoParallelIterator>::Item; - - fn par_iter(&'data self) -> Self::Iter { - self.into_par_iter() - } -} - -impl<'data, I: 'data + ?Sized> NdarrayIntoParallelRefMutIterator<'data> for I - where &'data mut I: NdarrayIntoParallelIterator -{ - type Iter = <&'data mut I as NdarrayIntoParallelIterator>::Iter; - type Item = <&'data mut I as NdarrayIntoParallelIterator>::Item; - - fn par_iter_mut(&'data mut self) -> Self::Iter { - self.into_par_iter() - } -} diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index def0da6e3..64bf2756b 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -94,23 +94,14 @@ /// Into- traits for creating parallelized iterators. pub mod prelude { // happy and insane; ignorance is bluss - pub use super::NdarrayIntoParallelIterator; - pub use super::NdarrayIntoParallelRefIterator; - pub use super::NdarrayIntoParallelRefMutIterator; - #[doc(no_inline)] - pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator}; + pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator, + IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator}; } pub use self::par::Parallel; -pub use self::into_traits::{ - NdarrayIntoParallelIterator, - NdarrayIntoParallelRefIterator, - NdarrayIntoParallelRefMutIterator, -}; mod par; mod ext_traits; -mod into_traits; mod into_impls; mod zipmacro; diff --git a/src/parallel/par.rs b/src/parallel/par.rs index b7bcf4d5e..13cf10ec8 100644 --- a/src/parallel/par.rs +++ b/src/parallel/par.rs @@ -1,5 +1,6 @@ use rayon::iter::ParallelIterator; +use rayon::prelude::IntoParallelIterator; use rayon::iter::IndexedParallelIterator; use rayon::iter::plumbing::{Consumer, UnindexedConsumer}; use rayon::iter::plumbing::bridge; @@ -14,8 +15,6 @@ use iter::AxisIterMut; use {Dimension}; use {ArrayView, ArrayViewMut}; -use super::NdarrayIntoParallelIterator; - /// Parallel iterator wrapper. #[derive(Copy, Clone, Debug)] pub struct Parallel { @@ -29,7 +28,7 @@ struct ParallelProducer(I); macro_rules! par_iter_wrapper { // thread_bounds are either Sync or Send + Sync ($iter_name:ident, [$($thread_bounds:tt)*]) => { - impl<'a, A, D> NdarrayIntoParallelIterator for $iter_name<'a, A, D> + impl<'a, A, D> IntoParallelIterator for $iter_name<'a, A, D> where D: Dimension, A: $($thread_bounds)*, { @@ -120,7 +119,7 @@ par_iter_wrapper!(AxisIterMut, [Send + Sync]); macro_rules! par_iter_view_wrapper { // thread_bounds are either Sync or Send + Sync ($view_name:ident, [$($thread_bounds:tt)*]) => { - impl<'a, A, D> NdarrayIntoParallelIterator for $view_name<'a, A, D> + impl<'a, A, D> IntoParallelIterator for $view_name<'a, A, D> where D: Dimension, A: $($thread_bounds)*, { @@ -197,7 +196,7 @@ macro_rules! zip_impl { ($([$($p:ident)*],)+) => { $( #[allow(non_snake_case)] - impl),*> NdarrayIntoParallelIterator for Zip<($($p,)*), Dim> + impl),*> IntoParallelIterator for Zip<($($p,)*), Dim> where $($p::Item : Send , )* $($p : Send , )* { From 9fbb90f9e0769cfbb799cc9f5b3877df11ce5b2b Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:12:00 +0100 Subject: [PATCH 04/24] DOC: Update docs for integrated ndarray::parallel --- src/parallel/mod.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 64bf2756b..191cb513e 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -1,18 +1,16 @@ //! Parallelization features for ndarray. //! //! The array views and references to owned arrays all implement -//! `NdarrayIntoParallelIterator` (*); the default parallel iterators (each element +//! `IntoParallelIterator`; the default parallel iterators (each element //! by reference or mutable reference) have no ordering guarantee in their //! parallel implementations. //! //! `.axis_iter()` and `.axis_iter_mut()` also have parallel counterparts, //! and their parallel iterators are indexed (and thus ordered) and exact length. //! -//! `Zip` also implements `NdarrayIntoParallelIterator`, and there is an -//! extension trait so that it can use a method `.par_apply` directly. -//! -//! (*) This regime of a custom trait instead of rayon’s own is since we -//! use this intermediate ndarray-parallel crate. +//! `Zip` also implements `IntoParallelIterator`, and it has direct methods +//! called `.par_apply()` that one can use as direct parallelized replacements +//! for `.apply()`. //! //! # Examples //! @@ -30,7 +28,7 @@ //! fn main() { //! let mut a = Array2::::zeros((128, 128)); //! -//! // Parallel versions of regular array methods (ParMap trait) +//! // Parallel versions of regular array methods //! a.par_map_inplace(|x| *x = x.exp()); //! a.par_mapv_inplace(f64::exp); //! @@ -71,7 +69,6 @@ //! //! use ndarray::Array3; //! use ndarray::Zip; -//! use ndarray::parallel::prelude::*; //! //! type Array3f64 = Array3; //! @@ -93,7 +90,6 @@ /// Into- traits for creating parallelized iterators. pub mod prelude { - // happy and insane; ignorance is bluss #[doc(no_inline)] pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator}; From 4317698a8e300f69c6448b4e74a1ec62acd12bac Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:37:52 +0100 Subject: [PATCH 05/24] DOC: Update parallel doc Here we try to use the new rustdoc links. They should render fine on nightly and thus on docs.rs, and on stable the '[' brackets are visible verbatim, but we hope most are using docs.rs --- src/parallel/mod.rs | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 191cb513e..9d2f78aba 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -1,19 +1,33 @@ //! Parallelization features for ndarray. //! -//! The array views and references to owned arrays all implement -//! `IntoParallelIterator`; the default parallel iterators (each element -//! by reference or mutable reference) have no ordering guarantee in their -//! parallel implementations. +//! Parallelization features are based on the crate [rayon] and its parallel +//! iterators. Ndarray implements conversions from its iterators to parallel +//! iterators, and there are parallelized methods on arrays and on [Zip]. //! -//! `.axis_iter()` and `.axis_iter_mut()` also have parallel counterparts, -//! and their parallel iterators are indexed (and thus ordered) and exact length. +//! This requires the crate feature `rayon` to be enabled. //! -//! `Zip` also implements `IntoParallelIterator`, and it has direct methods -//! called `.par_apply()` that one can use as direct parallelized replacements -//! for `.apply()`. +//! The following implement [rayon::iter::IntoParallelIterator]: //! -//! # Examples +//! - [Array], [ArcArray]: `.par_iter()` and `.par_iter_mut()` +//! - [ArrayView](ArrayView): `.into_par_iter()` +//! - [ArrayViewMut](ArrayViewMut): `.into_par_iter()` +//! - [AxisIter](iter::AxisIter), [AxisIterMut](iter::AxisIterMut): `.into_par_iter()` +//! - [Zip] `.into_par_iter()` +//! +//! The following parallelized methods exist: +//! +//! - [ArrayBase::par_map_inplace()] +//! - [ArrayBase::par_mapv_inplace()] +//! - [Zip::par_apply()] (all arities) //! +//! Note that you can use the parallel iterator for [Zip] to access all other +//! rayon parallel iterator methods. +//! +//! Only the axis iterators are indexed parallel iterators, the rest are all +//! “unindexed”. Use ndarray’s [Zip] for lock step parallel iteration of +//! multiple arrays or producers at a time. +//! +//! # Examples //! //! ## Arrays and array views //! From 1da56d8995114ed9d31ee2b8c97ca7d9991931a1 Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:45:42 +0100 Subject: [PATCH 06/24] FIX: Use where clause in Zip's parallel methods This way they are somewhat easier to decipher in rustdoc. --- src/parallel/ext_traits.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parallel/ext_traits.rs b/src/parallel/ext_traits.rs index d10b560ce..be4bb2ab8 100644 --- a/src/parallel/ext_traits.rs +++ b/src/parallel/ext_traits.rs @@ -43,9 +43,11 @@ macro_rules! zip_impl { ($([$name:ident $($p:ident)*],)+) => { $( #[allow(non_snake_case)] - impl),*> Zip<($($p,)*), Dim> + impl Zip<($($p,)*), D> where $($p::Item : Send , )* $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* { /// The `par_apply` method for `Zip`. /// From 49f79f49c63f03e40fe9880a5cd890966ed1e96d Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 19:47:36 +0100 Subject: [PATCH 07/24] FIX: Use where clauses in Zip's regular methods Easier to read in rustdoc. --- src/zip/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/zip/mod.rs b/src/zip/mod.rs index 366a85c0c..877c97a20 100644 --- a/src/zip/mod.rs +++ b/src/zip/mod.rs @@ -708,7 +708,10 @@ macro_rules! map_impl { ($([$notlast:ident $($p:ident)*],)+) => { $( #[allow(non_snake_case)] - impl),*> Zip<($($p,)*), D> { + impl Zip<($($p,)*), D> + where D: Dimension, + $($p: NdProducer ,)* + { /// Apply a function to all elements of the input arrays, /// visiting elements in lock step. pub fn apply(mut self, mut function: F) From cd7b6c8a05e1b09c580ab23ba3519306662efaad Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 20:32:00 +0100 Subject: [PATCH 08/24] FIX: Use where clauses in Zip's parallel trait impls --- src/parallel/par.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/parallel/par.rs b/src/parallel/par.rs index 13cf10ec8..d98587e86 100644 --- a/src/parallel/par.rs +++ b/src/parallel/par.rs @@ -196,9 +196,11 @@ macro_rules! zip_impl { ($([$($p:ident)*],)+) => { $( #[allow(non_snake_case)] - impl),*> IntoParallelIterator for Zip<($($p,)*), Dim> + impl IntoParallelIterator for Zip<($($p,)*), D> where $($p::Item : Send , )* $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* { type Item = ($($p::Item ,)*); type Iter = Parallel; @@ -210,9 +212,11 @@ macro_rules! zip_impl { } #[allow(non_snake_case)] - impl),*> ParallelIterator for Parallel> + impl ParallelIterator for Parallel> where $($p::Item : Send , )* $($p : Send , )* + D: Dimension, + $($p: NdProducer ,)* { type Item = ($($p::Item ,)*); @@ -228,9 +232,11 @@ macro_rules! zip_impl { } #[allow(non_snake_case)] - impl),*> UnindexedProducer for ParallelProducer> + impl UnindexedProducer for ParallelProducer> where $($p : Send , )* $($p::Item : Send , )* + D: Dimension, + $($p: NdProducer ,)* { type Item = ($($p::Item ,)*); From 3c6df1253a7622e1063013da4bf54b920061681a Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 20:34:35 +0100 Subject: [PATCH 09/24] DOC: Clarify parallel doc --- src/parallel/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 9d2f78aba..64bc6527d 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -1,8 +1,9 @@ //! Parallelization features for ndarray. //! //! Parallelization features are based on the crate [rayon] and its parallel -//! iterators. Ndarray implements conversions from its iterators to parallel -//! iterators, and there are parallelized methods on arrays and on [Zip]. +//! iterators. Ndarray implements the parallel iterable traits for arrays +//! and array views, for some of its iterators and for [Zip]. +//! There are also directly parallelized methods on arrays and on [Zip]. //! //! This requires the crate feature `rayon` to be enabled. //! From e8bd68bc562b1ee76cb60135fe15cb7c7403a05f Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 20:49:57 +0100 Subject: [PATCH 10/24] DOC: Add more notices about crate feature rayon --- src/parallel/into_impls.rs | 10 +++++++--- src/parallel/par.rs | 3 +++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/parallel/into_impls.rs b/src/parallel/into_impls.rs index 72f8d5b0b..4d38939fd 100644 --- a/src/parallel/into_impls.rs +++ b/src/parallel/into_impls.rs @@ -1,8 +1,9 @@ -use {Array, RcArray, Dimension, ArrayView, ArrayViewMut}; +use {Array, ArcArray, Dimension, ArrayView, ArrayViewMut}; use super::prelude::IntoParallelIterator; use super::Parallel; +/// Requires crate feature `rayon`. impl<'a, A, D> IntoParallelIterator for &'a Array where D: Dimension, A: Sync @@ -15,7 +16,8 @@ impl<'a, A, D> IntoParallelIterator for &'a Array } // This is allowed: goes through `.view()` -impl<'a, A, D> IntoParallelIterator for &'a RcArray +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a ArcArray where D: Dimension, A: Sync { @@ -26,6 +28,7 @@ impl<'a, A, D> IntoParallelIterator for &'a RcArray } } +/// Requires crate feature `rayon`. impl<'a, A, D> IntoParallelIterator for &'a mut Array where D: Dimension, A: Sync + Send @@ -38,7 +41,8 @@ impl<'a, A, D> IntoParallelIterator for &'a mut Array } // This is allowed: goes through `.view_mut()`, which is unique access -impl<'a, A, D> IntoParallelIterator for &'a mut RcArray +/// Requires crate feature `rayon`. +impl<'a, A, D> IntoParallelIterator for &'a mut ArcArray where D: Dimension, A: Sync + Send + Clone, { diff --git a/src/parallel/par.rs b/src/parallel/par.rs index d98587e86..c5d2da0bf 100644 --- a/src/parallel/par.rs +++ b/src/parallel/par.rs @@ -28,6 +28,7 @@ struct ParallelProducer(I); macro_rules! par_iter_wrapper { // thread_bounds are either Sync or Send + Sync ($iter_name:ident, [$($thread_bounds:tt)*]) => { + /// Requires crate feature `rayon`. impl<'a, A, D> IntoParallelIterator for $iter_name<'a, A, D> where D: Dimension, A: $($thread_bounds)*, @@ -119,6 +120,7 @@ par_iter_wrapper!(AxisIterMut, [Send + Sync]); macro_rules! par_iter_view_wrapper { // thread_bounds are either Sync or Send + Sync ($view_name:ident, [$($thread_bounds:tt)*]) => { + /// Requires crate feature `rayon`. impl<'a, A, D> IntoParallelIterator for $view_name<'a, A, D> where D: Dimension, A: $($thread_bounds)*, @@ -195,6 +197,7 @@ use {Zip, NdProducer, FoldWhile}; macro_rules! zip_impl { ($([$($p:ident)*],)+) => { $( + /// Requires crate feature `rayon`. #[allow(non_snake_case)] impl IntoParallelIterator for Zip<($($p,)*), D> where $($p::Item : Send , )* From 8a868ebc756992d9da3f1f3c76f8007615e8e94a Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 21:53:13 +0100 Subject: [PATCH 11/24] TEST: Move in all the tests from parallel --- tests/par_azip.rs | 68 +++++++++++++++++++++++++++++++++++++ tests/par_rayon.rs | 55 ++++++++++++++++++++++++++++++ tests/par_zip.rs | 83 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+) create mode 100644 tests/par_azip.rs create mode 100644 tests/par_rayon.rs create mode 100644 tests/par_zip.rs diff --git a/tests/par_azip.rs b/tests/par_azip.rs new file mode 100644 index 000000000..93caee62e --- /dev/null +++ b/tests/par_azip.rs @@ -0,0 +1,68 @@ +#![cfg(feature="rayon")] + +#[macro_use] +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; +use itertools::{enumerate}; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[test] +fn test_par_azip1() { + let mut a = Array::zeros(62); + let b = Array::from_elem(62, 42); + par_azip!(mut a in { *a = 42 }); + assert_eq!(a, b); +} + +#[test] +fn test_par_azip2() { + let mut a = Array::zeros((5, 7)); + let b = Array::from_shape_fn(a.dim(), |(i, j)| 1. / (i + 2*j) as f32); + par_azip!(mut a, b in { *a = b; }); + assert_eq!(a, b); +} + +#[test] +fn test_par_azip3() { + let mut a = [0.; 32]; + let mut b = [0.; 32]; + let mut c = [0.; 32]; + for (i, elt) in enumerate(&mut b) { + *elt = i as f32; + } + + par_azip!(mut a (&mut a[..]), b (&b[..]), mut c (&mut c[..]) in { + *a += b / 10.; + *c = a.sin(); + }); + let res = Array::linspace(0., 3.1, 32).mapv_into(f32::sin); + assert!(res.all_close(&ArrayView::from(&c), 1e-4)); +} + +#[should_panic] +#[test] +fn test_zip_dim_mismatch_1() { + let mut a = Array::zeros((5, 7)); + let mut d = a.raw_dim(); + d[0] += 1; + let b = Array::from_shape_fn(d, |(i, j)| 1. / (i + 2*j) as f32); + par_azip!(mut a, b in { *a = b; }); +} + +#[test] +fn test_indices_1() { + let mut a1 = Array::default(12); + for (i, elt) in a1.indexed_iter_mut() { + *elt = i; + } + + let count = AtomicUsize::new(0); + par_azip!(index i, elt (&a1) in { + count.fetch_add(1, Ordering::SeqCst); + assert_eq!(elt, i); + }); + assert_eq!(count.load(Ordering::SeqCst), a1.len()); +} diff --git a/tests/par_rayon.rs b/tests/par_rayon.rs new file mode 100644 index 000000000..3f457c46f --- /dev/null +++ b/tests/par_rayon.rs @@ -0,0 +1,55 @@ +#![cfg(feature="rayon")] + +extern crate rayon; + +#[macro_use] +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; + +const M: usize = 1024 * 10; +const N: usize = 100; + +#[test] +fn test_axis_iter() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + assert_eq!(a.axis_iter(Axis(0)).len(), M); + let s: f64 = a.axis_iter(Axis(0)).into_par_iter().map(|x| x.sum()).sum(); + println!("{:?}", a.slice(s![..10, ..5])); + assert_eq!(s, a.sum()); +} + +#[test] +fn test_axis_iter_mut() { + let mut a = Array::linspace(0., 1.0f64, M * N).into_shape((M, N)).unwrap(); + let b = a.mapv(|x| x.exp()); + a.axis_iter_mut(Axis(0)).into_par_iter().for_each(|mut v| v.mapv_inplace(|x| x.exp())); + println!("{:?}", a.slice(s![..10, ..5])); + assert!(a.all_close(&b, 0.001)); +} + +#[test] +fn test_regular_iter() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + let s: f64 = a.view().into_par_iter().map(|&x| x).sum(); + println!("{:?}", a.slice(s![..10, ..5])); + assert_eq!(s, a.sum()); +} + +#[test] +fn test_regular_iter_collect() { + let mut a = Array2::::zeros((M, N)); + for (i, mut v) in a.axis_iter_mut(Axis(0)).enumerate() { + v.fill(i as _); + } + let v = a.view().into_par_iter().map(|&x| x).collect::>(); + assert_eq!(v.len(), a.len()); +} diff --git a/tests/par_zip.rs b/tests/par_zip.rs new file mode 100644 index 000000000..9cba9888c --- /dev/null +++ b/tests/par_zip.rs @@ -0,0 +1,83 @@ +#![cfg(feature="rayon")] + +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; + +use ndarray::Zip; + +const M: usize = 1024 * 10; +const N: usize = 100; + +#[test] +fn test_zip_1() { + let mut a = Array2::::zeros((M, N)); + + Zip::from(&mut a) + .par_apply(|x| { + *x = x.exp() + }); +} + +#[test] +fn test_zip_index_1() { + let mut a = Array2::default((10, 10)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_2() { + let mut a = Array2::default((M, N)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_3() { + let mut a = Array::default((1, 2, 1, 2, 3)); + + Zip::indexed(&mut a) + .par_apply(|i, x| { + *x = i; + }); + + for (i, elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } +} + +#[test] +fn test_zip_index_4() { + let mut a = Array2::zeros((M, N)); + let mut b = Array2::zeros((M, N)); + + Zip::indexed(&mut a) + .and(&mut b) + .par_apply(|(i, j), x, y| { + *x = i; + *y = j; + }); + + for ((i, _), elt) in a.indexed_iter() { + assert_eq!(*elt, i); + } + for ((_, j), elt) in b.indexed_iter() { + assert_eq!(*elt, j); + } +} From c8febb0f7eba557145c7cb2eef672596f884b4cc Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 21:53:41 +0100 Subject: [PATCH 12/24] MAINT: Add rayon to docs features: show in docs.rs and run on travis --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6246b78df..81da3bd0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,7 +59,7 @@ test-blas-openblas-sys = ["blas"] test = ["test-blas-openblas-sys"] # This feature is used for docs -docs = ["serde-1"] +docs = ["serde-1", "rayon"] [profile.release] [profile.bench] From 81b0906fe7b9d2a50d8659ea066b7b8b8ec36827 Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 21:54:57 +0100 Subject: [PATCH 13/24] MAINT: Drop old parallel crate from travis tests --- scripts/all-tests.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/all-tests.sh b/scripts/all-tests.sh index e2e00b9d6..5ad7306ab 100755 --- a/scripts/all-tests.sh +++ b/scripts/all-tests.sh @@ -11,7 +11,6 @@ cargo test --verbose --no-default-features cargo test --release --verbose --no-default-features cargo build --verbose --features "$FEATURES" cargo test --verbose --features "$FEATURES" -cargo test --manifest-path=parallel/Cargo.toml --verbose cargo test --manifest-path=serialization-tests/Cargo.toml --verbose cargo test --manifest-path=blas-tests/Cargo.toml --verbose CARGO_TARGET_DIR=target/ cargo test --manifest-path=numeric-tests/Cargo.toml --verbose From 68b751e7cc37491facd27a7753faaf1ff491822c Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 24 Nov 2018 22:18:13 +0100 Subject: [PATCH 14/24] TEST: Move benchmarks from parallel to main crate --- Cargo.toml | 1 + benches/par_rayon.rs | 158 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 benches/par_rayon.rs diff --git a/Cargo.toml b/Cargo.toml index 81da3bd0d..1c6c81323 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ serde = { version = "1.0", optional = true } defmac = "0.2" quickcheck = { version = "0.7.2", default-features = false } rawpointer = "0.1" +num_cpus = "1.2" [features] # Enable blas usage diff --git a/benches/par_rayon.rs b/benches/par_rayon.rs new file mode 100644 index 000000000..a52e627d0 --- /dev/null +++ b/benches/par_rayon.rs @@ -0,0 +1,158 @@ +#![cfg(feature="rayon")] +#![feature(test)] + +extern crate rayon; + +#[macro_use] +extern crate ndarray; +extern crate itertools; + +use ndarray::prelude::*; +use ndarray::parallel::prelude::*; + +extern crate num_cpus; +extern crate test; +use test::Bencher; + +use ndarray::Zip; + +const EXP_N: usize = 256; +const ADDN: usize = 512; + +use std::cmp::max; + +fn set_threads() { + let n = max(1, num_cpus::get() / 2); + //println!("Using {} threads", n); + let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global(); +} + +#[bench] +fn map_exp_regular(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((EXP_N, EXP_N)); + a.swap_axes(0, 1); + bench.iter(|| { + a.mapv_inplace(|x| x.exp()); + }); +} + +#[bench] +fn rayon_exp_regular(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((EXP_N, EXP_N)); + a.swap_axes(0, 1); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = x.exp()); + }); +} + +const FASTEXP: usize = EXP_N; + +#[inline] +fn fastexp(x: f64) -> f64 { + let x = 1. + x/1024.; + x.powi(1024) +} + +#[bench] +fn map_fastexp_regular(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.mapv_inplace(|x| fastexp(x)) + }); +} + +#[bench] +fn rayon_fastexp_regular(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x)); + }); +} + +#[bench] +fn map_fastexp_cut(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + let mut a = a.slice_mut(s![.., ..-1]); + bench.iter(|| { + a.mapv_inplace(|x| fastexp(x)) + }); +} + +#[bench] +fn rayon_fastexp_cut(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + let mut a = a.slice_mut(s![.., ..-1]); + bench.iter(|| { + a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x)); + }); +} + +#[bench] +fn map_fastexp_by_axis(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + for mut sheet in a.axis_iter_mut(Axis(0)) { + sheet.mapv_inplace(fastexp) + } + }); +} + +#[bench] +fn rayon_fastexp_by_axis(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + a.axis_iter_mut(Axis(0)).into_par_iter() + .for_each(|mut sheet| sheet.mapv_inplace(fastexp)); + }); +} + +#[bench] +fn rayon_fastexp_zip(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((FASTEXP, FASTEXP)); + bench.iter(|| { + Zip::from(&mut a).into_par_iter().for_each(|(elt, )| *elt = fastexp(*elt)); + }); +} + +#[bench] +fn add(bench: &mut Bencher) +{ + let mut a = Array2::::zeros((ADDN, ADDN)); + let b = Array2::::zeros((ADDN, ADDN)); + let c = Array2::::zeros((ADDN, ADDN)); + let d = Array2::::zeros((ADDN, ADDN)); + bench.iter(|| { + azip!(mut a, b, c, d in { + *a += b.exp() + c.exp() + d.exp(); + }); + }); +} + +#[bench] +fn rayon_add(bench: &mut Bencher) +{ + set_threads(); + let mut a = Array2::::zeros((ADDN, ADDN)); + let b = Array2::::zeros((ADDN, ADDN)); + let c = Array2::::zeros((ADDN, ADDN)); + let d = Array2::::zeros((ADDN, ADDN)); + bench.iter(|| { + par_azip!(mut a, b, c, d in { + *a += b.exp() + c.exp() + d.exp(); + }); + }); +} From 82f5cb7e837363da9cbd1612050601f1f70f81f7 Mon Sep 17 00:00:00 2001 From: bluss Date: Fri, 30 Nov 2018 19:19:21 +0100 Subject: [PATCH 15/24] FIX: Use $crate in par_azip and add to the ndarray::parallel prelude --- benches/par_rayon.rs | 1 - src/parallel/mod.rs | 3 +++ src/parallel/zipmacro.rs | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/benches/par_rayon.rs b/benches/par_rayon.rs index a52e627d0..774b7eba9 100644 --- a/benches/par_rayon.rs +++ b/benches/par_rayon.rs @@ -3,7 +3,6 @@ extern crate rayon; -#[macro_use] extern crate ndarray; extern crate itertools; diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 64bc6527d..6462692a8 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -108,9 +108,12 @@ pub mod prelude { #[doc(no_inline)] pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator, IntoParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator}; + + pub use super::par_azip; } pub use self::par::Parallel; +pub use par_azip; mod par; mod ext_traits; diff --git a/src/parallel/zipmacro.rs b/src/parallel/zipmacro.rs index 1c69b1d9d..17a15cc29 100644 --- a/src/parallel/zipmacro.rs +++ b/src/parallel/zipmacro.rs @@ -30,10 +30,10 @@ /// ## Examples /// /// ```rust -/// #[macro_use(par_azip)] /// extern crate ndarray; /// /// use ndarray::Array2; +/// use ndarray::parallel::par_azip; /// /// type M = Array2; /// @@ -53,11 +53,11 @@ macro_rules! par_azip { // Build Zip Rule (index) (@parse [index => $a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { - par_azip!(@finish ($crate::Zip::indexed($a)) [$($aa,)*] $t1 in $t2) + $crate::par_azip!(@finish ($crate::Zip::indexed($a)) [$($aa,)*] $t1 in $t2) }; // Build Zip Rule (no index) (@parse [$a:expr, $($aa:expr,)*] $t1:tt in $t2:tt) => { - par_azip!(@finish ($crate::Zip::from($a)) [$($aa,)*] $t1 in $t2) + $crate::par_azip!(@finish ($crate::Zip::from($a)) [$($aa,)*] $t1 in $t2) }; // Build Finish Rule (both) (@finish ($z:expr) [$($aa:expr,)*] [$($p:pat,)+] in { $($t:tt)*}) => { @@ -74,31 +74,31 @@ macro_rules! par_azip { // parsing stack: [expressions] [patterns] (one per operand) // index uses empty [] -- must be first (@parse [] [] index $i:pat, $($t:tt)*) => { - par_azip!(@parse [index =>] [$i,] $($t)*); + $crate::par_azip!(@parse [index =>] [$i,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident ($e:expr) $($t:tt)*) => { - par_azip!(@parse [$($exprs)* $e,] [$($pats)* mut $x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* mut $x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] mut $x:ident $($t:tt)*) => { - par_azip!(@parse [$($exprs)* &mut $x,] [$($pats)* mut $x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* &mut $x,] [$($pats)* mut $x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] , $($t:tt)*) => { - par_azip!(@parse [$($exprs)*] [$($pats)*] $($t)*); + $crate::par_azip!(@parse [$($exprs)*] [$($pats)*] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident ($e:expr) $($t:tt)*) => { - par_azip!(@parse [$($exprs)* $e,] [$($pats)* $x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* $x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] ref $x:ident $($t:tt)*) => { - par_azip!(@parse [$($exprs)* &$x,] [$($pats)* $x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* &$x,] [$($pats)* $x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident ($e:expr) $($t:tt)*) => { - par_azip!(@parse [$($exprs)* $e,] [$($pats)* &$x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* $e,] [$($pats)* &$x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] $x:ident $($t:tt)*) => { - par_azip!(@parse [$($exprs)* &$x,] [$($pats)* &$x,] $($t)*); + $crate::par_azip!(@parse [$($exprs)* &$x,] [$($pats)* &$x,] $($t)*); }; (@parse [$($exprs:tt)*] [$($pats:tt)*] $($t:tt)*) => { }; ($($t:tt)*) => { - par_azip!(@parse [] [] $($t)*); + $crate::par_azip!(@parse [] [] $($t)*); } } From 566d28b33acb8496beaa81cb17afde70ca720123 Mon Sep 17 00:00:00 2001 From: bluss Date: Sat, 1 Dec 2018 22:19:42 +0100 Subject: [PATCH 16/24] DOC: Minor edits to ndarray::parallel docs --- src/parallel/mod.rs | 5 +++-- src/parallel/zipmacro.rs | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 6462692a8..8b337d6af 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -7,7 +7,8 @@ //! //! This requires the crate feature `rayon` to be enabled. //! -//! The following implement [rayon::iter::IntoParallelIterator]: +//! The following types implement parallel iterators, accessed using these +//! methods: //! //! - [Array], [ArcArray]: `.par_iter()` and `.par_iter_mut()` //! - [ArrayView](ArrayView): `.into_par_iter()` @@ -15,7 +16,7 @@ //! - [AxisIter](iter::AxisIter), [AxisIterMut](iter::AxisIterMut): `.into_par_iter()` //! - [Zip] `.into_par_iter()` //! -//! The following parallelized methods exist: +//! The following other parallelized methods exist: //! //! - [ArrayBase::par_map_inplace()] //! - [ArrayBase::par_mapv_inplace()] diff --git a/src/parallel/zipmacro.rs b/src/parallel/zipmacro.rs index 17a15cc29..a61ae88d4 100644 --- a/src/parallel/zipmacro.rs +++ b/src/parallel/zipmacro.rs @@ -7,9 +7,11 @@ // except according to those terms. #[macro_export] -/// Parallel version of the `azip!` macro. +/// Parallelized array zip macro: lock step function application across several +/// arrays and producers. /// -/// See the `azip!` documentation for more details. +/// This is a version of the [`azip`] macro that requires the crate feature +/// `rayon` to be enabled. /// /// This example: /// From 6f1108cc1abc6feac0b2b65c3811ab50e8b6cb50 Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 20:42:02 +0100 Subject: [PATCH 17/24] FIX: Rename ext_traits to impl_par_methods --- src/parallel/{ext_traits.rs => impl_par_methods.rs} | 0 src/parallel/mod.rs | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/parallel/{ext_traits.rs => impl_par_methods.rs} (100%) diff --git a/src/parallel/ext_traits.rs b/src/parallel/impl_par_methods.rs similarity index 100% rename from src/parallel/ext_traits.rs rename to src/parallel/impl_par_methods.rs diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 8b337d6af..8dc67d118 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -117,6 +117,6 @@ pub use self::par::Parallel; pub use par_azip; mod par; -mod ext_traits; +mod impl_par_methods; mod into_impls; mod zipmacro; From 219dab0b90d86e44466d016c502425c9feb1c602 Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 20:42:21 +0100 Subject: [PATCH 18/24] DOC: Improve docs for the parallel array methods --- src/parallel/impl_par_methods.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/parallel/impl_par_methods.rs b/src/parallel/impl_par_methods.rs index be4bb2ab8..4ff54f749 100644 --- a/src/parallel/impl_par_methods.rs +++ b/src/parallel/impl_par_methods.rs @@ -9,15 +9,20 @@ use { use parallel::prelude::*; -// Arrays - +/// # Parallel methods +/// +/// These methods require crate feature `rayon`. impl ArrayBase where S: DataMut, D: Dimension, A: Send + Sync, { - /// Parallel version of `map_inplace` + /// Parallel version of `map_inplace`. + /// + /// Modify the array in place by calling `f` by mutable reference on each element. + /// + /// Elements are visited in arbitrary order. pub fn par_map_inplace(&mut self, f: F) where F: Fn(&mut A) + Sync + Send { @@ -25,6 +30,11 @@ impl ArrayBase } /// Parallel version of `mapv_inplace`. + /// + /// Modify the array in place by calling `f` by **v**alue on each element. + /// The array is updated with the new values. + /// + /// Elements are visited in arbitrary order. pub fn par_mapv_inplace(&mut self, f: F) where F: Fn(A) -> A + Sync + Send, A: Clone, From 558a2d28d247703af13355730d062f3e0115664d Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 20:43:45 +0100 Subject: [PATCH 19/24] FIX: Move parallel ArrayBase methods up close to the other mapping methods --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 4e872e0ba..50a85550c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1336,6 +1336,10 @@ impl ArrayBase } +// parallel methods +#[cfg(feature="rayon")] +pub mod parallel; + mod impl_1d; mod impl_2d; mod impl_dyn; @@ -1353,10 +1357,6 @@ mod impl_views; // Array raw view methods mod impl_raw_views; -// parallel methods -#[cfg(feature="rayon")] -pub mod parallel; - /// A contiguous array shape of n dimensions. /// /// Either c- or f- memory ordered (*c* a.k.a *row major* is the default). From 74aa19e0b567f2efb824246479ba9862bde518cf Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 21:21:46 +0100 Subject: [PATCH 20/24] FIX: Remove unused $name parameter in parallel macro --- src/parallel/impl_par_methods.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/parallel/impl_par_methods.rs b/src/parallel/impl_par_methods.rs index 4ff54f749..462eb8cc8 100644 --- a/src/parallel/impl_par_methods.rs +++ b/src/parallel/impl_par_methods.rs @@ -50,7 +50,7 @@ impl ArrayBase // Zip macro_rules! zip_impl { - ($([$name:ident $($p:ident)*],)+) => { + ($([$($p:ident)*],)+) => { $( #[allow(non_snake_case)] impl Zip<($($p,)*), D> @@ -76,10 +76,10 @@ macro_rules! zip_impl { } zip_impl!{ - [ParApply1 P1], - [ParApply2 P1 P2], - [ParApply3 P1 P2 P3], - [ParApply4 P1 P2 P3 P4], - [ParApply5 P1 P2 P3 P4 P5], - [ParApply6 P1 P2 P3 P4 P5 P6], + [P1], + [P1 P2], + [P1 P2 P3], + [P1 P2 P3 P4], + [P1 P2 P3 P4 P5], + [P1 P2 P3 P4 P5 P6], } From e53c273ce1993546b429913f02a1cf2501b43c27 Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 21:25:12 +0100 Subject: [PATCH 21/24] DOC: Use backticks in ndarray::parallel mod docs --- src/parallel/mod.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 8dc67d118..4bce0f3db 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -10,17 +10,17 @@ //! The following types implement parallel iterators, accessed using these //! methods: //! -//! - [Array], [ArcArray]: `.par_iter()` and `.par_iter_mut()` -//! - [ArrayView](ArrayView): `.into_par_iter()` -//! - [ArrayViewMut](ArrayViewMut): `.into_par_iter()` -//! - [AxisIter](iter::AxisIter), [AxisIterMut](iter::AxisIterMut): `.into_par_iter()` -//! - [Zip] `.into_par_iter()` +//! - [`Array`], [`ArcArray`]: `.par_iter()` and `.par_iter_mut()` +//! - [`ArrayView`](ArrayView): `.into_par_iter()` +//! - [`ArrayViewMut`](ArrayViewMut): `.into_par_iter()` +//! - [`AxisIter`](iter::AxisIter), [`AxisIterMut`](iter::AxisIterMut): `.into_par_iter()` +//! - [`Zip`] `.into_par_iter()` //! //! The following other parallelized methods exist: //! -//! - [ArrayBase::par_map_inplace()] -//! - [ArrayBase::par_mapv_inplace()] -//! - [Zip::par_apply()] (all arities) +//! - [`ArrayBase::par_map_inplace()`] +//! - [`ArrayBase::par_mapv_inplace()`] +//! - [`Zip::par_apply()`] (all arities) //! //! Note that you can use the parallel iterator for [Zip] to access all other //! rayon parallel iterator methods. From d19585ec0c674fba8a86ccdeef40c417ca7c8b92 Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 21:28:09 +0100 Subject: [PATCH 22/24] DOC: Edit doc for ndarray::parallel::prelude --- src/parallel/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parallel/mod.rs b/src/parallel/mod.rs index 4bce0f3db..dfed8a636 100644 --- a/src/parallel/mod.rs +++ b/src/parallel/mod.rs @@ -104,7 +104,7 @@ //! ``` -/// Into- traits for creating parallelized iterators. +/// Into- traits for creating parallelized iterators and/or using [`par_azip!`] pub mod prelude { #[doc(no_inline)] pub use rayon::prelude::{ParallelIterator, IndexedParallelIterator, From b3c8fe72bb5086f0353a34b3af7793aaa60cc8ab Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 21:36:51 +0100 Subject: [PATCH 23/24] DOC: Update main module doc and readme for ndarray::parallel --- README.rst | 5 +++++ src/lib.rs | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 3e8959a9a..1fa147a62 100644 --- a/README.rst +++ b/README.rst @@ -52,6 +52,11 @@ your `Cargo.toml`. - Optional, compatible with Rust stable - Enables serialization support for serde 1.0 +- ``rayon`` + + - Optional, compatible with Rust stable + - Enables parallel iterators, parallelized methods and ``par_azip!``. + - ``blas`` - Optional and experimental, compatible with Rust stable diff --git a/src/lib.rs b/src/lib.rs index 50a85550c..1c884e0a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,11 +55,8 @@ //! needs matching memory layout to be efficient (with some exceptions). //! + Efficient floating point matrix multiplication even for very large //! matrices; can optionally use BLAS to improve it further. -//! + See also the [`ndarray-parallel`] crate for integration with rayon. //! - **Requires Rust 1.30** //! -//! [`ndarray-parallel`]: https://docs.rs/ndarray-parallel -//! //! ## Crate Feature Flags //! //! The following crate feature flags are available. They are configured in your @@ -68,6 +65,9 @@ //! - `serde-1` //! - Optional, compatible with Rust stable //! - Enables serialization support for serde 1.0 +//! - `rayon` +//! - Optional, compatible with Rust stable +//! - Enables parallel iterators, parallelized methods and [`par_azip!`]. //! - `blas` //! - Optional and experimental, compatible with Rust stable //! - Enable transparent BLAS support for matrix multiplication. From b677c77d4eaaaeb99a058f02fd5c04b6e8ae7a75 Mon Sep 17 00:00:00 2001 From: bluss Date: Mon, 3 Dec 2018 21:46:12 +0100 Subject: [PATCH 24/24] TEST: Drop num_cpus as benchmark dev-dependency We don't need another dep just to have faster rayon benchmarks (they are for comparison anyway.) --- Cargo.toml | 1 - benches/par_rayon.rs | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1c6c81323..81da3bd0d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,6 @@ serde = { version = "1.0", optional = true } defmac = "0.2" quickcheck = { version = "0.7.2", default-features = false } rawpointer = "0.1" -num_cpus = "1.2" [features] # Enable blas usage diff --git a/benches/par_rayon.rs b/benches/par_rayon.rs index 774b7eba9..e207a65aa 100644 --- a/benches/par_rayon.rs +++ b/benches/par_rayon.rs @@ -9,7 +9,6 @@ extern crate itertools; use ndarray::prelude::*; use ndarray::parallel::prelude::*; -extern crate num_cpus; extern crate test; use test::Bencher; @@ -21,9 +20,10 @@ const ADDN: usize = 512; use std::cmp::max; fn set_threads() { - let n = max(1, num_cpus::get() / 2); - //println!("Using {} threads", n); - let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global(); + // Consider setting a fixed number of threads here, for example to avoid + // oversubscribing on hyperthreaded cores. + // let n = 4; + // let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global(); } #[bench]