Merge pull request #563 from rust-ndarray/integrate-rayon
Integrate ndarray-parallel and make rayon an optional feature
bluss committed Dec 3, 2018
2 parents 1cf7963 + b677c77 commit ad714f8
Showing 14 changed files with 1,030 additions and 6 deletions.
4 changes: 3 additions & 1 deletion Cargo.toml
@@ -31,6 +31,8 @@ num-traits = "0.2"
num-complex = "0.2"
itertools = { version = "0.7.0", default-features = false }

rayon = { version = "1.0.3", optional = true }

# Use via the `blas` crate feature!
cblas-sys = { version = "0.1.4", optional = true, default-features = false }
blas-src = { version = "0.2.0", optional = true, default-features = false }
@@ -57,7 +59,7 @@ test-blas-openblas-sys = ["blas"]
test = ["test-blas-openblas-sys"]

# This feature is used for docs
docs = ["serde-1"]
docs = ["serde-1", "rayon"]

[profile.release]
[profile.bench]
5 changes: 5 additions & 0 deletions README.rst
@@ -52,6 +52,11 @@ your `Cargo.toml`.
- Optional, compatible with Rust stable
- Enables serialization support for serde 1.0

- ``rayon``

- Optional, compatible with Rust stable
- Enables parallel iterators, parallelized methods and ``par_azip!``.

- ``blas``

- Optional and experimental, compatible with Rust stable
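To illustrate what the new ``rayon`` feature enables, here is a minimal sketch (not part of this commit) of parallel element-wise mutation; it assumes a downstream crate depends on ndarray with ``features = ["rayon"]`` and uses rayon's default global thread pool:

// Hedged sketch: parallel map over every element via the feature-gated iterators.
extern crate ndarray;

use ndarray::prelude::*;
use ndarray::parallel::prelude::*;

fn main() {
    let mut a = Array2::<f64>::zeros((128, 128));
    // Each element is visited by rayon's worker threads, in arbitrary order.
    a.view_mut().into_par_iter().for_each(|x| *x = x.exp());
}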
157 changes: 157 additions & 0 deletions benches/par_rayon.rs
@@ -0,0 +1,157 @@
#![cfg(feature="rayon")]
#![feature(test)]

extern crate rayon;

extern crate ndarray;
extern crate itertools;

use ndarray::prelude::*;
use ndarray::parallel::prelude::*;

extern crate test;
use test::Bencher;

use ndarray::Zip;

const EXP_N: usize = 256;
const ADDN: usize = 512;

use std::cmp::max;

fn set_threads() {
    // Consider setting a fixed number of threads here, for example to avoid
    // oversubscribing on hyperthreaded cores.
    // let n = 4;
    // let _ = rayon::ThreadPoolBuilder::new().num_threads(n).build_global();
}

#[bench]
fn map_exp_regular(bench: &mut Bencher)
{
    let mut a = Array2::<f64>::zeros((EXP_N, EXP_N));
    a.swap_axes(0, 1);
    bench.iter(|| {
        a.mapv_inplace(|x| x.exp());
    });
}

#[bench]
fn rayon_exp_regular(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((EXP_N, EXP_N));
    a.swap_axes(0, 1);
    bench.iter(|| {
        a.view_mut().into_par_iter().for_each(|x| *x = x.exp());
    });
}

const FASTEXP: usize = EXP_N;

/// Cheap stand-in for `exp`: (1 + x/1024)^1024 approximates e^x.
#[inline]
fn fastexp(x: f64) -> f64 {
    let x = 1. + x/1024.;
    x.powi(1024)
}

#[bench]
fn map_fastexp_regular(bench: &mut Bencher)
{
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        a.mapv_inplace(|x| fastexp(x))
    });
}

#[bench]
fn rayon_fastexp_regular(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x));
    });
}

#[bench]
fn map_fastexp_cut(bench: &mut Bencher)
{
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    let mut a = a.slice_mut(s![.., ..-1]);
    bench.iter(|| {
        a.mapv_inplace(|x| fastexp(x))
    });
}

#[bench]
fn rayon_fastexp_cut(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    let mut a = a.slice_mut(s![.., ..-1]);
    bench.iter(|| {
        a.view_mut().into_par_iter().for_each(|x| *x = fastexp(*x));
    });
}

#[bench]
fn map_fastexp_by_axis(bench: &mut Bencher)
{
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        for mut sheet in a.axis_iter_mut(Axis(0)) {
            sheet.mapv_inplace(fastexp)
        }
    });
}

#[bench]
fn rayon_fastexp_by_axis(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        a.axis_iter_mut(Axis(0)).into_par_iter()
            .for_each(|mut sheet| sheet.mapv_inplace(fastexp));
    });
}

#[bench]
fn rayon_fastexp_zip(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((FASTEXP, FASTEXP));
    bench.iter(|| {
        Zip::from(&mut a).into_par_iter().for_each(|(elt, )| *elt = fastexp(*elt));
    });
}

#[bench]
fn add(bench: &mut Bencher)
{
    let mut a = Array2::<f64>::zeros((ADDN, ADDN));
    let b = Array2::<f64>::zeros((ADDN, ADDN));
    let c = Array2::<f64>::zeros((ADDN, ADDN));
    let d = Array2::<f64>::zeros((ADDN, ADDN));
    bench.iter(|| {
        azip!(mut a, b, c, d in {
            *a += b.exp() + c.exp() + d.exp();
        });
    });
}

#[bench]
fn rayon_add(bench: &mut Bencher)
{
    set_threads();
    let mut a = Array2::<f64>::zeros((ADDN, ADDN));
    let b = Array2::<f64>::zeros((ADDN, ADDN));
    let c = Array2::<f64>::zeros((ADDN, ADDN));
    let d = Array2::<f64>::zeros((ADDN, ADDN));
    bench.iter(|| {
        par_azip!(mut a, b, c, d in {
            *a += b.exp() + c.exp() + d.exp();
        });
    });
}
1 change: 0 additions & 1 deletion scripts/all-tests.sh
@@ -11,7 +11,6 @@ cargo test --verbose --no-default-features
cargo test --release --verbose --no-default-features
cargo build --verbose --features "$FEATURES"
cargo test --verbose --features "$FEATURES"
cargo test --manifest-path=parallel/Cargo.toml --verbose
cargo test --manifest-path=serialization-tests/Cargo.toml --verbose
cargo test --manifest-path=blas-tests/Cargo.toml --verbose
CARGO_TARGET_DIR=target/ cargo test --manifest-path=numeric-tests/Cargo.toml --verbose
13 changes: 10 additions & 3 deletions src/lib.rs
@@ -55,11 +55,8 @@
//! needs matching memory layout to be efficient (with some exceptions).
//! + Efficient floating point matrix multiplication even for very large
//! matrices; can optionally use BLAS to improve it further.
//! + See also the [`ndarray-parallel`] crate for integration with rayon.
//! - **Requires Rust 1.30**
//!
//! [`ndarray-parallel`]: https://docs.rs/ndarray-parallel
//!
//! ## Crate Feature Flags
//!
//! The following crate feature flags are available. They are configured in your
@@ -68,6 +65,9 @@
//! - `serde-1`
//! - Optional, compatible with Rust stable
//! - Enables serialization support for serde 1.0
//! - `rayon`
//! - Optional, compatible with Rust stable
//! - Enables parallel iterators, parallelized methods and [`par_azip!`].
//! - `blas`
//! - Optional and experimental, compatible with Rust stable
//! - Enable transparent BLAS support for matrix multiplication.
@@ -87,6 +87,9 @@
#[cfg(feature = "serde-1")]
extern crate serde;

#[cfg(feature="rayon")]
extern crate rayon;

#[cfg(feature="blas")]
extern crate cblas_sys;
#[cfg(feature="blas")]
@@ -1333,6 +1336,10 @@ impl<A, S, D> ArrayBase<S, D>
}


// parallel methods
#[cfg(feature="rayon")]
pub mod parallel;

mod impl_1d;
mod impl_2d;
mod impl_dyn;
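The `par_azip!` macro referenced in the documentation above is the parallel counterpart of `azip!`. A minimal sketch of its use, assuming the `rayon` feature is enabled and using the same macro syntax as the benchmark added by this commit:

// Hedged sketch: lock-step parallel traversal of three arrays with par_azip!.
#[macro_use]
extern crate ndarray;

use ndarray::prelude::*;

fn main() {
    let mut a = Array2::<f64>::zeros((64, 64));
    let b = Array2::<f64>::ones((64, 64));
    let c = Array2::<f64>::ones((64, 64));
    // `a` is mutated in place; the work is split across rayon's worker threads.
    par_azip!(mut a, b, c in {
        *a = b + c;
    });
    assert_eq!(a[[0, 0]], 2.0);
}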
85 changes: 85 additions & 0 deletions src/parallel/impl_par_methods.rs
@@ -0,0 +1,85 @@

use {
    Dimension,
    NdProducer,
    Zip,
    ArrayBase,
    DataMut,
};

use parallel::prelude::*;


/// # Parallel methods
///
/// These methods require crate feature `rayon`.
impl<A, S, D> ArrayBase<S, D>
    where S: DataMut<Elem=A>,
          D: Dimension,
          A: Send + Sync,
{
    /// Parallel version of `map_inplace`.
    ///
    /// Modify the array in place by calling `f` by mutable reference on each element.
    ///
    /// Elements are visited in arbitrary order.
    pub fn par_map_inplace<F>(&mut self, f: F)
        where F: Fn(&mut A) + Sync + Send
    {
        self.view_mut().into_par_iter().for_each(f)
    }

    /// Parallel version of `mapv_inplace`.
    ///
    /// Modify the array in place by calling `f` by **v**alue on each element.
    /// The array is updated with the new values.
    ///
    /// Elements are visited in arbitrary order.
    pub fn par_mapv_inplace<F>(&mut self, f: F)
        where F: Fn(A) -> A + Sync + Send,
              A: Clone,
    {
        self.view_mut().into_par_iter()
            .for_each(move |x| *x = f(x.clone()))
    }
}
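A minimal usage sketch for these two methods (hypothetical values; the `rayon` crate feature must be enabled):

// Hedged sketch: in-place parallel mutation by reference and by value.
extern crate ndarray;

use ndarray::Array2;

fn main() {
    let mut a = Array2::<f64>::from_elem((100, 100), 2.0);
    // Mutate through a mutable reference to each element, in parallel.
    a.par_map_inplace(|x| *x *= 0.5);
    // Map by value; each element is replaced by the closure's return value.
    a.par_mapv_inplace(f64::sqrt);
    assert!(a.iter().all(|&x| x == 1.0));
}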




// Zip

macro_rules! zip_impl {
    ($([$($p:ident)*],)+) => {
        $(
        #[allow(non_snake_case)]
        impl<D, $($p),*> Zip<($($p,)*), D>
            where $($p::Item : Send , )*
                  $($p : Send , )*
                  D: Dimension,
                  $($p: NdProducer<Dim=D> ,)*
        {
            /// The `par_apply` method for `Zip`.
            ///
            /// This is a shorthand for using `.into_par_iter().for_each()` on
            /// `Zip`.
            ///
            /// Requires crate feature `rayon`.
            pub fn par_apply<F>(self, function: F)
                where F: Fn($($p::Item),*) + Sync + Send
            {
                self.into_par_iter().for_each(move |($($p,)*)| function($($p),*))
            }
        }
        )+
    }
}

zip_impl!{
    [P1],
    [P1 P2],
    [P1 P2 P3],
    [P1 P2 P3 P4],
    [P1 P2 P3 P4 P5],
    [P1 P2 P3 P4 P5 P6],
}
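A sketch of `par_apply` driving a `Zip` over three producers (hypothetical example; `rayon` feature enabled):

// Hedged sketch: Zip::par_apply as a parallel lock-step apply.
extern crate ndarray;

use ndarray::prelude::*;
use ndarray::Zip;

fn main() {
    let a = Array2::<f64>::ones((64, 64));
    let b = Array2::<f64>::ones((64, 64));
    let mut sum = Array2::<f64>::zeros((64, 64));
    // All three producers are traversed in lock step, distributed over rayon's pool.
    Zip::from(&mut sum)
        .and(&a)
        .and(&b)
        .par_apply(|s, &x, &y| *s = x + y);
    assert_eq!(sum[[0, 0]], 2.0);
}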
54 changes: 54 additions & 0 deletions src/parallel/into_impls.rs
@@ -0,0 +1,54 @@
use {Array, ArcArray, Dimension, ArrayView, ArrayViewMut};

use super::prelude::IntoParallelIterator;
use super::Parallel;

/// Requires crate feature `rayon`.
impl<'a, A, D> IntoParallelIterator for &'a Array<A, D>
    where D: Dimension,
          A: Sync
{
    type Item = &'a A;
    type Iter = Parallel<ArrayView<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view().into_par_iter()
    }
}

// This is allowed: goes through `.view()`
/// Requires crate feature `rayon`.
impl<'a, A, D> IntoParallelIterator for &'a ArcArray<A, D>
    where D: Dimension,
          A: Sync
{
    type Item = &'a A;
    type Iter = Parallel<ArrayView<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view().into_par_iter()
    }
}

/// Requires crate feature `rayon`.
impl<'a, A, D> IntoParallelIterator for &'a mut Array<A, D>
    where D: Dimension,
          A: Sync + Send
{
    type Item = &'a mut A;
    type Iter = Parallel<ArrayViewMut<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view_mut().into_par_iter()
    }
}

// This is allowed: goes through `.view_mut()`, which is unique access
/// Requires crate feature `rayon`.
impl<'a, A, D> IntoParallelIterator for &'a mut ArcArray<A, D>
    where D: Dimension,
          A: Sync + Send + Clone,
{
    type Item = &'a mut A;
    type Iter = Parallel<ArrayViewMut<'a, A, D>>;
    fn into_par_iter(self) -> Self::Iter {
        self.view_mut().into_par_iter()
    }
}
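A minimal sketch exercising the reference impls above (hypothetical data; assumes the `rayon` feature and the parallel prelude):

// Hedged sketch: `&mut Array` yields `&mut A` items, `&Array` yields `&A` items.
extern crate ndarray;

use ndarray::Array1;
use ndarray::parallel::prelude::*;

fn main() {
    let mut v = Array1::<f64>::ones(10_000);
    // Mutable parallel traversal: scale every element.
    (&mut v).into_par_iter().for_each(|x| *x *= 2.0);
    // Read-only parallel traversal.
    (&v).into_par_iter().for_each(|x| assert_eq!(*x, 2.0));
}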
