diff --git a/benches/iter.rs b/benches/iter.rs index 24bda95b9..8b89e721b 100644 --- a/benches/iter.rs +++ b/benches/iter.rs @@ -381,12 +381,12 @@ pub fn zip_mut_with(data: &Array3, out: &mut Array3) { fn zip_mut_with_cc(b: &mut Bencher) { let data: Array3 = Array3::zeros((ISZ, ISZ, ISZ)); let mut out = Array3::zeros(data.dim()); - b.iter(|| black_box(zip_mut_with(&data, &mut out))); + b.iter(|| zip_mut_with(&data, &mut out)); } #[bench] fn zip_mut_with_ff(b: &mut Bencher) { let data: Array3 = Array3::zeros((ISZ, ISZ, ISZ).f()); let mut out = Array3::zeros(data.dim().f()); - b.iter(|| black_box(zip_mut_with(&data, &mut out))); + b.iter(|| zip_mut_with(&data, &mut out)); } diff --git a/benches/zip.rs b/benches/zip.rs new file mode 100644 index 000000000..ac7bef589 --- /dev/null +++ b/benches/zip.rs @@ -0,0 +1,120 @@ +#![feature(test)] +extern crate test; +use test::{black_box, Bencher}; +use ndarray::{Array3, ShapeBuilder, Zip}; +use ndarray::s; +use ndarray::IntoNdProducer; + +pub fn zip_copy<'a, A, P, Q>(data: P, out: Q) + where P: IntoNdProducer, + Q: IntoNdProducer, + A: Copy + 'a +{ + Zip::from(data).and(out).apply(|&i, o| { + *o = i; + }); +} + +pub fn zip_copy_split<'a, A, P, Q>(data: P, out: Q) + where P: IntoNdProducer, + Q: IntoNdProducer, + A: Copy + 'a +{ + let z = Zip::from(data).and(out); + let (z1, z2) = z.split(); + let (z11, z12) = z1.split(); + let (z21, z22) = z2.split(); + let f = |&i: &A, o: &mut A| *o = i; + z11.apply(f); + z12.apply(f); + z21.apply(f); + z22.apply(f); +} + +pub fn zip_indexed(data: &Array3, out: &mut Array3) { + Zip::indexed(data).and(out).apply(|idx, &i, o| { + let _ = black_box(idx); + *o = i; + }); +} + +// array size in benchmarks +const SZ3: (usize, usize, usize) = (100, 110, 100); + +#[bench] +fn zip_cc(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3); + let mut out = Array3::zeros(data.dim()); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn zip_cf(b: &mut Bencher) { + let data: Array3 = 
Array3::zeros(SZ3); + let mut out = Array3::zeros(data.dim().f()); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn zip_fc(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3.f()); + let mut out = Array3::zeros(data.dim()); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn zip_ff(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3.f()); + let mut out = Array3::zeros(data.dim().f()); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn zip_indexed_cc(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3); + let mut out = Array3::zeros(data.dim()); + b.iter(|| zip_indexed(&data, &mut out)); +} + +#[bench] +fn zip_indexed_ff(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3.f()); + let mut out = Array3::zeros(data.dim().f()); + b.iter(|| zip_indexed(&data, &mut out)); +} + +#[bench] +fn slice_zip_cc(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3); + let mut out = Array3::zeros(data.dim()); + let data = data.slice(s![1.., 1.., 1..]); + let mut out = out.slice_mut(s![1.., 1.., 1..]); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn slice_zip_ff(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3.f()); + let mut out = Array3::zeros(data.dim().f()); + let data = data.slice(s![1.., 1.., 1..]); + let mut out = out.slice_mut(s![1.., 1.., 1..]); + b.iter(|| zip_copy(&data, &mut out)); +} + +#[bench] +fn slice_split_zip_cc(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3); + let mut out = Array3::zeros(data.dim()); + let data = data.slice(s![1.., 1.., 1..]); + let mut out = out.slice_mut(s![1.., 1.., 1..]); + b.iter(|| zip_copy_split(&data, &mut out)); +} + +#[bench] +fn slice_split_zip_ff(b: &mut Bencher) { + let data: Array3 = Array3::zeros(SZ3.f()); + let mut out = Array3::zeros(data.dim().f()); + let data = data.slice(s![1.., 1.., 1..]); + let mut out = out.slice_mut(s![1.., 1.., 1..]); + b.iter(|| zip_copy_split(&data, &mut out)); +} diff --git a/src/layout/layoutfmt.rs 
b/src/layout/layoutfmt.rs index d5049512e..3d7fad00a 100644 --- a/src/layout/layoutfmt.rs +++ b/src/layout/layoutfmt.rs @@ -8,7 +8,7 @@ use super::Layout; -const LAYOUT_NAMES: &[&str] = &["C", "F"]; +const LAYOUT_NAMES: &[&str] = &["C", "F", "c", "f"]; use std::fmt; diff --git a/src/layout/mod.rs b/src/layout/mod.rs index 57983cc3b..24dd09958 100644 --- a/src/layout/mod.rs +++ b/src/layout/mod.rs @@ -1,6 +1,8 @@ mod layoutfmt; -// public struct but users don't interact with it +// Layout is a bitset used for internal layout description of +// arrays, producers and sets of producers. +// The type is public but users don't interact with it. #[doc(hidden)] /// Memory layout description #[derive(Copy, Clone)] @@ -8,47 +10,136 @@ pub struct Layout(u32); impl Layout { #[inline(always)] - pub(crate) fn new(x: u32) -> Self { - Layout(x) + pub(crate) fn is(self, flag: u32) -> bool { + self.0 & flag != 0 } + /// Return layout common to both inputs #[inline(always)] - pub(crate) fn is(self, flag: u32) -> bool { - self.0 & flag != 0 + pub(crate) fn intersect(self, other: Layout) -> Layout { + Layout(self.0 & other.0) } + + /// Return a layout that simultaneously "is" what both of the inputs are #[inline(always)] - pub(crate) fn and(self, flag: Layout) -> Layout { - Layout(self.0 & flag.0) + pub(crate) fn also(self, other: Layout) -> Layout { + Layout(self.0 | other.0) } #[inline(always)] - pub(crate) fn flag(self) -> u32 { - self.0 + pub(crate) fn one_dimensional() -> Layout { + Layout::c().also(Layout::f()) } -} -impl Layout { - #[doc(hidden)] #[inline(always)] - pub fn one_dimensional() -> Layout { - Layout(CORDER | FORDER) + pub(crate) fn c() -> Layout { + Layout(CORDER | CPREFER) } - #[doc(hidden)] + #[inline(always)] - pub fn c() -> Layout { - Layout(CORDER) + pub(crate) fn f() -> Layout { + Layout(FORDER | FPREFER) } - #[doc(hidden)] + #[inline(always)] - pub fn f() -> Layout { - Layout(FORDER) + pub(crate) fn cpref() -> Layout { + Layout(CPREFER) } + + #[inline(always)] 
+ pub(crate) fn fpref() -> Layout { + Layout(FPREFER) + } + #[inline(always)] - #[doc(hidden)] - pub fn none() -> Layout { + pub(crate) fn none() -> Layout { Layout(0) } + + /// A simple "score" method which scores positive for preferring C-order, negative for F-order + /// Subject to change when we can describe other layouts + pub(crate) fn tendency(self) -> i32 { + (self.is(CORDER) as i32 - self.is(FORDER) as i32) + + (self.is(CPREFER) as i32 - self.is(FPREFER) as i32) + + } } pub const CORDER: u32 = 0b01; pub const FORDER: u32 = 0b10; +pub const CPREFER: u32 = 0b0100; +pub const FPREFER: u32 = 0b1000; + + +#[cfg(test)] +mod tests { + use super::*; + use crate::imp_prelude::*; + use crate::NdProducer; + + type M = Array2; + + #[test] + fn contig_layouts() { + let a = M::zeros((5, 5)); + let b = M::zeros((5, 5).f()); + let ac = a.view().layout(); + let af = b.view().layout(); + assert!(ac.is(CORDER) && ac.is(CPREFER)); + assert!(!ac.is(FORDER) && !ac.is(FPREFER)); + assert!(!af.is(CORDER) && !af.is(CPREFER)); + assert!(af.is(FORDER) && af.is(FPREFER)); + } + + #[test] + fn stride_layouts() { + let a = M::zeros((5, 5)); + + { + let v1 = a.slice(s![1.., ..]).layout(); + let v2 = a.slice(s![.., 1..]).layout(); + + assert!(v1.is(CORDER) && v1.is(CPREFER)); + assert!(!v1.is(FORDER) && !v1.is(FPREFER)); + assert!(!v2.is(CORDER) && v2.is(CPREFER)); + assert!(!v2.is(FORDER) && !v2.is(FPREFER)); + } + + let b = M::zeros((5, 5).f()); + + { + let v1 = b.slice(s![1.., ..]).layout(); + let v2 = b.slice(s![.., 1..]).layout(); + + assert!(!v1.is(CORDER) && !v1.is(CPREFER)); + assert!(!v1.is(FORDER) && v1.is(FPREFER)); + assert!(!v2.is(CORDER) && !v2.is(CPREFER)); + assert!(v2.is(FORDER) && v2.is(FPREFER)); + } + } + + #[test] + fn skip_layouts() { + let a = M::zeros((5, 5)); + { + let v1 = a.slice(s![..;2, ..]).layout(); + let v2 = a.slice(s![.., ..;2]).layout(); + + assert!(!v1.is(CORDER) && v1.is(CPREFER)); + assert!(!v1.is(FORDER) && !v1.is(FPREFER)); + assert!(!v2.is(CORDER) 
&& !v2.is(CPREFER)); + assert!(!v2.is(FORDER) && !v2.is(FPREFER)); + } + + let b = M::zeros((5, 5).f()); + { + let v1 = b.slice(s![..;2, ..]).layout(); + let v2 = b.slice(s![.., ..;2]).layout(); + + assert!(!v1.is(CORDER) && !v1.is(CPREFER)); + assert!(!v1.is(FORDER) && !v1.is(FPREFER)); + assert!(!v2.is(CORDER) && !v2.is(CPREFER)); + assert!(!v2.is(FORDER) && v2.is(FPREFER)); + } + } +} diff --git a/src/zip/mod.rs b/src/zip/mod.rs index b2e24a62e..d8ebc4faa 100644 --- a/src/zip/mod.rs +++ b/src/zip/mod.rs @@ -53,17 +53,26 @@ where D: Dimension, { pub(crate) fn layout_impl(&self) -> Layout { - Layout::new(if self.is_standard_layout() { - if self.ndim() <= 1 { - FORDER | CORDER + let n = self.ndim(); + if self.is_standard_layout() { + if n <= 1 { + Layout::one_dimensional() } else { - CORDER + Layout::c() + } + } else if n > 1 && self.raw_view().reversed_axes().is_standard_layout() { + Layout::f() + } else if n > 1 { + if self.stride_of(Axis(0)) == 1 { + Layout::fpref() + } else if self.stride_of(Axis(n - 1)) == 1 { + Layout::cpref() + } else { + Layout::none() } - } else if self.ndim() > 1 && self.raw_view().reversed_axes().is_standard_layout() { - FORDER } else { - 0 - }) + Layout::none() + } } } @@ -587,6 +596,9 @@ pub struct Zip { parts: Parts, dimension: D, layout: Layout, + /// The sum of the layout tendencies of the parts; + /// positive for c- and negative for f-layout preference. + layout_tendency: i32, } @@ -605,10 +617,12 @@ where { let array = p.into_producer(); let dim = array.raw_dim(); + let layout = array.layout(); Zip { dimension: dim, - layout: array.layout(), + layout, parts: (array,), + layout_tendency: layout.tendency(), } } } @@ -661,24 +675,29 @@ where self.dimension[axis.index()] } + fn prefer_f(&self) -> bool { + !self.layout.is(CORDER) && (self.layout.is(FORDER) || self.layout_tendency < 0) + } + /// Return an *approximation* to the max stride axis; if /// component arrays disagree, there may be no choice better than the /// others. 
fn max_stride_axis(&self) -> Axis { - let i = match self.layout.flag() { - FORDER => self + let i = if self.prefer_f() { + self .dimension .slice() .iter() .rposition(|&len| len > 1) - .unwrap_or(self.dimension.ndim() - 1), + .unwrap_or(self.dimension.ndim() - 1) + } else { /* corder or default */ - _ => self + self .dimension .slice() .iter() .position(|&len| len > 1) - .unwrap_or(0), + .unwrap_or(0) }; Axis(i) } @@ -699,6 +718,7 @@ where self.apply_core_strided(acc, function) } } + fn apply_core_contiguous(&mut self, mut acc: Acc, mut function: F) -> FoldWhile where F: FnMut(Acc, P::Item) -> FoldWhile, @@ -717,7 +737,7 @@ where FoldWhile::Continue(acc) } - fn apply_core_strided(&mut self, mut acc: Acc, mut function: F) -> FoldWhile + fn apply_core_strided(&mut self, acc: Acc, function: F) -> FoldWhile where F: FnMut(Acc, P::Item) -> FoldWhile, P: ZippableTuple, @@ -726,13 +746,27 @@ where if n == 0 { panic!("Unreachable: ndim == 0 is contiguous") } + if n == 1 || self.layout_tendency >= 0 { + self.apply_core_strided_c(acc, function) + } else { + self.apply_core_strided_f(acc, function) + } + } + + // Non-contiguous but preference for C - unroll over Axis(ndim - 1) + fn apply_core_strided_c(&mut self, mut acc: Acc, mut function: F) -> FoldWhile + where + F: FnMut(Acc, P::Item) -> FoldWhile, + P: ZippableTuple, + { + let n = self.dimension.ndim(); let unroll_axis = n - 1; let inner_len = self.dimension[unroll_axis]; self.dimension[unroll_axis] = 1; let mut index_ = self.dimension.first_index(); let inner_strides = self.parts.stride_of(unroll_axis); + // Loop unrolled over closest axis while let Some(index) = index_ { - // Let's “unroll” the loop over the innermost axis unsafe { let ptr = self.parts.uget_ptr(&index); for i in 0..inner_len { @@ -747,9 +781,40 @@ where FoldWhile::Continue(acc) } + // Non-contiguous but preference for F - unroll over Axis(0) + fn apply_core_strided_f(&mut self, mut acc: Acc, mut function: F) -> FoldWhile + where + F: FnMut(Acc, 
P::Item) -> FoldWhile, + P: ZippableTuple, + { + let unroll_axis = 0; + let inner_len = self.dimension[unroll_axis]; + self.dimension[unroll_axis] = 1; + let index_ = self.dimension.first_index(); + let inner_strides = self.parts.stride_of(unroll_axis); + // Loop unrolled over closest axis + if let Some(mut index) = index_ { + loop { + unsafe { + let ptr = self.parts.uget_ptr(&index); + for i in 0..inner_len { + let p = ptr.stride_offset(inner_strides, i); + acc = fold_while!(function(acc, self.parts.as_ref(p))); + } + } + + if !self.dimension.next_for_f(&mut index) { + break; + } + } + } + self.dimension[unroll_axis] = inner_len; + FoldWhile::Continue(acc) + } + pub(crate) fn uninitalized_for_current_layout(&self) -> Array, D> { - let is_f = !self.layout.is(CORDER) && self.layout.is(FORDER); + let is_f = self.prefer_f(); Array::maybe_uninit(self.dimension.clone().set_f(is_f)) } } @@ -995,8 +1060,9 @@ macro_rules! map_impl { let ($($p,)*) = self.parts; Zip { parts: ($($p,)* part, ), - layout: self.layout.and(part_layout), + layout: self.layout.intersect(part_layout), dimension: self.dimension, + layout_tendency: self.layout_tendency + part_layout.tendency(), } } @@ -1052,11 +1118,13 @@ macro_rules! 
map_impl { dimension: d1, layout: self.layout, parts: p1, + layout_tendency: self.layout_tendency, }, Zip { dimension: d2, layout: self.layout, parts: p2, + layout_tendency: self.layout_tendency, }) } } diff --git a/tests/format.rs b/tests/format.rs index 5c2e2b6f4..422fa2957 100644 --- a/tests/format.rs +++ b/tests/format.rs @@ -62,13 +62,13 @@ fn debug_format() { "\ [[0, 0, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 0]], shape=[3, 4], strides=[4, 1], layout=C (0x1), const ndim=2" + [0, 0, 0, 0]], shape=[3, 4], strides=[4, 1], layout=Cc (0x5), const ndim=2" ); assert_eq!( format!("{:?}", a.into_dyn()), "\ [[0, 0, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 0]], shape=[3, 4], strides=[4, 1], layout=C (0x1), dynamic ndim=2" + [0, 0, 0, 0]], shape=[3, 4], strides=[4, 1], layout=Cc (0x5), dynamic ndim=2" ); }