dimforge · LucasCampos · Aug 30, 2019 · Aug 30, 2019 · Sep 1, 2019 · Sep 1, 2019
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name    = "nalgebra"
-version = "0.18.1"
+version = "0.18.2"
 authors = [ "Sébastien Crozet <developer@crozet.re>" ]
 
 description = "Linear algebra library with transformations and statically-sized or dynamically-sized matrices."

diff --git a/benches/lib.rs b/benches/lib.rs
@@ -35,5 +35,6 @@ criterion_main!(
     linalg::schur,
     linalg::solve,
     linalg::svd,
+    linalg::polar,
     linalg::symmetric_eigen,
-);
+);
diff --git a/benches/linalg/mod.rs b/benches/linalg/mod.rs
@@ -7,6 +7,8 @@ pub use self::qr::qr;
 pub use self::schur::schur;
 pub use self::solve::solve;
 pub use self::svd::svd;
+#[cfg(any(feature = "std", feature = "alloc"))]
+pub use self::polar::polar;
 pub use self::symmetric_eigen::symmetric_eigen;
 
 mod bidiagonal;
@@ -18,5 +20,7 @@ mod qr;
 mod schur;
 mod solve;
 mod svd;
+#[cfg(any(feature = "std", feature = "alloc"))]
+mod polar;
 mod symmetric_eigen;
-// mod eigen;
+// mod eigen;
diff --git a/benches/linalg/polar.rs b/benches/linalg/polar.rs
@@ -0,0 +1,44 @@
+//! Criterion benchmarks for the polar decomposition (`na::Polar`) of square matrices.
+
+use na::Polar;
+
+/// Registers the benchmark `name`, which times `Polar::new` on a `dim`x`dim` matrix.
+///
+/// The input comes from `crate::reproductible_dmatrix` for every size, so all
+/// benchmarks of this group are fed by the same generator.
+fn bench_polar_decompose(bh: &mut criterion::Criterion, name: &'static str, dim: usize) {
+    let m = crate::reproductible_dmatrix(dim, dim);
+    bh.bench_function(name, move |bh| {
+        // NOTE: the matrix is cloned on every iteration, so the clone is part of
+        // the measured time.
+        bh.iter(|| test::black_box(Polar::new(m.clone())))
+    });
+}
+
+/// Benchmarks the polar decomposition of a 4x4 matrix.
+fn polar_decompose_4x4(bh: &mut criterion::Criterion) {
+    bench_polar_decompose(bh, "polar_decompose_4x4", 4);
+}
+
+/// Benchmarks the polar decomposition of a 10x10 matrix.
+fn polar_decompose_10x10(bh: &mut criterion::Criterion) {
+    bench_polar_decompose(bh, "polar_decompose_10x10", 10);
+}
+
+/// Benchmarks the polar decomposition of a 100x100 matrix.
+fn polar_decompose_100x100(bh: &mut criterion::Criterion) {
+    bench_polar_decompose(bh, "polar_decompose_100x100", 100);
+}
+
+/// Benchmarks the polar decomposition of a 200x200 matrix.
+fn polar_decompose_200x200(bh: &mut criterion::Criterion) {
+    bench_polar_decompose(bh, "polar_decompose_200x200", 200);
+}
+
+// Criterion group collecting every polar decomposition benchmark of this file.
+criterion_group!(polar,
+    polar_decompose_4x4,
+    polar_decompose_10x10,
+    polar_decompose_100x100,
+    polar_decompose_200x200,
+);
diff --git a/nalgebra-glm/src/geometric.rs b/nalgebra-glm/src/geometric.rs
@@ -4,7 +4,7 @@ use crate::aliases::{TVec, TVec3};
 use crate::traits::{Alloc, Dimension, Number};
 
 /// The cross product of two vectors.
-pub fn cross<N: Number, D: Dimension>(x: &TVec3<N>, y: &TVec3<N>) -> TVec3<N> {
+pub fn cross<N: Number>(x: &TVec3<N>, y: &TVec3<N>) -> TVec3<N> {
     x.cross(y)
 }
 

diff --git a/src/base/blas.rs b/src/base/blas.rs
@@ -565,6 +565,14 @@ where
         );
 
         if ncols2 == 0 {
+            // NOTE: we can't just always multiply by beta
+            // because we documented the guarantee that `self` is
+            // never read if `beta` is zero.
+            if beta.is_zero() {
+                self.fill(N::zero());
+            } else {
+                *self *= beta;
+            }
             return;
         }
 
@@ -991,92 +999,109 @@ where N: Scalar + Zero + ClosedAdd + ClosedMul
 
         #[cfg(feature = "std")]
         {
-            // matrixmultiply can be used only if the std feature is available.
-            let nrows1 = self.nrows();
-            let (nrows2, ncols2) = a.shape();
-            let (nrows3, ncols3) = b.shape();
-
-            assert_eq!(
-                ncols2, nrows3,
-                "gemm: dimensions mismatch for multiplication."
-            );
-            assert_eq!(
-                (nrows1, ncols1),
-                (nrows2, ncols3),
-                "gemm: dimensions mismatch for addition."
-            );
-
             // We assume large matrices will be Dynamic but small matrices static.
             // We could use matrixmultiply for large statically-sized matrices but the performance
             // threshold to activate it would be different from SMALL_DIM because our code optimizes
             // better for statically-sized matrices.
-            let is_dynamic = R1::is::<Dynamic>()
+            if R1::is::<Dynamic>()
                 || C1::is::<Dynamic>()
                 || R2::is::<Dynamic>()
                 || C2::is::<Dynamic>()
                 || R3::is::<Dynamic>()
-                || C3::is::<Dynamic>();
-            // Threshold determined empirically.
-            const SMALL_DIM: usize = 5;
-
-            if is_dynamic
-                && nrows1 > SMALL_DIM
-                && ncols1 > SMALL_DIM
-                && nrows2 > SMALL_DIM
-                && ncols2 > SMALL_DIM
-            {
-                if N::is::<f32>() {
-                    let (rsa, csa) = a.strides();
-                    let (rsb, csb) = b.strides();
-                    let (rsc, csc) = self.strides();
-
-                    unsafe {
-                        matrixmultiply::sgemm(
-                            nrows2,
-                            ncols2,
-                            ncols3,
-                            mem::transmute_copy(&alpha),
-                            a.data.ptr() as *const f32,
-                            rsa as isize,
-                            csa as isize,
-                            b.data.ptr() as *const f32,
-                            rsb as isize,
-                            csb as isize,
-                            mem::transmute_copy(&beta),
-                            self.data.ptr_mut() as *mut f32,
-                            rsc as isize,
-                            csc as isize,
-                        );
+                || C3::is::<Dynamic>() {
+                // matrixmultiply can be used only if the std feature is available.
+                let nrows1 = self.nrows();
+                let (nrows2, ncols2) = a.shape();
+                let (nrows3, ncols3) = b.shape();
+
+                // Threshold determined empirically.
+                const SMALL_DIM: usize = 5;
+
+                if nrows1 > SMALL_DIM
+                    && ncols1 > SMALL_DIM
+                    && nrows2 > SMALL_DIM
+                    && ncols2 > SMALL_DIM
+                {
+                    assert_eq!(
+                        ncols2, nrows3,
+                        "gemm: dimensions mismatch for multiplication."
+                    );
+                    assert_eq!(
+                        (nrows1, ncols1),
+                        (nrows2, ncols3),
+                        "gemm: dimensions mismatch for addition."
+                    );
+
+                    // NOTE: this case should never happen because we enter this
+                    // codepath only when ncols2 > SMALL_DIM. We still keep this
+                    // check in case the conditions to enter this codepath are
+                    // changed in the future.
+                    if ncols2 == 0 {
+                        // NOTE: we can't just always multiply by beta
+                        // because we documented the guarantee that `self` is
+                        // never read if `beta` is zero.
+                        if beta.is_zero() {
+                            self.fill(N::zero());
+                        } else {
+                            *self *= beta;
+                        }
+                        return;
                     }
-                    return;
-                } else if N::is::<f64>() {
-                    let (rsa, csa) = a.strides();
-                    let (rsb, csb) = b.strides();
-                    let (rsc, csc) = self.strides();
-
-                    unsafe {
-                        matrixmultiply::dgemm(
-                            nrows2,
-                            ncols2,
-                            ncols3,
-                            mem::transmute_copy(&alpha),
-                            a.data.ptr() as *const f64,
-                            rsa as isize,
-                            csa as isize,
-                            b.data.ptr() as *const f64,
-                            rsb as isize,
-                            csb as isize,
-                            mem::transmute_copy(&beta),
-                            self.data.ptr_mut() as *mut f64,
-                            rsc as isize,
-                            csc as isize,
-                        );
+
+                    if N::is::<f32>() {
+                        let (rsa, csa) = a.strides();
+                        let (rsb, csb) = b.strides();
+                        let (rsc, csc) = self.strides();
+
+                        unsafe {
+                            matrixmultiply::sgemm(
+                                nrows2,
+                                ncols2,
+                                ncols3,
+                                mem::transmute_copy(&alpha),
+                                a.data.ptr() as *const f32,
+                                rsa as isize,
+                                csa as isize,
+                                b.data.ptr() as *const f32,
+                                rsb as isize,
+                                csb as isize,
+                                mem::transmute_copy(&beta),
+                                self.data.ptr_mut() as *mut f32,
+                                rsc as isize,
+                                csc as isize,
+                            );
+                        }
+                        return;
+                    } else if N::is::<f64>() {
+                        let (rsa, csa) = a.strides();
+                        let (rsb, csb) = b.strides();
+                        let (rsc, csc) = self.strides();
+
+                        unsafe {
+                            matrixmultiply::dgemm(
+                                nrows2,
+                                ncols2,
+                                ncols3,
+                                mem::transmute_copy(&alpha),
+                                a.data.ptr() as *const f64,
+                                rsa as isize,
+                                csa as isize,
+                                b.data.ptr() as *const f64,
+                                rsb as isize,
+                                csb as isize,
+                                mem::transmute_copy(&beta),
+                                self.data.ptr_mut() as *mut f64,
+                                rsc as isize,
+                                csc as isize,
+                            );
+                        }
+                        return;
                     }
-                    return;
                 }
             }
         }
 
+
         for j1 in 0..ncols1 {
             // FIXME: avoid bound checks.
             self.column_mut(j1).gemv(alpha, a, &b.column(j1), beta);