diff --git a/Cargo.lock b/Cargo.lock
index 8b55a36d..6cf4ab9b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -328,9 +328,9 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
 
 [[package]]
 name = "elliptic-curve"
-version = "0.11.2"
+version = "0.11.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "befe3b23562c66e85abf1bcc872d4d59ac058fcd1db38afb16620cd45bbc65e4"
+checksum = "4c4c31bb557a73d165c838b614521f888112f9d4fcff7421d35646376dd17caf"
 dependencies = [
  "base64ct",
  "crypto-bigint",
diff --git a/k256/Cargo.toml b/k256/Cargo.toml
index 169150e9..6c45b562 100644
--- a/k256/Cargo.toml
+++ b/k256/Cargo.toml
@@ -19,7 +19,7 @@ rust-version = "1.56"
 
 [dependencies]
 cfg-if = "1.0"
-elliptic-curve = { version = "0.11", default-features = false, features = ["hazmat", "sec1"] }
+elliptic-curve = { version = "0.11.3", default-features = false, features = ["hazmat", "sec1"] }
 sec1 = { version = "0.2", default-features = false }
 
 # optional dependencies
diff --git a/k256/src/arithmetic/scalar.rs b/k256/src/arithmetic/scalar.rs
index eb32518c..ae48f836 100644
--- a/k256/src/arithmetic/scalar.rs
+++ b/k256/src/arithmetic/scalar.rs
@@ -12,7 +12,7 @@ use elliptic_curve::{
     bigint::{nlimbs, prelude::*, Limb, LimbUInt, U256, U512},
     generic_array::arr,
     group::ff::{Field, PrimeField},
-    ops::Reduce,
+    ops::{Reduce, ReduceNonZero},
     rand_core::{CryptoRng, RngCore},
     subtle::{
         Choice, ConditionallySelectable, ConstantTimeEq, ConstantTimeGreater, ConstantTimeLess,
@@ -581,6 +581,12 @@ impl Reduce<U512> for Scalar {
     }
 }
 
+impl ReduceNonZero<U512> for Scalar {
+    fn from_uint_reduced_nonzero(w: U512) -> Self {
+        WideScalar(w).reduce_nonzero()
+    }
+}
+
 #[cfg(feature = "bits")]
 #[cfg_attr(docsrs, doc(cfg(feature = "bits")))]
 impl From<&Scalar> for ScalarBits {
@@ -628,7 +634,10 @@ mod tests {
     use super::Scalar;
     use crate::arithmetic::dev::{biguint_to_bytes, bytes_to_biguint};
     use elliptic_curve::{
+        bigint::U512,
         ff::{Field, PrimeField},
+        generic_array::GenericArray,
+        ops::Reduce,
         IsHigh,
     };
     use num_bigint::{BigUint, ToBigUint};
@@ -745,6 +754,15 @@ mod tests {
         assert_eq!((a - &a).is_zero().unwrap_u8(), 1);
     }
 
+    #[test]
+    fn from_wide_bytes_reduced() {
+        let m = Scalar::modulus_as_biguint();
+        let b = [0xffu8; 64];
+        let s = <Scalar as Reduce<U512>>::from_be_bytes_reduced(GenericArray::clone_from_slice(&b));
+        let s_bu = s.to_biguint().unwrap();
+        assert!(s_bu < m);
+    }
+
     prop_compose! {
         fn scalar()(bytes in any::<[u8; 32]>()) -> Scalar {
             let mut res = bytes_to_biguint(&bytes);
@@ -843,5 +861,16 @@ mod tests {
             let m = Scalar::modulus_as_biguint();
             assert_eq!((&inv_bi * &a_bi) % &m, 1.to_biguint().unwrap());
         }
+
+        #[test]
+        fn fuzzy_from_wide_bytes_reduced(bytes_hi in any::<[u8; 32]>(), bytes_lo in any::<[u8; 32]>()) {
+            let m = Scalar::modulus_as_biguint();
+            let mut bytes = [0u8; 64];
+            bytes[0..32].clone_from_slice(&bytes_hi);
+            bytes[32..64].clone_from_slice(&bytes_lo);
+            let s = <Scalar as Reduce<U512>>::from_be_bytes_reduced(GenericArray::clone_from_slice(&bytes));
+            let s_bu = s.to_biguint().unwrap();
+            assert!(s_bu < m);
+        }
     }
 }
diff --git a/k256/src/arithmetic/scalar/wide32.rs b/k256/src/arithmetic/scalar/wide32.rs
index f0f510a9..6fd07f7c 100644
--- a/k256/src/arithmetic/scalar/wide32.rs
+++ b/k256/src/arithmetic/scalar/wide32.rs
@@ -230,8 +230,13 @@ impl WideScalar {
         Scalar::conditional_select(&res, &res.add(&Scalar::ONE), Choice::from(c as u8))
     }
 
-    #[inline(always)] // only used in Scalar::mul(), so won't cause binary bloat
-    pub(super) fn reduce(&self) -> Scalar {
+    pub(super) fn reduce_impl(&self, modulus_minus_one: bool) -> Scalar {
+        let neg_modulus0 = if modulus_minus_one {
+            NEG_MODULUS[0] + 1
+        } else {
+            NEG_MODULUS[0]
+        };
+
         let w = self.0.to_uint_array();
         let n0 = w[8];
         let n1 = w[9];
@@ -249,46 +254,46 @@ impl WideScalar {
         let c0 = w[0];
         let c1 = 0;
         let c2 = 0;
-        let (c0, c1) = muladd_fast(n0, NEG_MODULUS[0], c0, c1);
+        let (c0, c1) = muladd_fast(n0, neg_modulus0, c0, c1);
         let (m0, c0, c1) = (c0, c1, 0);
         let (c0, c1) = sumadd_fast(w[1], c0, c1);
-        let (c0, c1, c2) = muladd(n1, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n1, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n0, NEG_MODULUS[1], c0, c1, c2);
         let (m1, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[2], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n2, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n2, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n1, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n0, NEG_MODULUS[2], c0, c1, c2);
         let (m2, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[3], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n3, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n3, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n2, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n1, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(n0, NEG_MODULUS[3], c0, c1, c2);
         let (m3, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[4], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n4, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n4, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n3, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n2, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(n1, NEG_MODULUS[3], c0, c1, c2);
         let (c0, c1, c2) = sumadd(n0, c0, c1, c2);
         let (m4, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[5], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n5, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n5, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n4, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n3, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(n2, NEG_MODULUS[3], c0, c1, c2);
         let (c0, c1, c2) = sumadd(n1, c0, c1, c2);
         let (m5, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[6], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n6, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n6, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n5, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n4, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(n3, NEG_MODULUS[3], c0, c1, c2);
         let (c0, c1, c2) = sumadd(n2, c0, c1, c2);
         let (m6, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[7], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n7, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n7, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n6, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(n5, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(n4, NEG_MODULUS[3], c0, c1, c2);
@@ -316,25 +321,25 @@ impl WideScalar {
         let c0 = m0;
         let c1 = 0;
         let c2 = 0;
-        let (c0, c1) = muladd_fast(m8, NEG_MODULUS[0], c0, c1);
+        let (c0, c1) = muladd_fast(m8, neg_modulus0, c0, c1);
         let (p0, c0, c1) = (c0, c1, 0);
         let (c0, c1) = sumadd_fast(m1, c0, c1);
-        let (c0, c1, c2) = muladd(m9, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m9, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m8, NEG_MODULUS[1], c0, c1, c2);
         let (p1, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(m2, c0, c1, c2);
-        let (c0, c1, c2) = muladd(m10, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m10, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m9, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(m8, NEG_MODULUS[2], c0, c1, c2);
         let (p2, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(m3, c0, c1, c2);
-        let (c0, c1, c2) = muladd(m11, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m11, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m10, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(m9, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(m8, NEG_MODULUS[3], c0, c1, c2);
         let (p3, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(m4, c0, c1, c2);
-        let (c0, c1, c2) = muladd(m12, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m12, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m11, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = muladd(m10, NEG_MODULUS[2], c0, c1, c2);
         let (c0, c1, c2) = muladd(m9, NEG_MODULUS[3], c0, c1, c2);
@@ -360,7 +365,7 @@ impl WideScalar {
 
         // Reduce 258 bits into 256.
         // r[0..7] = p[0..7] + p[8] * NEG_MODULUS.
-        let mut c = p0 as u64 + (NEG_MODULUS[0] as u64) * (p8 as u64);
+        let mut c = p0 as u64 + (neg_modulus0 as u64) * (p8 as u64);
         let r0 = (c & 0xFFFFFFFFu64) as u32;
         c >>= 32;
         c += p1 as u64 + (NEG_MODULUS[1] as u64) * (p8 as u64);
@@ -392,6 +397,15 @@ impl WideScalar {
         let underflow = Choice::from((underflow.0 >> 31) as u8);
         Scalar(U256::conditional_select(&r, &r2, !underflow | high_bit))
     }
+
+    #[inline(always)] // only used in Scalar::mul(), so won't cause binary bloat
+    pub(super) fn reduce(&self) -> Scalar {
+        self.reduce_impl(false)
+    }
+
+    pub(super) fn reduce_nonzero(&self) -> Scalar {
+        self.reduce_impl(true) + Scalar::ONE
+    }
 }
 
 /// Constant-time comparison.
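
Note on the new `reduce_nonzero` path above: setting `modulus_minus_one` swaps the low limb of the precomputed negated modulus for `NEG_MODULUS[0] + 1`, so the wide value is intended to be folded down against n - 1 rather than the group order n, and the trailing `+ Scalar::ONE` is then meant to land the result in [1, n - 1], which can never be zero. A minimal reference model of that intended mapping, written against the num-bigint crate the dev-tests already use (illustrative only, not part of the patch; the constant below is the well-known secp256k1 group order and the function names are hypothetical):

    use num_bigint::BigUint;
    use num_traits::One;

    /// secp256k1 group order n (illustrative constant, not taken from the patch).
    fn secp256k1_order() -> BigUint {
        BigUint::parse_bytes(
            b"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141",
            16,
        )
        .unwrap()
    }

    /// Model of a non-zero wide reduction: w mod (n - 1), then + 1.
    /// The result always lies in [1, n - 1] and is therefore never zero.
    fn reduce_nonzero_model(w: &BigUint) -> BigUint {
        let n = secp256k1_order();
        w % (&n - BigUint::one()) + BigUint::one()
    }

    fn main() {
        let n = secp256k1_order();
        // Even a zero input maps to one, the smallest non-zero scalar.
        assert_eq!(reduce_nonzero_model(&BigUint::from(0u8)), BigUint::one());
        // A value congruent to 12345 modulo n - 1 reduces to 12346.
        let w = (&n - BigUint::one()) + BigUint::from(12345u32);
        let r = reduce_nonzero_model(&w);
        assert!(BigUint::one() <= r && r < n);
        assert_eq!(r, BigUint::from(12346u32));
    }
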
diff --git a/k256/src/arithmetic/scalar/wide64.rs b/k256/src/arithmetic/scalar/wide64.rs
index f8d72ae9..0d0ae511 100644
--- a/k256/src/arithmetic/scalar/wide64.rs
+++ b/k256/src/arithmetic/scalar/wide64.rs
@@ -118,8 +118,13 @@ impl WideScalar {
         Scalar::conditional_select(&res, &res.add(&Scalar::ONE), Choice::from(c as u8))
     }
 
-    #[inline(always)] // only used in Scalar::mul(), so won't cause binary bloat
-    pub(super) fn reduce(&self) -> Scalar {
+    fn reduce_impl(&self, modulus_minus_one: bool) -> Scalar {
+        let neg_modulus0 = if modulus_minus_one {
+            NEG_MODULUS[0] + 1
+        } else {
+            NEG_MODULUS[0]
+        };
+
         let w = self.0.to_uint_array();
         let n0 = w[4];
         let n1 = w[5];
@@ -127,23 +132,23 @@ impl WideScalar {
         let n3 = w[7];
 
         // Reduce 512 bits into 385.
-        // m[0..6] = self[0..3] + n[0..3] * NEG_MODULUS.
+        // m[0..6] = self[0..3] + n[0..3] * neg_modulus.
         let c0 = w[0];
         let c1 = 0;
         let c2 = 0;
-        let (c0, c1) = muladd_fast(n0, NEG_MODULUS[0], c0, c1);
+        let (c0, c1) = muladd_fast(n0, neg_modulus0, c0, c1);
         let (m0, c0, c1) = (c0, c1, 0);
         let (c0, c1) = sumadd_fast(w[1], c0, c1);
-        let (c0, c1, c2) = muladd(n1, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n1, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n0, NEG_MODULUS[1], c0, c1, c2);
         let (m1, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[2], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n2, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n2, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n1, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = sumadd(n0, c0, c1, c2);
         let (m2, c0, c1, c2) = (c0, c1, c2, 0);
         let (c0, c1, c2) = sumadd(w[3], c0, c1, c2);
-        let (c0, c1, c2) = muladd(n3, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(n3, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(n2, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = sumadd(n1, c0, c1, c2);
         let (m3, c0, c1, c2) = (c0, c1, c2, 0);
@@ -156,18 +161,18 @@ impl WideScalar {
         let m6 = c0;
 
         // Reduce 385 bits into 258.
-        // p[0..4] = m[0..3] + m[4..6] * NEG_MODULUS.
+        // p[0..4] = m[0..3] + m[4..6] * neg_modulus.
         let c0 = m0;
         let c1 = 0;
         let c2 = 0;
-        let (c0, c1) = muladd_fast(m4, NEG_MODULUS[0], c0, c1);
+        let (c0, c1) = muladd_fast(m4, neg_modulus0, c0, c1);
         let (p0, c0, c1) = (c0, c1, 0);
         let (c0, c1) = sumadd_fast(m1, c0, c1);
-        let (c0, c1, c2) = muladd(m5, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m5, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m4, NEG_MODULUS[1], c0, c1, c2);
         let (p1, c0, c1) = (c0, c1, 0);
         let (c0, c1, c2) = sumadd(m2, c0, c1, c2);
-        let (c0, c1, c2) = muladd(m6, NEG_MODULUS[0], c0, c1, c2);
+        let (c0, c1, c2) = muladd(m6, neg_modulus0, c0, c1, c2);
         let (c0, c1, c2) = muladd(m5, NEG_MODULUS[1], c0, c1, c2);
         let (c0, c1, c2) = sumadd(m4, c0, c1, c2);
         let (p2, c0, c1, _c2) = (c0, c1, c2, 0);
@@ -179,8 +184,8 @@ impl WideScalar {
         debug_assert!(p4 <= 2);
 
         // Reduce 258 bits into 256.
-        // r[0..3] = p[0..3] + p[4] * NEG_MODULUS.
-        let mut c = (p0 as u128) + (NEG_MODULUS[0] as u128) * (p4 as u128);
+        // r[0..3] = p[0..3] + p[4] * neg_modulus.
+        let mut c = (p0 as u128) + (neg_modulus0 as u128) * (p4 as u128);
         let r0 = (c & 0xFFFFFFFFFFFFFFFFu128) as u64;
         c >>= 64;
         c += (p1 as u128) + (NEG_MODULUS[1] as u128) * (p4 as u128);
@@ -200,6 +205,15 @@ impl WideScalar {
         let underflow = Choice::from((underflow.0 >> 63) as u8);
         Scalar(U256::conditional_select(&r, &r2, !underflow | high_bit))
     }
+
+    #[inline(always)] // only used in Scalar::mul(), so won't cause binary bloat
+    pub(super) fn reduce(&self) -> Scalar {
+        self.reduce_impl(false)
+    }
+
+    pub(super) fn reduce_nonzero(&self) -> Scalar {
+        self.reduce_impl(true) + Scalar::ONE
+    }
 }
 
 /// Constant-time comparison.
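
Downstream of this patch, the new impl is reachable through the `elliptic_curve::ops::ReduceNonZero` trait. A hedged usage sketch (not part of the patch; it relies only on items visible in the diff plus the `U512::ZERO`/`U512::MAX` constants from crypto-bigint, re-exported as `elliptic_curve::bigint`, and assumes both `k256` and `elliptic-curve` are direct dependencies):

    use elliptic_curve::{bigint::U512, ff::Field, ops::ReduceNonZero};
    use k256::Scalar;

    fn main() {
        // The point of `ReduceNonZero`: every 512-bit input, including zero
        // and the all-ones value, reduces to a scalar guaranteed to be non-zero.
        for w in [U512::ZERO, U512::MAX] {
            let s = <Scalar as ReduceNonZero<U512>>::from_uint_reduced_nonzero(w);
            assert!(!bool::from(s.is_zero()));
        }
    }
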