From 77f697eaa1cf030ce687d2ebd503b592dfbcb808 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 16:56:03 -0800 Subject: [PATCH 1/6] Add more mul/pow tests --- benches/bigint.rs | 17 ++++++++++++++++- tests/bigint.rs | 4 ++++ tests/biguint.rs | 4 ++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/benches/bigint.rs b/benches/bigint.rs index b7f5fd21..960b7cee 100644 --- a/benches/bigint.rs +++ b/benches/bigint.rs @@ -39,7 +39,7 @@ fn factorial(n: usize) -> BigUint { let mut f: BigUint = One::one(); for i in 1..=n { let bu: BigUint = FromPrimitive::from_usize(i).unwrap(); - f += bu; + f *= bu; } f } @@ -351,6 +351,21 @@ fn pow_bench_bigexp(b: &mut Bencher) { }); } +#[bench] +fn pow_bench_1e1000(b: &mut Bencher) { + b.iter(|| BigUint::from(10u32).pow(1_000)); +} + +#[bench] +fn pow_bench_1e10000(b: &mut Bencher) { + b.iter(|| BigUint::from(10u32).pow(10_000)); +} + +#[bench] +fn pow_bench_1e100000(b: &mut Bencher) { + b.iter(|| BigUint::from(10u32).pow(100_000)); +} + /// This modulus is the prime from the 2048-bit MODP DH group: /// https://tools.ietf.org/html/rfc3526#section-3 const RFC3526_2048BIT_MODP_GROUP: &str = "\ diff --git a/tests/bigint.rs b/tests/bigint.rs index 8eff5ba7..f244bc4b 100644 --- a/tests/bigint.rs +++ b/tests/bigint.rs @@ -1306,6 +1306,10 @@ fn test_pow() { check!(u32); check!(u64); check!(usize); + + let pow_1e10000 = BigInt::from(10u32).pow(10_000_u32); + let manual_1e10000 = repeat(10u32).take(10_000).product::(); + assert!(manual_1e10000 == pow_1e10000); } #[test] diff --git a/tests/biguint.rs b/tests/biguint.rs index 13b69f21..716bcc81 100644 --- a/tests/biguint.rs +++ b/tests/biguint.rs @@ -1776,6 +1776,10 @@ fn test_pow() { check!(u64); check!(u128); check!(usize); + + let pow_1e10000 = BigUint::from(10u32).pow(10_000_u32); + let manual_1e10000 = repeat(10u32).take(10_000).product::(); + assert!(manual_1e10000 == pow_1e10000); } #[test] From d4015d90f2c7330e03d133c6f6d65c58907a73e8 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 16:59:31 -0800 Subject: [PATCH 2/6] Do normalization truncation all at once --- src/biguint.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/biguint.rs b/src/biguint.rs index 42350714..271a8837 100644 --- a/src/biguint.rs +++ b/src/biguint.rs @@ -846,8 +846,9 @@ impl BigUint { /// be nonzero. #[inline] fn normalize(&mut self) { - while let Some(&0) = self.data.last() { - self.data.pop(); + if let Some(&0) = self.data.last() { + let len = self.data.iter().rposition(|&d| d != 0).map_or(0, |i| i + 1); + self.data.truncate(len); } if self.data.len() < self.data.capacity() / 4 { self.data.shrink_to_fit(); From 42c9c9ac7b567d4ebc2b436d5b6b45d4d260b5ed Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 17:05:23 -0800 Subject: [PATCH 3/6] Enable allocation reuse in scalar multiplication --- src/bigint/multiplication.rs | 57 ++++++++---- src/biguint/multiplication.rs | 168 ++++++++++++++++++++-------------- src/biguint/power.rs | 5 +- 3 files changed, 144 insertions(+), 86 deletions(-) diff --git a/src/bigint/multiplication.rs b/src/bigint/multiplication.rs index aaf5b142..a2d97081 100644 --- a/src/bigint/multiplication.rs +++ b/src/bigint/multiplication.rs @@ -21,24 +21,49 @@ impl Mul for Sign { } } -forward_all_binop_to_ref_ref!(impl Mul for BigInt, mul); - -impl<'a, 'b> Mul<&'b BigInt> for &'a BigInt { - type Output = BigInt; - - #[inline] - fn mul(self, other: &BigInt) -> BigInt { - BigInt::from_biguint(self.sign * other.sign, &self.data * &other.data) - } +macro_rules! impl_mul { + ($(impl<$($a:lifetime),*> Mul<$Other:ty> for $Self:ty;)*) => {$( + impl<$($a),*> Mul<$Other> for $Self { + type Output = BigInt; + + #[inline] + fn mul(self, other: $Other) -> BigInt { + // automatically match value/ref + let BigInt { data: x, .. } = self; + let BigInt { data: y, .. } = other; + BigInt::from_biguint(self.sign * other.sign, x * y) + } + } + )*} +} +impl_mul! { + impl<> Mul for BigInt; + impl<'b> Mul<&'b BigInt> for BigInt; + impl<'a> Mul for &'a BigInt; + impl<'a, 'b> Mul<&'b BigInt> for &'a BigInt; +} + +macro_rules! impl_mul_assign { + ($(impl<$($a:lifetime),*> MulAssign<$Other:ty> for BigInt;)*) => {$( + impl<$($a),*> MulAssign<$Other> for BigInt { + #[inline] + fn mul_assign(&mut self, other: $Other) { + // automatically match value/ref + let BigInt { data: y, .. } = other; + self.data *= y; + if self.data.is_zero() { + self.sign = NoSign; + } else { + self.sign = self.sign * other.sign; + } + } + } + )*} } - -impl<'a> MulAssign<&'a BigInt> for BigInt { - #[inline] - fn mul_assign(&mut self, other: &BigInt) { - *self = &*self * other; - } +impl_mul_assign! { + impl<> MulAssign for BigInt; + impl<'a> MulAssign<&'a BigInt> for BigInt; } -forward_val_assign!(impl MulAssign for BigInt, mul_assign); promote_all_scalars!(impl Mul for BigInt, mul); promote_all_scalars_assign!(impl MulAssign for BigInt, mul_assign); diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs index aaa69344..c07cca17 100644 --- a/src/biguint/multiplication.rs +++ b/src/biguint/multiplication.rs @@ -11,7 +11,7 @@ use crate::{BigInt, UsizePromotion}; use core::cmp::Ordering; use core::iter::Product; use core::ops::{Mul, MulAssign}; -use num_traits::{CheckedMul, One, Zero}; +use num_traits::{CheckedMul, FromPrimitive, One, Zero}; #[inline] pub(super) fn mac_with_carry( @@ -155,28 +155,28 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { // We reuse the same BigUint for all the intermediate multiplies and have to size p // appropriately here: x1.len() >= x0.len and y1.len() >= y0.len(): - let len = x1.len() + y1.len() + 1; + let len = x1.len() + y1.len(); let mut p = BigUint { data: vec![0; len] }; // p2 = x1 * y1 - mac3(&mut p.data[..], x1, y1); + mac3(&mut p.data, x1, y1); // Not required, but the adds go faster if we drop any unneeded 0s from the end: p.normalize(); - add2(&mut acc[b..], &p.data[..]); - add2(&mut acc[b * 2..], &p.data[..]); + add2(&mut acc[b..], &p.data); + add2(&mut acc[b * 2..], &p.data); // Zero out p before the next multiply: p.data.truncate(0); p.data.resize(len, 0); // p0 = x0 * y0 - mac3(&mut p.data[..], x0, y0); + mac3(&mut p.data, x0, y0); p.normalize(); - add2(&mut acc[..], &p.data[..]); - add2(&mut acc[b..], &p.data[..]); + add2(acc, &p.data); + add2(&mut acc[b..], &p.data); // p1 = (x1 - x0) * (y1 - y0) // We do this one last, since it may be negative and acc can't ever be negative: @@ -188,13 +188,13 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { p.data.truncate(0); p.data.resize(len, 0); - mac3(&mut p.data[..], &j0.data[..], &j1.data[..]); + mac3(&mut p.data, &j0.data, &j1.data); p.normalize(); - sub2(&mut acc[b..], &p.data[..]); + sub2(&mut acc[b..], &p.data); } Minus => { - mac3(&mut acc[b..], &j0.data[..], &j1.data[..]); + mac3(&mut acc[b..], &j0.data, &j1.data); } NoSign => (), } @@ -321,25 +321,41 @@ fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { } fn mul3(x: &[BigDigit], y: &[BigDigit]) -> BigUint { - let len = x.len() + y.len() + 1; + let len = x.len() + y.len(); let mut prod = BigUint { data: vec![0; len] }; - mac3(&mut prod.data[..], x, y); + mac3(&mut prod.data, x, y); prod.normalized() } -fn scalar_mul(a: &mut [BigDigit], b: BigDigit) -> BigDigit { - let mut carry = 0; - for a in a.iter_mut() { - *a = mul_with_carry(*a, b, &mut carry); +fn scalar_mul(a: &mut BigUint, b: BigDigit) { + match b { + 0 => a.set_zero(), + 1 => {} + _ => { + if b.is_power_of_two() { + *a <<= b.trailing_zeros(); + } else { + let mut carry = 0; + for a in a.data.iter_mut() { + *a = mul_with_carry(*a, b, &mut carry); + } + if carry != 0 { + a.data.push(carry as BigDigit); + } + } + } } - carry as BigDigit } fn sub_sign(mut a: &[BigDigit], mut b: &[BigDigit]) -> (Sign, BigUint) { // Normalize: - a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; - b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; + if let Some(&0) = a.last() { + a = &a[..a.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; + } + if let Some(&0) = b.last() { + b = &b[..b.iter().rposition(|&x| x != 0).map_or(0, |i| i + 1)]; + } match cmp_slice(a, b) { Ordering::Greater => { @@ -356,22 +372,55 @@ fn sub_sign(mut a: &[BigDigit], mut b: &[BigDigit]) -> (Sign, BigUint) { } } -forward_all_binop_to_ref_ref!(impl Mul for BigUint, mul); -forward_val_assign!(impl MulAssign for BigUint, mul_assign); - -impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint { - type Output = BigUint; +macro_rules! impl_mul { + ($(impl<$($a:lifetime),*> Mul<$Other:ty> for $Self:ty;)*) => {$( + impl<$($a),*> Mul<$Other> for $Self { + type Output = BigUint; + + #[inline] + fn mul(self, other: $Other) -> BigUint { + match (&*self.data, &*other.data) { + // multiply by zero + (&[], _) | (_, &[]) => BigUint::zero(), + // multiply by a scalar + (_, &[digit]) => self * digit, + (&[digit], _) => other * digit, + // full multiplication + (x, y) => mul3(x, y), + } + } + } + )*} +} +impl_mul! { + impl<> Mul for BigUint; + impl<'b> Mul<&'b BigUint> for BigUint; + impl<'a> Mul for &'a BigUint; + impl<'a, 'b> Mul<&'b BigUint> for &'a BigUint; +} - #[inline] - fn mul(self, other: &BigUint) -> BigUint { - mul3(&self.data[..], &other.data[..]) - } +macro_rules! impl_mul_assign { + ($(impl<$($a:lifetime),*> MulAssign<$Other:ty> for BigUint;)*) => {$( + impl<$($a),*> MulAssign<$Other> for BigUint { + #[inline] + fn mul_assign(&mut self, other: $Other) { + match (&*self.data, &*other.data) { + // multiply by zero + (&[], _) => {}, + (_, &[]) => self.set_zero(), + // multiply by a scalar + (_, &[digit]) => *self *= digit, + (&[digit], _) => *self = other * digit, + // full multiplication + (x, y) => *self = mul3(x, y), + } + } + } + )*} } -impl<'a> MulAssign<&'a BigUint> for BigUint { - #[inline] - fn mul_assign(&mut self, other: &'a BigUint) { - *self = &*self * other - } +impl_mul_assign! { + impl<> MulAssign for BigUint; + impl<'a> MulAssign<&'a BigUint> for BigUint; } promote_unsigned_scalars!(impl Mul for BigUint, mul); @@ -392,14 +441,7 @@ impl Mul for BigUint { impl MulAssign for BigUint { #[inline] fn mul_assign(&mut self, other: u32) { - if other == 0 { - self.data.clear(); - } else { - let carry = scalar_mul(&mut self.data[..], other as BigDigit); - if carry != 0 { - self.data.push(carry); - } - } + scalar_mul(self, other as BigDigit); } } @@ -416,27 +458,18 @@ impl MulAssign for BigUint { #[cfg(not(u64_digit))] #[inline] fn mul_assign(&mut self, other: u64) { - if other == 0 { - self.data.clear(); - } else if other <= u64::from(BigDigit::max_value()) { - *self *= other as BigDigit + if let Some(other) = BigDigit::from_u64(other) { + scalar_mul(self, other); } else { let (hi, lo) = big_digit::from_doublebigdigit(other); - *self = mul3(&self.data[..], &[lo, hi]) + *self = mul3(&self.data, &[lo, hi]); } } #[cfg(u64_digit)] #[inline] fn mul_assign(&mut self, other: u64) { - if other == 0 { - self.data.clear(); - } else { - let carry = scalar_mul(&mut self.data[..], other as BigDigit); - if carry != 0 { - self.data.push(carry); - } - } + scalar_mul(self, other); } } @@ -454,26 +487,25 @@ impl MulAssign for BigUint { #[cfg(not(u64_digit))] #[inline] fn mul_assign(&mut self, other: u128) { - if other == 0 { - self.data.clear(); - } else if other <= u128::from(BigDigit::max_value()) { - *self *= other as BigDigit + if let Some(other) = BigDigit::from_u128(other) { + scalar_mul(self, other); } else { - let (a, b, c, d) = u32_from_u128(other); - *self = mul3(&self.data[..], &[d, c, b, a]) + *self = match u32_from_u128(other) { + (0, 0, c, d) => mul3(&self.data, &[d, c]), + (0, b, c, d) => mul3(&self.data, &[d, c, b]), + (a, b, c, d) => mul3(&self.data, &[d, c, b, a]), + }; } } #[cfg(u64_digit)] #[inline] fn mul_assign(&mut self, other: u128) { - if other == 0 { - self.data.clear(); - } else if other <= BigDigit::max_value() as u128 { - *self *= other as BigDigit + if let Some(other) = BigDigit::from_u128(other) { + scalar_mul(self, other); } else { let (hi, lo) = big_digit::from_doublebigdigit(other); - *self = mul3(&self.data[..], &[lo, hi]) + *self = mul3(&self.data, &[lo, hi]); } } } @@ -502,6 +534,6 @@ fn test_sub_sign() { let a_i = BigInt::from(a.clone()); let b_i = BigInt::from(b.clone()); - assert_eq!(sub_sign_i(&a.data[..], &b.data[..]), &a_i - &b_i); - assert_eq!(sub_sign_i(&b.data[..], &a.data[..]), &b_i - &a_i); + assert_eq!(sub_sign_i(&a.data, &b.data), &a_i - &b_i); + assert_eq!(sub_sign_i(&b.data, &a.data), &b_i - &a_i); } diff --git a/src/biguint/power.rs b/src/biguint/power.rs index 44b38146..d24651bf 100644 --- a/src/biguint/power.rs +++ b/src/biguint/power.rs @@ -85,7 +85,7 @@ macro_rules! pow_impl { exp >>= 1; base = &base * &base; if exp & 1 == 1 { - acc = &acc * &base; + acc *= &base; } } acc @@ -185,7 +185,8 @@ fn plain_modpow(base: &BigUint, exp_data: &[BigDigit], modulus: &BigUint) -> Big let mut unit = |exp_is_odd| { base = &base * &base % modulus; if exp_is_odd { - acc = &acc * &base % modulus; + acc *= &base; + acc %= modulus; } }; From c528be1468570fbab6707e43068e4b382bd8852c Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 17:06:12 -0800 Subject: [PATCH 4/6] Skip trailing zeros in multiplication --- src/biguint/multiplication.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs index c07cca17..860c6e16 100644 --- a/src/biguint/multiplication.rs +++ b/src/biguint/multiplication.rs @@ -66,7 +66,20 @@ fn bigint_from_slice(slice: &[BigDigit]) -> BigInt { /// Three argument multiply accumulate: /// acc += b * c #[allow(clippy::many_single_char_names)] -fn mac3(acc: &mut [BigDigit], b: &[BigDigit], c: &[BigDigit]) { +fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) { + // Least-significant zeros have no effect on the output. + if let Some(&0) = b.first() { + let nz = b.iter().position(|&d| d != 0).unwrap(); + b = &b[nz..]; + acc = &mut acc[nz..]; + } + if let Some(&0) = c.first() { + let nz = c.iter().position(|&d| d != 0).unwrap(); + c = &c[nz..]; + acc = &mut acc[nz..]; + } + + let acc = acc; let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) }; // We use three algorithms for different input sizes. From bc967f6abea8395a787ed364172d1405952e4135 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 17:06:54 -0800 Subject: [PATCH 5/6] Use shifts instead of mul/div by 2 in Toom-3 --- src/biguint/multiplication.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs index 860c6e16..c7afd3f4 100644 --- a/src/biguint/multiplication.rs +++ b/src/biguint/multiplication.rs @@ -312,10 +312,10 @@ fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) { // // This particular sequence is given by Bodrato and is an interpolation // of the above equations. - let mut comp3: BigInt = (r3 - &r1) / 3; - let mut comp1: BigInt = (r1 - &r2) / 2; + let mut comp3: BigInt = (r3 - &r1) / 3u32; + let mut comp1: BigInt = (r1 - &r2) >> 1; let mut comp2: BigInt = r2 - &r0; - comp3 = (&comp2 - comp3) / 2 + &r4 * 2; + comp3 = ((&comp2 - comp3) >> 1) + (&r4 << 1); comp2 += &comp1 - &r4; comp1 -= &comp3; From dce75e10ce76a7209bbae5cbc8351064e10abd4a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 13 Mar 2021 17:07:37 -0800 Subject: [PATCH 6/6] Avoid allocation in the Toom-3 finalization --- src/biguint/multiplication.rs | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs index c7afd3f4..581c9e17 100644 --- a/src/biguint/multiplication.rs +++ b/src/biguint/multiplication.rs @@ -2,7 +2,7 @@ use super::addition::{__add2, add2}; use super::subtraction::sub2; #[cfg(not(u64_digit))] use super::u32_from_u128; -use super::{biguint_from_vec, cmp_slice, BigUint}; +use super::{biguint_from_vec, cmp_slice, BigUint, IntDigits}; use crate::big_digit::{self, BigDigit, DoubleBigDigit}; use crate::Sign::{self, Minus, NoSign, Plus}; @@ -322,14 +322,24 @@ fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) { // Recomposition. The coefficients of the polynomial are now known. // // Evaluate at w(t) where t is our given base to get the result. - let bits = u64::from(big_digit::BITS) * i as u64; - let result = r0 - + (comp1 << bits) - + (comp2 << (2 * bits)) - + (comp3 << (3 * bits)) - + (r4 << (4 * bits)); - let result_pos = result.to_biguint().unwrap(); - add2(&mut acc[..], &result_pos.data); + // + // let bits = u64::from(big_digit::BITS) * i as u64; + // let result = r0 + // + (comp1 << bits) + // + (comp2 << (2 * bits)) + // + (comp3 << (3 * bits)) + // + (r4 << (4 * bits)); + // let result_pos = result.to_biguint().unwrap(); + // add2(&mut acc[..], &result_pos.data); + // + // But with less intermediate copying: + for (j, result) in [&r0, &comp1, &comp2, &comp3, &r4].iter().enumerate().rev() { + match result.sign() { + Plus => add2(&mut acc[i * j..], result.digits()), + Minus => sub2(&mut acc[i * j..], result.digits()), + NoSign => {} + } + } } }