diff --git a/benches/bigint.rs b/benches/bigint.rs index 80ec191c..22795274 100644 --- a/benches/bigint.rs +++ b/benches/bigint.rs @@ -87,6 +87,16 @@ fn multiply_3(b: &mut Bencher) { multiply_bench(b, 1 << 16, 1 << 17); } +#[bench] +fn multiply_4(b: &mut Bencher) { + multiply_bench(b, 1 << 12, 1 << 13); +} + +#[bench] +fn multiply_5(b: &mut Bencher) { + multiply_bench(b, 1 << 12, 1 << 14); +} + #[bench] fn divide_0(b: &mut Bencher) { divide_bench(b, 1 << 8, 1 << 6); diff --git a/src/biguint/multiplication.rs b/src/biguint/multiplication.rs index 4d7f1f21..fe56ac36 100644 --- a/src/biguint/multiplication.rs +++ b/src/biguint/multiplication.rs @@ -88,9 +88,10 @@ fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) { let acc = acc; let (x, y) = if b.len() < c.len() { (b, c) } else { (c, b) }; - // We use three algorithms for different input sizes. + // We use four algorithms for different input sizes. // // - For small inputs, long multiplication is fastest. + // - If y is at least least twice as long as x, split using Half-Karatsuba. // - Next we use Karatsuba multiplication (Toom-2), which we have optimized // to avoid unnecessary allocations for intermediate values. // - For the largest inputs we use Toom-3, which better optimizes the @@ -104,6 +105,65 @@ fn mac3(mut acc: &mut [BigDigit], mut b: &[BigDigit], mut c: &[BigDigit]) { for (i, xi) in x.iter().enumerate() { mac_digit(&mut acc[i..], y, *xi); } + } else if x.len() * 2 <= y.len() { + // Karatsuba Multiplication for factors with significant length disparity. + // + // The Half-Karatsuba Multiplication Algorithm is a specialized case of + // the normal Karatsuba multiplication algorithm, designed for the scenario + // where y has at least twice as many base digits as x. + // + // In this case y (the longer input) is split into high2 and low2, + // at m2 (half the length of y) and x (the shorter input), + // is used directly without splitting. + // + // The algorithm then proceeds as follows: + // + // 1. Compute the product z0 = x * low2. + // 2. Compute the product temp = x * high2. + // 3. Adjust the weight of temp by adding m2 (* NBASE ^ m2) + // 4. Add temp and z0 to obtain the final result. + // + // Proof: + // + // The algorithm can be derived from the original Karatsuba algorithm by + // simplifying the formula when the shorter factor x is not split into + // high and low parts, as shown below. + // + // Original Karatsuba formula: + // + // result = (z2 * NBASE ^ (m2 × 2)) + ((z1 - z2 - z0) * NBASE ^ m2) + z0 + // + // Substitutions: + // + // low1 = x + // high1 = 0 + // + // Applying substitutions: + // + // z0 = (low1 * low2) + // = (x * low2) + // + // z1 = ((low1 + high1) * (low2 + high2)) + // = ((x + 0) * (low2 + high2)) + // = (x * low2) + (x * high2) + // + // z2 = (high1 * high2) + // = (0 * high2) + // = 0 + // + // Simplified using the above substitutions: + // + // result = (z2 * NBASE ^ (m2 × 2)) + ((z1 - z2 - z0) * NBASE ^ m2) + z0 + // = (0 * NBASE ^ (m2 × 2)) + ((z1 - 0 - z0) * NBASE ^ m2) + z0 + // = ((z1 - z0) * NBASE ^ m2) + z0 + // = ((z1 - z0) * NBASE ^ m2) + z0 + // = (x * high2) * NBASE ^ m2 + z0 + let m2 = y.len() / 2; + let (low2, high2) = y.split_at(m2); + + // (x * high2) * NBASE ^ m2 + z0 + mac3(acc, x, low2); + mac3(&mut acc[m2..], x, high2); } else if x.len() <= 256 { // Karatsuba multiplication: //