Skip to content

Commit

Permalink
Use inline asm! for x86 DIV on Rust 1.59+
Browse files Browse the repository at this point in the history
  • Loading branch information
cuviper committed Feb 23, 2022
1 parent e77ffac commit 83e8d2b
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 6 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yaml
Expand Up @@ -18,6 +18,7 @@ jobs:
1.36.0, # alloc, rand
1.40.0, # arbitrary
1.46.0, # quickcheck
1.59.0, # asm!
stable,
beta,
nightly
Expand Down
1 change: 1 addition & 0 deletions bors.toml
Expand Up @@ -4,6 +4,7 @@ status = [
"Test (1.36.0)",
"Test (1.40.0)",
"Test (1.46.0)",
"Test (1.59.0)",
"Test (stable)",
"Test (beta)",
"Test (nightly)",
Expand Down
5 changes: 5 additions & 0 deletions build.rs
Expand Up @@ -10,6 +10,7 @@ fn main() {
if u64_digit {
autocfg::emit("u64_digit");
}

let ac = autocfg::new();
let std = if ac.probe_sysroot_crate("std") {
"std"
Expand All @@ -28,6 +29,10 @@ fn main() {
if ac.probe_path(&addcarry) {
autocfg::emit("use_addcarry");
}

if ac.probe_path(&format!("{}::arch::asm", std)) {
autocfg::emit("use_x86_div");
}
}
}

Expand Down
13 changes: 10 additions & 3 deletions src/biguint/convert.rs
Expand Up @@ -657,16 +657,23 @@ pub(super) fn to_radix_digits_le(u: &BigUint, radix: u32) -> Vec<u8> {

let mut digits = u.clone();

let (base, power) = get_radix_base(radix, big_digit::HALF_BITS);
// X86 DIV can quickly divide by a full digit, otherwise we choose a divisor
// that's suitable for `div_half` to avoid slow `DoubleBigDigit` division.
let bits = if cfg!(use_x86_div) {
big_digit::BITS
} else {
big_digit::HALF_BITS
};
let (base, power) = get_radix_base(radix, bits);
let radix = radix as BigDigit;

// For very large numbers, the O(n²) loop of repeated `div_rem_digit` dominates the
// performance. We can mitigate this by dividing into chunks of a larger base first.
// The threshold for this was chosen by anecdotal performance measurements to
// approximate where this starts to make a noticeable difference.
if digits.data.len() >= 64 {
let mut big_base = BigUint::from(base * base);
let mut big_power = 2usize;
let mut big_base = BigUint::from(base);
let mut big_power = 1usize;

// Choose a target base length near √n.
let target_len = digits.data.len().sqrt();
Expand Down
45 changes: 42 additions & 3 deletions src/biguint/division.rs
Expand Up @@ -18,6 +18,7 @@ use num_traits::{CheckedDiv, One, ToPrimitive, Zero};
/// This is _not_ true for an arbitrary numerator/denominator.
///
/// (This function also matches what the x86 divide instruction does).
#[cfg(not(use_x86_div))]
#[inline]
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
debug_assert!(hi < divisor);
Expand All @@ -27,6 +28,44 @@ fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigi
((lhs / rhs) as BigDigit, (lhs % rhs) as BigDigit)
}

/// Divide the two-digit number `hi:lo` by `divisor` using a single hardware `div`,
/// returning `(quotient, remainder)`.
///
/// With Rust 1.59+ for stable `asm!`, x86 and x86_64 can use a real `div` instruction.
///
/// `hi` is the most significant digit of the dividend and `lo` the least significant.
/// Callers must guarantee `hi < divisor`: that is the condition under which the quotient
/// fits in one digit (and it also implies `divisor != 0`). The condition is only checked
/// in debug builds.
#[cfg(use_x86_div)]
#[inline]
fn div_wide(hi: BigDigit, lo: BigDigit, divisor: BigDigit) -> (BigDigit, BigDigit) {
// This debug assertion covers the potential #DE for divisor==0 or a quotient too large for one
// register, otherwise in release mode it will become a target-specific fault like SIGFPE.
// This should never occur with the inputs from our few `div_wide` callers.
debug_assert!(hi < divisor);

// SAFETY: The `div` instruction only affects registers, reading the explicit operand as the
// divisor, and implicitly reading RDX:RAX or EDX:EAX as the dividend. The result is implicitly
// written back to RAX or EAX for the quotient and RDX or EDX for the remainder. No memory is
// used, and flags are not preserved.
unsafe {
// Outputs of the instruction; exactly one of the two `asm!` blocks below is compiled in
// and initializes both.
let (div, rem);

// Build with `u64_digit`: use the 64-bit register names (RDX:RAX dividend).
#[cfg(u64_digit)]
core::arch::asm!(
// `:r` formats the allocated register with its 64-bit name.
"div {:r}",
// The divisor may live in any general-purpose register the compiler picks.
in(reg) divisor,
// High digit goes in through RDX; the remainder comes back in the same register.
inout("rdx") hi => rem,
// Low digit goes in through RAX; the quotient comes back in the same register.
inout("rax") lo => div,
// No `preserves_flags` here, matching the note above that flags are clobbered.
options(pure, nomem, nostack),
);

// Build without `u64_digit`: same operation on the 32-bit registers (EDX:EAX dividend).
#[cfg(not(u64_digit))]
core::arch::asm!(
// `:e` formats the allocated register with its 32-bit name.
"div {:e}",
in(reg) divisor,
// High digit in / remainder out.
inout("edx") hi => rem,
// Low digit in / quotient out.
inout("eax") lo => div,
options(pure, nomem, nostack),
);

// Quotient first, then remainder.
(div, rem)
}
}

/// For small divisors, we can divide without promoting to `DoubleBigDigit` by
/// using half-size pieces of digit, like long-division.
#[inline]
Expand All @@ -47,7 +86,7 @@ pub(super) fn div_rem_digit(mut a: BigUint, b: BigDigit) -> (BigUint, BigDigit)

let mut rem = 0;

if b <= big_digit::HALF {
if !cfg!(use_x86_div) && b <= big_digit::HALF {
for d in a.data.iter_mut().rev() {
let (q, r) = div_half(rem, *d, b);
*d = q;
Expand All @@ -72,7 +111,7 @@ fn rem_digit(a: &BigUint, b: BigDigit) -> BigDigit {

let mut rem = 0;

if b <= big_digit::HALF {
if !cfg!(use_x86_div) && b <= big_digit::HALF {
for &digit in a.data.iter().rev() {
let (_, r) = div_half(rem, digit, b);
rem = r;
Expand Down Expand Up @@ -232,7 +271,7 @@ fn div_rem_core(mut a: BigUint, b: &[BigDigit]) -> (BigUint, BigUint) {
let mut a0 = 0;

// [b1, b0] are the two most significant digits of the divisor. They never change.
let b0 = *b.last().unwrap();
let b0 = b[b.len() - 1];
let b1 = b[b.len() - 2];

let q_len = a.data.len() - b.len() + 1;
Expand Down

0 comments on commit 83e8d2b

Please sign in to comment.