From 0a44f513f5de7b5617fae4376391e8ce53a9de6c Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 19 Feb 2019 17:03:25 +0100 Subject: [PATCH 1/3] Binomial: Faster sampling for n * p < 10 Fixes #734. --- src/distributions/binomial.rs | 115 +++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 50 deletions(-) diff --git a/src/distributions/binomial.rs b/src/distributions/binomial.rs index 2df393e53ab..907155b24ed 100644 --- a/src/distributions/binomial.rs +++ b/src/distributions/binomial.rs @@ -10,7 +10,7 @@ //! The binomial distribution. use Rng; -use distributions::{Distribution, Bernoulli, Cauchy}; +use distributions::{Distribution, Cauchy}; use distributions::utils::log_gamma; /// The binomial distribution `Binomial(n, p)`. @@ -55,19 +55,7 @@ impl Distribution for Binomial { } else if self.p == 1.0 { return self.n; } - - // For low n, it is faster to sample directly. For both methods, - // performance is independent of p. On Intel Haswell CPU this method - // appears to be faster for approx n < 300. - if self.n < 300 { - let mut result = 0; - let d = Bernoulli::new(self.p); - for _ in 0 .. self.n { - result += rng.sample(d) as u32; - } - return result as u64; - } - + // binomial distribution is symmetrical with respect to p -> 1-p, k -> n-k // switch p so that it is less than 0.5 - this allows for lower expected values // we will just invert the result at the end @@ -77,53 +65,80 @@ impl Distribution for Binomial { 1.0 - self.p }; - // prepare some cached values - let float_n = self.n as f64; - let ln_fact_n = log_gamma(float_n + 1.0); - let pc = 1.0 - p; - let log_p = p.ln(); - let log_pc = pc.ln(); - let expected = self.n as f64 * p; - let sq = (expected * (2.0 * pc)).sqrt(); - - let mut lresult; - - // we use the Cauchy distribution as the comparison distribution - // f(x) ~ 1/(1+x^2) - let cauchy = Cauchy::new(0.0, 1.0); - loop { - let mut comp_dev: f64; + let result; + + // For small n * min(p, 1 - p), the BINV algorithm based on the inverse + // transformation of the binomial distribution is more efficient: + // + // Voratas Kachitvichyanukul and Bruce W. Schmeiser. 1988. Binomial + // random variate generation. Commun. ACM 31, 2 (February 1988), + // 216-222. http://dx.doi.org/10.1145/42372.42381 + if (self.n as f64) * p < 10. { + let q = 1. - p; + let s = p / q; + let a = ((self.n + 1) as f64) * s; + let mut r = q.powf(self.n as f64); + // FIXME: Using integer exponentiation should be faster, but it is not available for + // `u64` exponents, neither in `std` or `num`. + let mut u: f64 = rng.gen(); + let mut x = 0; + while u > r as f64 { + u -= r; + x += 1; + r *= a / (x as f64) - s; + } + result = x; + } else { + // FIXME: Using the BTPE algorithm is probably faster. + + // prepare some cached values + let float_n = self.n as f64; + let ln_fact_n = log_gamma(float_n + 1.0); + let pc = 1.0 - p; + let log_p = p.ln(); + let log_pc = pc.ln(); + let expected = self.n as f64 * p; + let sq = (expected * (2.0 * pc)).sqrt(); + let mut lresult; + + // we use the Cauchy distribution as the comparison distribution + // f(x) ~ 1/(1+x^2) + let cauchy = Cauchy::new(0.0, 1.0); loop { - // draw from the Cauchy distribution - comp_dev = rng.sample(cauchy); - // shift the peak of the comparison ditribution - lresult = expected + sq * comp_dev; - // repeat the drawing until we are in the range of possible values - if lresult >= 0.0 && lresult < float_n + 1.0 { - break; + let mut comp_dev: f64; + loop { + // draw from the Cauchy distribution + comp_dev = rng.sample(cauchy); + // shift the peak of the comparison ditribution + lresult = expected + sq * comp_dev; + // repeat the drawing until we are in the range of possible values + if lresult >= 0.0 && lresult < float_n + 1.0 { + break; + } } - } - // the result should be discrete - lresult = lresult.floor(); + // the result should be discrete + lresult = lresult.floor(); - let log_binomial_dist = ln_fact_n - log_gamma(lresult+1.0) - - log_gamma(float_n - lresult + 1.0) + lresult*log_p + (float_n - lresult)*log_pc; - // this is the binomial probability divided by the comparison probability - // we will generate a uniform random value and if it is larger than this, - // we interpret it as a value falling out of the distribution and repeat - let comparison_coeff = (log_binomial_dist.exp() * sq) * (1.2 * (1.0 + comp_dev*comp_dev)); + let log_binomial_dist = ln_fact_n - log_gamma(lresult+1.0) - + log_gamma(float_n - lresult + 1.0) + lresult*log_p + (float_n - lresult)*log_pc; + // this is the binomial probability divided by the comparison probability + // we will generate a uniform random value and if it is larger than this, + // we interpret it as a value falling out of the distribution and repeat + let comparison_coeff = (log_binomial_dist.exp() * sq) * (1.2 * (1.0 + comp_dev*comp_dev)); - if comparison_coeff >= rng.gen() { - break; + if comparison_coeff >= rng.gen() { + break; + } } + result = lresult as u64; } // invert the result for p < 0.5 if p != self.p { - self.n - lresult as u64 + self.n - result } else { - lresult as u64 + result } } } From 6323d2951ec2a827b0113e2d81c038e846613b8d Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Tue, 19 Feb 2019 17:14:09 +0100 Subject: [PATCH 2/3] Binomial: Add benchmark for small n * p Our previous implementation was very slow (#734). The benchmarks makes sure we won't have such regressions in the future. --- benches/distributions.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/benches/distributions.rs b/benches/distributions.rs index 56a8d43b791..5ff91f56e2b 100644 --- a/benches/distributions.rs +++ b/benches/distributions.rs @@ -203,6 +203,7 @@ distr_float!(distr_gamma_large_shape, f64, Gamma::new(10., 1.0)); distr_float!(distr_gamma_small_shape, f64, Gamma::new(0.1, 1.0)); distr_float!(distr_cauchy, f64, Cauchy::new(4.2, 6.9)); distr_int!(distr_binomial, u64, Binomial::new(20, 0.7)); +distr_int!(distr_binomial_small, u64, Binomial::new(1000000, 1e-30)); distr_int!(distr_poisson, u64, Poisson::new(4.0)); distr!(distr_bernoulli, bool, Bernoulli::new(0.18)); distr_arr!(distr_circle, [f64; 2], UnitCircle::new()); From 5f6f89e5c7903da9fd6984dfc95300ccd0af630b Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Mon, 25 Feb 2019 14:05:00 +0100 Subject: [PATCH 3/3] Binomial: Use integer exponentation This increase performance significantly, see below. However, it requires shipping our own integer exponentiation implementation, because `u64` exponents are not supported by `std` or `num`. Before: ``` test distr_binomial ... bench: 88,365 ns/iter (+/- 2,215) =90 MB/s test distr_binomial_small ... bench: 33,775 ns/iter (+/- 5,494) =236 MB/s test misc_binomial_1 ... bench: 13 ns/iter (+/- 2) test misc_binomial_10 ... bench: 72 ns/iter (+/- 1) test misc_binomial_100 ... bench: 71 ns/iter (+/- 11) test misc_binomial_1000 ... bench: 624 ns/iter (+/- 19) test misc_binomial_1e12 ... bench: 681 ns/iter (+/- 18) ``` After: ``` test distr_binomial ... bench: 44,354 ns/iter (+/- 3,518) = 180 MB/s test distr_binomial_small ... bench: 22,736 ns/iter (+/- 514) = 351 MB/s test misc_binomial_1 ... bench: 10 ns/iter (+/- 0) test misc_binomial_10 ... bench: 23 ns/iter (+/- 0) test misc_binomial_100 ... bench: 27 ns/iter (+/- 4) test misc_binomial_1000 ... bench: 621 ns/iter (+/- 15) test misc_binomial_1e12 ... bench: 686 ns/iter (+/- 20) ``` --- src/distributions/binomial.rs | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/src/distributions/binomial.rs b/src/distributions/binomial.rs index 907155b24ed..2bad03b78bd 100644 --- a/src/distributions/binomial.rs +++ b/src/distributions/binomial.rs @@ -47,6 +47,31 @@ impl Binomial { } } +/// Raise a `base` to the power of `exp`, using exponentiation by squaring. +/// +/// This implementation is based on the one in the `num_traits` crate. It is +/// slightly modified to accept `u64` exponents. +fn pow(mut base: f64, mut exp: u64) -> f64 { + if exp == 0 { + return 1.; + } + + while exp & 1 == 0 { + base *= base; + exp >>= 1; + } + + let mut acc = base; + while exp > 1 { + exp >>= 1; + base *= base; + if exp & 1 == 1 { + acc *= base; + } + } + acc +} + impl Distribution for Binomial { fn sample(&self, rng: &mut R) -> u64 { // Handle these values directly. @@ -77,9 +102,7 @@ impl Distribution for Binomial { let q = 1. - p; let s = p / q; let a = ((self.n + 1) as f64) * s; - let mut r = q.powf(self.n as f64); - // FIXME: Using integer exponentiation should be faster, but it is not available for - // `u64` exponents, neither in `std` or `num`. + let mut r = pow(q, self.n); let mut u: f64 = rng.gen(); let mut x = 0; while u > r as f64 {