From ebf4e66394385e6ab57b84e503d8d6f87d8d7f8b Mon Sep 17 00:00:00 2001 From: qoh <1732901+qoh@users.noreply.github.com> Date: Tue, 21 Jan 2020 01:37:56 +0100 Subject: [PATCH 1/3] Impl Distribution<u8> for Alphanumeric Sampling a random alphanumeric string by collecting chars (that are known to be ASCII) into a String involves re-allocation because String is encoded as UTF-8, as in this example: ```rust let chars: String = iter::repeat(()) .map(|()| rng.sample(Alphanumeric)) .take(7) .collect(); ``` I wanted to get rid of the clearly unnecessary re-allocations in my applications, so I needed to be able to access the ASCII characters as simple bytes. It seems that was already what was going on inside Alphanumeric; however, it was just internal. This PR changes the `Distribution<char>` impl to provide `u8`s (which it generates internally) instead, and implements the previous `Distribution<char>` using it. One could then, for example, do this: ```rust let mut rng = thread_rng(); let bytes = (0..7).map(|_| rng.sample(Alphanumeric)).collect(); let chars = unsafe { String::from_utf8_unchecked(bytes) }; ``` --- src/distributions/other.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/distributions/other.rs b/src/distributions/other.rs index 32cc470d1eb..e9c8d970571 100644 --- a/src/distributions/other.rs +++ b/src/distributions/other.rs @@ -19,7 +19,7 @@ use serde::{Serialize, Deserialize}; // ----- Sampling distributions ----- -/// Sample a `char`, uniformly distributed over ASCII letters and numbers: +/// Sample a `char` or `u8`, uniformly distributed over ASCII letters and numbers: /// a-z, A-Z and 0-9. 
/// /// # Example @@ -66,6 +66,13 @@ impl Distribution<char> for Standard { impl Distribution<char> for Alphanumeric { fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { + let byte: u8 = self.sample(rng); + byte as char + } +} + +impl Distribution<u8> for Alphanumeric { + fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { const RANGE: u32 = 26 + 26 + 10; const GEN_ASCII_STR_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ abcdefghijklmnopqrstuvwxyz\ @@ -77,7 +84,7 @@ impl Distribution<char> for Alphanumeric { loop { let var = rng.next_u32() >> (32 - 6); if var < RANGE { - return GEN_ASCII_STR_CHARSET[var as usize] as char; + return GEN_ASCII_STR_CHARSET[var as usize]; } } } From 2c745274ecc5dd58200a1dff27e77fa53888eda3 Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Sat, 1 Aug 2020 21:24:33 +0200 Subject: [PATCH 2/3] `Alphanumeric` samples bytes instead of chars This corresponds more closely to the internally used types and can be easily converted to a `char` via `From` and `Into`, while being more flexible to use. This is a breaking change. --- CHANGELOG.md | 1 + src/distributions/mod.rs | 2 +- src/distributions/other.rs | 14 ++++---------- src/rng.rs | 2 +- 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c6f33d1b7..f1a6ae8bf27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ You may also find the [Upgrade Guide](https://rust-random.github.io/book/update. is supported (#744, #1003). Note that `a` and `b` can no longer be references or SIMD types. 
- Replace `AsByteSliceMut` with `Fill` (#940) - Move alias method for `WeightedIndex` to `rand_distr` (#945) +- `Alphanumeric` samples bytes instead of chars (#935) - Better NaN handling for `WeightedIndex` (#1005) - Implement `IntoIterator` for `IndexVec`, replacing the `into_iter` method (#1007) - Reduce packaged crate size (#983) diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs index 001084e985c..1fa7afe084a 100644 --- a/src/distributions/mod.rs +++ b/src/distributions/mod.rs @@ -166,7 +166,7 @@ pub trait Distribution<T> { /// let v: Vec<f32> = Standard.sample_iter(rng).take(16).collect(); /// /// // String: - /// let s: String = Alphanumeric.sample_iter(rng).take(7).collect(); + /// let s: String = Alphanumeric.sample_iter(rng).take(7).map(char::from).collect(); /// /// // Dice-rolling: /// let die_range = Uniform::new_inclusive(1, 6); diff --git a/src/distributions/other.rs b/src/distributions/other.rs index e9c8d970571..888d9f4cd58 100644 --- a/src/distributions/other.rs +++ b/src/distributions/other.rs @@ -19,7 +19,7 @@ use serde::{Serialize, Deserialize}; // ----- Sampling distributions ----- -/// Sample a `char` or `u8`, uniformly distributed over ASCII letters and numbers: +/// Sample a `u8`, uniformly distributed over ASCII letters and numbers: /// a-z, A-Z and 0-9. /// /// # Example @@ -32,6 +32,7 @@ use serde::{Serialize, Deserialize}; /// let mut rng = thread_rng(); /// let chars: String = iter::repeat(()) /// .map(|()| rng.sample(Alphanumeric)) +/// .map(char::from) /// .take(7) /// .collect(); /// println!("Random chars: {}", chars); @@ -64,13 +65,6 @@ impl Distribution<char> for Standard { } } -impl Distribution<char> for Alphanumeric { - fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { - let byte: u8 = self.sample(rng); - byte as char - } -} - impl Distribution<u8> for Alphanumeric { fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { const RANGE: u32 = 26 + 26 + 10; @@ -228,7 +222,7 @@ mod tests { // take the rejection sampling path. 
let mut incorrect = false; for _ in 0..100 { - let c = rng.sample(Alphanumeric); + let c: char = rng.sample(Alphanumeric).into(); incorrect |= !((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ); @@ -256,7 +250,7 @@ mod tests { '\u{ed692}', '\u{35888}', ]); - test_samples(&Alphanumeric, 'a', &['h', 'm', 'e', '3', 'M']); + test_samples(&Alphanumeric, 0, &[104, 109, 101, 51, 77]); test_samples(&Standard, false, &[true, true, false, true, false]); test_samples(&Standard, None as Option<bool>, &[ Some(true), diff --git a/src/rng.rs b/src/rng.rs index 80f61a4ce49..3bff930b4aa 100644 --- a/src/rng.rs +++ b/src/rng.rs @@ -171,7 +171,7 @@ pub trait Rng: RngCore { /// let v: Vec<f32> = rng.sample_iter(Standard).take(16).collect(); /// /// // String: - /// let s: String = rng.sample_iter(Alphanumeric).take(7).collect(); + /// let s: String = rng.sample_iter(Alphanumeric).take(7).map(char::from).collect(); /// /// // Combined values /// println!("{:?}", rng.sample_iter(Standard).take(5) From fe7b50928d1a6d407c3b024339868b5dad4da4dc Mon Sep 17 00:00:00 2001 From: Vinzent Steinberg Date: Sat, 1 Aug 2020 22:56:29 +0200 Subject: [PATCH 3/3] Fix benchmarks --- benches/distributions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/distributions.rs b/benches/distributions.rs index 9ab74cf563d..7d8ac94c37b 100644 --- a/benches/distributions.rs +++ b/benches/distributions.rs @@ -176,7 +176,7 @@ distr_nz_int!(distr_standard_nz64, NonZeroU64, u64, Standard); distr_nz_int!(distr_standard_nz128, NonZeroU128, u128, Standard); distr!(distr_standard_bool, bool, Standard); -distr!(distr_standard_alphanumeric, char, Alphanumeric); +distr!(distr_standard_alphanumeric, u8, Alphanumeric); distr!(distr_standard_codepoint, char, Standard); distr_float!(distr_standard_f32, f32, Standard);