Skip to content

Commit

Permalink
Merge pull request #519 from Pazzaz/char-standard
Browse files Browse the repository at this point in the history
Avoid surrogates when generating `char` using Standard distribution
  • Loading branch information
dhardy committed Jun 21, 2018
2 parents b7b1176 + 02a7a11 commit af1303c
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions src/distributions/other.rs
Expand Up @@ -44,15 +44,21 @@ pub struct Alphanumeric;
// NOTE(review): this span looks like a flattened commit diff ("14 additions &
// 8 deletions") with the +/- markers stripped, so the removed and the added
// implementation of `sample` appear back to back and the `loop` below has no
// closing brace of its own. Pick one side before trying to compile — TODO confirm.
impl Distribution<char> for Standard {
/// Samples a `char` from `rng`.
///
/// Two strategies are visible below: an older one that retries until
/// `char::from_u32` accepts the drawn value, and a newer one that maps the
/// drawn integer around the surrogate gap so that no retry is needed.
#[inline]
fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char {
// Older strategy (presumably the deleted side of the diff): draw from a range
// that still includes the surrogates and loop until the value is a valid `char`.
let range = Uniform::new(0u32, 0x11_0000);
loop {
match char::from_u32(range.sample(rng)) {
Some(c) => return c,
// About 0.2% of numbers in the range 0..0x110000 are invalid
// codepoints (surrogates).
None => {}
}
// Newer strategy (presumably the added side of the diff) starts here.
//
// A valid `char` is either in the interval `[0, 0xD800)` or
// `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in
// `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is
// reserved for surrogates. This is the size of that gap.
const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1;

// Uniform::new(0, 0x11_0000 - GAP_SIZE) can also be used but it
// seemed slower.
let range = Uniform::new(GAP_SIZE, 0x11_0000);

let mut n = range.sample(rng);
// Values at or below the top of the surrogate gap are shifted down by
// `GAP_SIZE` (0x800), so `[0x800, 0xDFFF]` becomes `[0, 0xD7FF]`; larger
// values are already above the gap and are kept unchanged.
if n <= 0xDFFF {
n -= GAP_SIZE;
}
// SAFETY: assuming `Uniform::new(low, high)` only yields values in
// `[low, high)` (TODO confirm), `n` started in `[0x800, 0x11_0000)` and after
// the adjustment above lies in `[0, 0xD7FF]` or `[0xE000, 0x10_FFFF]`, i.e.
// never inside the surrogate gap and never at or above 0x11_0000.
unsafe { char::from_u32_unchecked(n) }
}
}

Expand Down

0 comments on commit af1303c

Please sign in to comment.