From 8723d99d634548e1febe3209129721c3b6ffdb12 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Tue, 18 May 2021 21:05:18 -0500 Subject: [PATCH] perf(dict): Bypass vars when possible Variant support slows us down by 10-50%. I assume most people will run with `en` and so most of this overhead goes to waste. So instead of merging vars with dict, let's instead get a quick win by just skipping vars when we don't need to. If the assumptions behind this change over time or if there is need for speeding up a specific locale, we can re-address this. Before: ``` check_file/Typos/code time: [35.860 us 36.021 us 36.187 us] thrpt: [8.0117 MiB/s 8.0486 MiB/s 8.0846 MiB/s] check_file/Typos/corpus time: [26.966 ms 27.215 ms 27.521 ms] thrpt: [21.127 MiB/s 21.365 MiB/s 21.562 MiB/s] ``` After: ``` check_file/Typos/code time: [33.837 us 33.928 us 34.031 us] thrpt: [8.5191 MiB/s 8.5452 MiB/s 8.5680 MiB/s] check_file/Typos/corpus time: [17.521 ms 17.620 ms 17.730 ms] thrpt: [32.794 MiB/s 32.999 MiB/s 33.184 MiB/s] ``` This puts us in line with `--no-default-features --features dict` Fixes #253 --- crates/typos-vars/codegen/src/main.rs | 16 ++++++++++ crates/typos-vars/src/vars_codegen.rs | 3 ++ src/dict.rs | 44 ++++++++++++++++----------- 3 files changed, 46 insertions(+), 17 deletions(-) diff --git a/crates/typos-vars/codegen/src/main.rs b/crates/typos-vars/codegen/src/main.rs index d861dd92e..9f2225fa2 100644 --- a/crates/typos-vars/codegen/src/main.rs +++ b/crates/typos-vars/codegen/src/main.rs @@ -78,6 +78,7 @@ fn generate_variations(file: &mut W) { let mut smallest = usize::MAX; let mut largest = usize::MIN; + let mut no_invalid = true; writeln!( file, @@ -97,6 +98,8 @@ fn generate_variations(file: &mut W) { builder.entry(unicase::UniCase::new(word), &value); smallest = std::cmp::min(smallest, word.len()); largest = std::cmp::max(largest, word.len()); + + no_invalid &= !is_always_invalid(data); } let codegenned = builder.build(); writeln!(file, "{}", codegenned).unwrap(); @@ -110,6 
+113,10 @@ fn generate_variations(file: &mut W) { ) .unwrap(); + writeln!(file).unwrap(); + writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap(); + + writeln!(file).unwrap(); for (symbol, entry) in entries.iter() { if !referenced_symbols.contains(symbol.as_str()) { continue; @@ -156,6 +163,15 @@ fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool { false } +fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool { + for (_symbol, set) in data.iter() { + if set.is_empty() { + return true; + } + } + false +} + fn entries() -> BTreeMap { varcon::VARCON .iter() diff --git a/crates/typos-vars/src/vars_codegen.rs b/crates/typos-vars/src/vars_codegen.rs index 138106457..2664e00e7 100644 --- a/crates/typos-vars/src/vars_codegen.rs +++ b/crates/typos-vars/src/vars_codegen.rs @@ -113083,6 +113083,9 @@ pub static VARS_DICTIONARY: phf::Map< }; pub const WORD_RANGE: std::ops::RangeInclusive = 2..=24; + +pub const NO_INVALID: bool = true; + pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap = [&["abettors"], &["abetters"], &["abettors"], &["abetters"]]; diff --git a/src/dict.rs b/src/dict.rs index 5541f5dea..86511b98d 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -72,26 +72,30 @@ impl BuiltIn { #[cfg(feature = "vars")] impl BuiltIn { fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> { - let mut chained: Vec<_> = corrections - .iter() - .flat_map(|c| match self.correct_with_vars(c) { - Some(Status::Valid) | None => vec![Cow::Borrowed(*c)], - Some(Status::Corrections(vars)) => vars, - Some(Status::Invalid) => { - unreachable!("correct_with_vars should always have valid suggestions") - } - }) - .collect(); - if chained.len() != 1 { - chained.sort_unstable(); - chained.dedup(); + if self.is_vars_enabled() { + let mut chained: Vec<_> = corrections + .iter() + .flat_map(|c| match self.correct_with_vars(c) { + Some(Status::Valid) | None => vec![Cow::Borrowed(*c)], + 
Some(Status::Corrections(vars)) => vars, + Some(Status::Invalid) => { + unreachable!("correct_with_vars should always have valid suggestions") + } + }) + .collect(); + if chained.len() != 1 { + chained.sort_unstable(); + chained.dedup(); + } + debug_assert!(!chained.is_empty()); + Status::Corrections(chained) + } else { + Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect()) } - debug_assert!(!chained.is_empty()); - Status::Corrections(chained) } fn correct_with_vars(&self, word: &str) -> Option> { - if typos_vars::WORD_RANGE.contains(&word.len()) { + if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) { map_lookup(&typos_vars::VARS_DICTIONARY, word) .map(|variants| self.select_variant(variants)) } else { @@ -99,6 +103,12 @@ impl BuiltIn { } } + fn is_vars_enabled(&self) -> bool { + #[allow(clippy::assertions_on_constants)] + debug_assert!(typos_vars::NO_INVALID); + self.locale.is_some() + } + fn select_variant( &self, vars: &'static [(u8, &'static typos_vars::VariantsMap)], @@ -301,7 +311,7 @@ mod test { typos::tokens::Case::Lower, 0, )); - assert_eq!(correction, Some(Status::Valid)); + assert_eq!(correction, None); } #[cfg(feature = "vars")]