Skip to content

Commit

Permalink
perf(dict): Bypass vars when possible
Browse files Browse the repository at this point in the history
Variant support slows us down by 10-50%.  I assume most people will run
with `en` and so most of this overhead goes to waste.  So instead of
merging vars with dict, let's instead get a quick win by just skipping
vars when we don't need to.  If the assumptions behind this change over
time or if there is need for speeding up a specific locale, we can
re-address this.

Before:
```
check_file/Typos/code   time:   [35.860 us 36.021 us 36.187 us]
                        thrpt:  [8.0117 MiB/s 8.0486 MiB/s 8.0846 MiB/s]
check_file/Typos/corpus time:   [26.966 ms 27.215 ms 27.521 ms]
                        thrpt:  [21.127 MiB/s 21.365 MiB/s 21.562 MiB/s]
```
After:
```
check_file/Typos/code   time:   [33.837 us 33.928 us 34.031 us]
                        thrpt:  [8.5191 MiB/s 8.5452 MiB/s 8.5680 MiB/s]
check_file/Typos/corpus time:   [17.521 ms 17.620 ms 17.730 ms]
                        thrpt:  [32.794 MiB/s 32.999 MiB/s 33.184 MiB/s]
```

This puts us in line with `--no-default-features --features dict`

Fixes #253
  • Loading branch information
Ed Page committed May 19, 2021
1 parent d65fa79 commit c4765fd
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 16 deletions.
16 changes: 16 additions & 0 deletions crates/typos-vars/codegen/src/main.rs
Expand Up @@ -78,6 +78,7 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {

let mut smallest = usize::MAX;
let mut largest = usize::MIN;
let mut no_invalid = true;

writeln!(
file,
Expand All @@ -97,6 +98,8 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
builder.entry(unicase::UniCase::new(word), &value);
smallest = std::cmp::min(smallest, word.len());
largest = std::cmp::max(largest, word.len());

no_invalid &= !is_always_invalid(data);
}
let codegenned = builder.build();
writeln!(file, "{}", codegenned).unwrap();
Expand All @@ -110,6 +113,10 @@ fn generate_variations<W: std::io::Write>(file: &mut W) {
)
.unwrap();

writeln!(file).unwrap();
writeln!(file, "pub const NO_INVALID: bool = {:?};", no_invalid,).unwrap();

writeln!(file).unwrap();
for (symbol, entry) in entries.iter() {
if !referenced_symbols.contains(symbol.as_str()) {
continue;
Expand Down Expand Up @@ -156,6 +163,15 @@ fn is_always_valid(data: &[(&str, varcon::CategorySet)]) -> bool {
false
}

/// Returns `true` when at least one `(symbol, category set)` pair in `data`
/// carries an empty category set, and `false` otherwise (including when
/// `data` itself is empty).
fn is_always_invalid(data: &[(&str, varcon::CategorySet)]) -> bool {
    data.iter().any(|(_symbol, categories)| categories.is_empty())
}

fn entries() -> BTreeMap<String, varcon_core::Entry> {
varcon::VARCON
.iter()
Expand Down
3 changes: 3 additions & 0 deletions crates/typos-vars/src/vars_codegen.rs
Expand Up @@ -113083,6 +113083,9 @@ pub static VARS_DICTIONARY: phf::Map<
};

pub const WORD_RANGE: std::ops::RangeInclusive<usize> = 2..=24;

pub const NO_INVALID: bool = true;

pub(crate) static ENTRY_ABETTORS_7043394254318611656: VariantsMap =
[&["abettors"], &["abetters"], &["abettors"], &["abetters"]];

Expand Down
41 changes: 25 additions & 16 deletions src/dict.rs
Expand Up @@ -72,33 +72,42 @@ impl BuiltIn {
#[cfg(feature = "vars")]
impl BuiltIn {
fn chain_with_vars(&self, corrections: &'static [&'static str]) -> Status<'static> {
let mut chained: Vec<_> = corrections
.iter()
.flat_map(|c| match self.correct_with_vars(c) {
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
Some(Status::Corrections(vars)) => vars,
Some(Status::Invalid) => {
unreachable!("correct_with_vars should always have valid suggestions")
}
})
.collect();
if chained.len() != 1 {
chained.sort_unstable();
chained.dedup();
if self.is_vars_enabled() {
let mut chained: Vec<_> = corrections
.iter()
.flat_map(|c| match self.correct_with_vars(c) {
Some(Status::Valid) | None => vec![Cow::Borrowed(*c)],
Some(Status::Corrections(vars)) => vars,
Some(Status::Invalid) => {
unreachable!("correct_with_vars should always have valid suggestions")
}
})
.collect();
if chained.len() != 1 {
chained.sort_unstable();
chained.dedup();
}
debug_assert!(!chained.is_empty());
Status::Corrections(chained)
} else {
Status::Corrections(corrections.iter().map(|c| Cow::Borrowed(*c)).collect())
}
debug_assert!(!chained.is_empty());
Status::Corrections(chained)
}

fn correct_with_vars(&self, word: &str) -> Option<Status<'static>> {
if typos_vars::WORD_RANGE.contains(&word.len()) {
if self.is_vars_enabled() && typos_vars::WORD_RANGE.contains(&word.len()) {
map_lookup(&typos_vars::VARS_DICTIONARY, word)
.map(|variants| self.select_variant(variants))
} else {
None
}
}

/// Reports whether variant handling applies: `true` exactly when
/// `self.locale` is set.
fn is_vars_enabled(&self) -> bool {
    let locale_selected = self.locale.is_some();
    // Debug-build check of the generated `NO_INVALID` flag.
    // NOTE(review): presumably bypassing the variant lookup is only sound
    // while the generated table has no always-invalid words — confirm.
    debug_assert!(typos_vars::NO_INVALID);
    locale_selected
}

fn select_variant(
&self,
vars: &'static [(u8, &'static typos_vars::VariantsMap)],
Expand Down

0 comments on commit c4765fd

Please sign in to comment.