Skip to content

Commit

Permalink
refactor(dict): Allow 0..n corrections in BuiltIn
Browse files Browse the repository at this point in the history
The main use case is a typo like `ther`, which previously mapped only to
`there`; it can now also offer `the` and `their` as corrections.
  • Loading branch information
Ed Page committed May 18, 2021
1 parent 444d2cc commit fb0dac4
Show file tree
Hide file tree
Showing 6 changed files with 33,747 additions and 33,668 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/typos-dict/codegen/Cargo.toml
Expand Up @@ -18,6 +18,7 @@ codecov = { repository = "crate-ci/typos" }
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
csv = "1.1"
itertools = "0.10"
unicase = "2.5"
codegenrs = "1.0"
structopt = "0.3"
14 changes: 10 additions & 4 deletions crates/typos-dict/codegen/src/main.rs
Expand Up @@ -18,20 +18,26 @@ fn generate<W: std::io::Write>(file: &mut W) {

writeln!(
file,
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [&'static str]> = "
)
.unwrap();
let mut builder = phf_codegen::Map::new();
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
.flexible(true)
.from_reader(DICT)
.records()
.map(|r| r.unwrap())
.collect();
for record in &records {
smallest = std::cmp::min(smallest, record[0].len());
largest = std::cmp::max(largest, record[0].len());
let value = format!(r#""{}""#, &record[1]);
let mut record_fields = record.iter();
let key = record_fields.next().unwrap();
smallest = std::cmp::min(smallest, key.len());
largest = std::cmp::max(largest, key.len());
let value = format!(
"&[{}]",
itertools::join(record_fields.map(|field| format!(r#""{}""#, field)), ", ")
);
builder.entry(unicase::UniCase::new(&record[0]), &value);
}
let codegenned = builder.build();
Expand Down

0 comments on commit fb0dac4

Please sign in to comment.