Skip to content

Commit

Permalink
Merge pull request #252 from epage/dict
Browse files Browse the repository at this point in the history
fix(dict): Handle cases from Linux
  • Loading branch information
epage committed May 18, 2021
2 parents 444d2cc + 639e65b commit e6c595c
Show file tree
Hide file tree
Showing 7 changed files with 33,748 additions and 33,671 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions crates/typos-dict/assets/words.csv
Expand Up @@ -13021,7 +13021,6 @@ handelbars,handlebars
handicaped,handicapped
handwritng,handwriting
harasments,harassments
-hardlinked,hardline
harmoniacs,harmonic
harmonisch,harmonic
harrasment,harassment
Expand Down Expand Up @@ -23557,7 +23556,7 @@ referens,references
referere,referee
referign,refering
refering,referring
-refernce,references
+refernce,reference
reffered,referred
refilles,refills
refillls,refills
Expand Down Expand Up @@ -27976,7 +27975,7 @@ tast,taste
tath,that
tehy,they
tghe,the
-ther,there
+ther,there,their,the
thge,the
thna,than
thne,then
Expand Down
1 change: 1 addition & 0 deletions crates/typos-dict/codegen/Cargo.toml
Expand Up @@ -18,6 +18,7 @@ codecov = { repository = "crate-ci/typos" }
phf = { version = "0.8", features = ["unicase"] }
phf_codegen = "0.8"
csv = "1.1"
+itertools = "0.10"
unicase = "2.5"
codegenrs = "1.0"
structopt = "0.3"
14 changes: 10 additions & 4 deletions crates/typos-dict/codegen/src/main.rs
Expand Up @@ -18,20 +18,26 @@ fn generate<W: std::io::Write>(file: &mut W) {

writeln!(
file,
-"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static str> = "
+"pub static WORD_DICTIONARY: phf::Map<unicase::UniCase<&'static str>, &'static [&'static str]> = "
)
.unwrap();
let mut builder = phf_codegen::Map::new();
let records: Vec<_> = csv::ReaderBuilder::new()
.has_headers(false)
+.flexible(true)
.from_reader(DICT)
.records()
.map(|r| r.unwrap())
.collect();
for record in &records {
-smallest = std::cmp::min(smallest, record[0].len());
-largest = std::cmp::max(largest, record[0].len());
-let value = format!(r#""{}""#, &record[1]);
+let mut record_fields = record.iter();
+let key = record_fields.next().unwrap();
+smallest = std::cmp::min(smallest, key.len());
+largest = std::cmp::max(largest, key.len());
+let value = format!(
+"&[{}]",
+itertools::join(record_fields.map(|field| format!(r#""{}""#, field)), ", ")
+);
builder.entry(unicase::UniCase::new(&record[0]), &value);
}
let codegenned = builder.build();
Expand Down

0 comments on commit e6c595c

Please sign in to comment.