Skip to content

Commit

Permalink
feat: Support complex word lists
Browse files Browse the repository at this point in the history
fixes: #5222

The binary dictionary builder (TrieBlob) only supported word lists containing at most 250 unique characters.

This was not an issue with the object-based trie dictionaries used with the compiled dictionaries.
  • Loading branch information
Jason3S committed Feb 12, 2024
1 parent 9afe985 commit 18f4866
Show file tree
Hide file tree
Showing 26 changed files with 8,172 additions and 65 deletions.
@@ -1,8 +1,17 @@
import { promises as fs } from 'node:fs';

import type { DictionaryInformation } from '@cspell/cspell-types';
import type { ITrie } from 'cspell-trie-lib';
import { describe, expect, test } from 'vitest';

import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary.js';
import type { SpellingDictionaryOptions } from './SpellingDictionary.js';
import type { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie.js';

// Directory URLs used to locate shared test fixtures. Each one is resolved relative to the one before it.
// Two directory levels up from the directory containing this module
// (the package root, going by the name — this depends on where this file lives).
const urlPackageRoot = new URL('../../', import.meta.url);
// Two further directory levels up from `urlPackageRoot`.
const urlRepoRoot = new URL('../../', urlPackageRoot);
// The `test-fixtures/` directory directly under `urlRepoRoot`.
const urlTestFixtures = new URL('test-fixtures/', urlRepoRoot);
// The `issues/` directory inside `urlTestFixtures`.
const urlTestFixturesIssues = new URL('issues/', urlTestFixtures);

describe('Validate createSpellingDictionary', () => {
test('createFailedToLoadDictionary', () => {
Expand Down Expand Up @@ -79,6 +88,43 @@ describe('Validate createSpellingDictionary', () => {
});
});

describe('test-fixtures', () => {
/**
 * Read a fixture file as UTF-8 text.
 *
 * @param name - Relative path or URL of the fixture. It is resolved against `urlTestFixtures`;
 *   an absolute URL is used as-is because the base is ignored by the `URL` constructor.
 * @returns A promise for the file content as a string.
 */
function readFixtureFile(name: string | URL): Promise<string> {
    const fixtureUrl = new URL(name, urlTestFixtures);
    return fs.readFile(fixtureUrl, 'utf8');
}

// Regression test for issue 5222: build a dictionary from the fixture word list
// and check that the resulting trie is non-empty and only contains expected words.
test('issue-5222', async () => {
    const wordsUrl = new URL('issue-5222/words.txt', urlTestFixturesIssues);
    const content = await readFixtureFile(wordsUrl);
    // One entry per line; strip surrounding whitespace and drop empty lines.
    const words = content
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => !!line);
    const dict = createSpellingDictionary(words, 'issue-5222', wordsUrl.toString(), {});
    const { trie } = dict as SpellingDictionaryFromTrie;
    const knownWords = new Set(words);
    for (const entry of trie.words()) {
        // Entries starting with `~` are exempt from the membership check;
        // every other entry must come from the fixture word list.
        const isExpected = entry.startsWith('~') || knownWords.has(entry);
        expect(isExpected, `to have "${entry}"`).toBe(true);
    }
    expect(trie.size).toBeGreaterThan(0);
    expect(size(trie)).toBeGreaterThan(0);
    expect(dict.size).toBeGreaterThan(20);
});

/**
 * Walk the trie and return an approximate node count.
 *
 * Every visited node is counted, whether or not it ends a word. The walk is only
 * asked to go deeper while the accumulated text is shorter than 5 characters, so
 * not every leaf is reached; the result is an approximation, not an exact size.
 *
 * @param trie - The trie to walk.
 * @returns The number of nodes visited.
 */
function size(trie: ITrie): number {
    const walker = trie.iterate();
    let visited = 0;
    let step = walker.next();
    while (!step.done) {
        visited += 1;
        // Tell the walker whether to descend below the current node.
        const goDeeper = step.value.text.length < 5;
        step = walker.next(goDeeper);
    }
    return visited;
}
});

function opts(opts: Partial<SpellingDictionaryOptions> = {}): SpellingDictionaryOptions {
return {
weightMap: undefined,
Expand Down
19 changes: 10 additions & 9 deletions packages/cspell-lib/src/lib/textValidation/docValidator.test.ts
Expand Up @@ -132,12 +132,14 @@ describe('docValidator', () => {

// cspell:ignore kount naame colector Reciever reciever recievers serrors dockblock
test.each`
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${fix('sample-with-errors.ts')} | ${undefined} | ${['dockblock', 'Helllo']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${undefined} | ${['reciever', 'naame', 'naame', 'naame', 'reciever', 'Reciever', 'naame', 'Reciever', 'naame', 'kount', 'Reciever', 'kount', 'colector', 'recievers', 'Reciever', 'recievers', 'recievers']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${1} | ${['reciever', 'naame', 'Reciever', 'kount', 'colector', 'recievers']} | ${undefined}
${fix('parser/sample.ts')} | ${1} | ${['serrors']} | ${['\\x73errors']}
${fix('sample-with-directives-errors.ts')} | ${1} | ${['disable-prev', 'ignored', 'world', 'enable-line']} | ${undefined}
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${fix('sample-with-errors.ts')} | ${undefined} | ${['dockblock', 'Helllo']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${undefined} | ${['reciever', 'naame', 'naame', 'naame', 'reciever', 'Reciever', 'naame', 'Reciever', 'naame', 'kount', 'Reciever', 'kount', 'colector', 'recievers', 'Reciever', 'recievers', 'recievers']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${1} | ${['reciever', 'naame', 'Reciever', 'kount', 'colector', 'recievers']} | ${undefined}
${fix('parser/sample.ts')} | ${1} | ${['serrors']} | ${['\\x73errors']}
${fix('sample-with-directives-errors.ts')} | ${1} | ${['disable-prev', 'ignored', 'world', 'enable-line']} | ${undefined}
${tFix('issues/issue-4811/#local/README.md')} | ${undefined} | ${[]} | ${undefined}
${tFix('issues/issue-4811/#local/version@2.md')} | ${undefined} | ${['marrkdown']} | ${undefined /* cspell:disable-line */}
`(
'checkDocument $filename $maxDuplicateProblems',
async ({ filename, maxDuplicateProblems, expectedIssues, expectedRawIssues }) => {
Expand All @@ -152,9 +154,8 @@ describe('docValidator', () => {
);

test.each`
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${tFix('issues/issue-4811/#local/README.md')} | ${undefined} | ${[]} | ${undefined}
${tFix('issues/issue-4811/#local/version@2.md')} | ${undefined} | ${['marrkdown']} | ${undefined /* cspell:disable-line */}
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${tFix('issues/issue-5222/README.md')} | ${undefined} | ${[]} | ${undefined}
`(
'checkDocument $filename $maxDuplicateProblems',
async ({ filename, maxDuplicateProblems, expectedIssues, expectedRawIssues }) => {
Expand Down

0 comments on commit 18f4866

Please sign in to comment.