Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support complex word lists #5233

Merged
merged 4 commits into from Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions cspell.json
Expand Up @@ -34,6 +34,7 @@
"*.{png,jpg,pdf,svg}",
"*.cpuprofile",
"*.heapprofile",
"emoji*.txt",
"**/.docusaurus/**",
"**/.gitignore",
"**/.vscode/**",
Expand Down
@@ -1,8 +1,17 @@
import { promises as fs } from 'node:fs';

import type { DictionaryInformation } from '@cspell/cspell-types';
import { buildITrieFromWords, type ITrie, parseDictionaryLines } from 'cspell-trie-lib';
import { describe, expect, test } from 'vitest';

import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary.js';
import type { SpellingDictionaryOptions } from './SpellingDictionary.js';
import type { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie.js';

const urlPackageRoot = new URL('../../', import.meta.url);
const urlRepoRoot = new URL('../../', urlPackageRoot);
const urlTestFixtures = new URL('test-fixtures/', urlRepoRoot);
const urlTestFixturesIssues = new URL('issues/', urlTestFixtures);

describe('Validate createSpellingDictionary', () => {
test('createFailedToLoadDictionary', () => {
Expand Down Expand Up @@ -79,6 +88,54 @@ describe('Validate createSpellingDictionary', () => {
});
});

describe('test-fixtures', () => {
/**
 * Read a fixture file as UTF-8 text.
 *
 * @param name - path or URL of the fixture; it is resolved against `urlTestFixtures`,
 *   so an absolute URL is used unchanged while a relative one lands in the fixtures directory.
 * @returns a promise for the full text content of the file.
 */
function readFixtureFile(name: string | URL): Promise<string> {
    const fixtureUrl = new URL(name, urlTestFixtures);
    return fs.readFile(fixtureUrl, 'utf8');
}

// Regression test for issue 5222: every entry of the fixture word list must be
// found in the dictionary built from it, and the dictionary must not invent words.
test('issue-5222', async () => {
    const url = new URL('issue-5222/words.txt', urlTestFixturesIssues);

    // Normalize the whole file to NFC, then keep only the non-empty, trimmed lines.
    const content = await readFixtureFile(url);
    const words = content
        .normalize('NFC')
        .split('\n')
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');

    const dict = createSpellingDictionary(words, 'issue-5222', url.toString(), {});
    const lines = [...parseDictionaryLines(words)];
    // `bt` is a trie built directly from the parsed lines; it is checked alongside the dictionary's own trie.
    const bt = buildITrieFromWords(lines);
    const trie = (dict as SpellingDictionaryFromTrie).trie;

    // Each parsed line must already be NFC and be present in both tries.
    for (const parsed of lines) {
        expect(parsed.normalize('NFC')).toBe(parsed);
        expect(bt.has(parsed), `bt to have "${parsed}"`).toBe(true);
        expect(trie.has(parsed), `trie to have "${parsed}"`).toBe(true);
    }

    // Each raw word from the file must be present in the dictionary trie.
    const setOfWords = new Set(words);
    for (const expected of setOfWords) {
        expect(trie.has(expected), `trie to have "${expected}"`).toBe(true);
    }

    // Every word stored in the trie must come from the file; entries starting with '~' are exempt
    // (NOTE(review): presumably generated alternate forms — confirm against cspell-trie-lib).
    for (const stored of trie.words()) {
        const isAccountedFor = stored.startsWith('~') || setOfWords.has(stored);
        expect(isAccountedFor, `to have "${stored}"`).toBe(true);
    }

    // The trie must be non-empty both by its reported size and by an actual walk.
    expect(trie.size).toBeGreaterThan(0);
    expect(size(trie)).toBeGreaterThan(0);
    expect(dict.size).toBeGreaterThan(20);
});

/**
 * Walk the trie and return an approximate size.
 *
 * Every visited node is counted, whether or not it terminates a word. The walk is
 * not exhaustive: after visiting a node whose text has 5 or more characters, `false`
 * is passed to the walker's `next` call, so not all leaves are reached
 * (NOTE(review): presumably `false` stops the walker from descending — confirm against cspell-trie-lib).
 * The result is therefore only a good-enough approximation.
 *
 * @param trie - the trie to measure.
 * @returns the number of nodes visited during the walk.
 */
function size(trie: ITrie): number {
    const walker = trie.iterate();
    let visited = 0;
    // The first step takes no argument; each later step says whether to go deeper.
    let step = walker.next();
    while (!step.done) {
        visited += 1;
        const goDeeper = step.value.text.length < 5;
        step = walker.next(goDeeper);
    }
    return visited;
}
});

function opts(opts: Partial<SpellingDictionaryOptions> = {}): SpellingDictionaryOptions {
return {
weightMap: undefined,
Expand Down
19 changes: 10 additions & 9 deletions packages/cspell-lib/src/lib/textValidation/docValidator.test.ts
Expand Up @@ -132,12 +132,14 @@ describe('docValidator', () => {

// cspell:ignore kount naame colector Reciever reciever recievers serrors dockblock
test.each`
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${fix('sample-with-errors.ts')} | ${undefined} | ${['dockblock', 'Helllo']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${undefined} | ${['reciever', 'naame', 'naame', 'naame', 'reciever', 'Reciever', 'naame', 'Reciever', 'naame', 'kount', 'Reciever', 'kount', 'colector', 'recievers', 'Reciever', 'recievers', 'recievers']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${1} | ${['reciever', 'naame', 'Reciever', 'kount', 'colector', 'recievers']} | ${undefined}
${fix('parser/sample.ts')} | ${1} | ${['serrors']} | ${['\\x73errors']}
${fix('sample-with-directives-errors.ts')} | ${1} | ${['disable-prev', 'ignored', 'world', 'enable-line']} | ${undefined}
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${fix('sample-with-errors.ts')} | ${undefined} | ${['dockblock', 'Helllo']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${undefined} | ${['reciever', 'naame', 'naame', 'naame', 'reciever', 'Reciever', 'naame', 'Reciever', 'naame', 'kount', 'Reciever', 'kount', 'colector', 'recievers', 'Reciever', 'recievers', 'recievers']} | ${undefined}
${fix('sample-with-many-errors.ts')} | ${1} | ${['reciever', 'naame', 'Reciever', 'kount', 'colector', 'recievers']} | ${undefined}
${fix('parser/sample.ts')} | ${1} | ${['serrors']} | ${['\\x73errors']}
${fix('sample-with-directives-errors.ts')} | ${1} | ${['disable-prev', 'ignored', 'world', 'enable-line']} | ${undefined}
${tFix('issues/issue-4811/#local/README.md')} | ${undefined} | ${[]} | ${undefined}
${tFix('issues/issue-4811/#local/version@2.md')} | ${undefined} | ${['marrkdown']} | ${undefined /* cspell:disable-line */}
`(
'checkDocument $filename $maxDuplicateProblems',
async ({ filename, maxDuplicateProblems, expectedIssues, expectedRawIssues }) => {
Expand All @@ -152,9 +154,8 @@ describe('docValidator', () => {
);

test.each`
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${tFix('issues/issue-4811/#local/README.md')} | ${undefined} | ${[]} | ${undefined}
${tFix('issues/issue-4811/#local/version@2.md')} | ${undefined} | ${['marrkdown']} | ${undefined /* cspell:disable-line */}
filename | maxDuplicateProblems | expectedIssues | expectedRawIssues
${tFix('issues/issue-5222/README.md')} | ${undefined} | ${[]} | ${undefined}
`(
'checkDocument $filename $maxDuplicateProblems',
async ({ filename, maxDuplicateProblems, expectedIssues, expectedRawIssues }) => {
Expand Down