Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve replace map used with dictionaries #3679

Merged
merged 4 commits into from Sep 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,92 @@
import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';

// const oc = expect.objectContaining;

// cspell:ignore êphone îphone geschäft

// Table-driven tests for the dictionary returned by `createIgnoreWordsDictionary`.
// Every table below runs against the single shared `dict` instance.
describe('IgnoreWordsDictionary', () => {
// Mixed-case, accented and lower-case entries. Two entries (' English', ' êphone')
// carry a leading space; the tables below expect 'English' to be found without it,
// so the factory presumably trims entries - TODO confirm against parseDictionaryLines.
const dictWords = [' English', 'grumpy', 'Avocado', 'avocados', 'Café', ' êphone', 'îphone', 'geschäft'];
const dict = createIgnoreWordsDictionary(dictWords, 'ignore words', 'test');

// `has` with default options: the empty string is not found; every listed word is
// found regardless of the case used in the query.
test.each`
word | expected
${''} | ${false}
${'avocado'} | ${true}
${'Avocado'} | ${true}
${'english'} | ${true}
${'English'} | ${true}
${'grumpy'} | ${true}
${'Grumpy'} | ${true}
`('has of "$word"', async ({ word, expected }) => {
expect(dict.has(word)).toEqual(expected);
});

// `find` reports hits as `{ found, forbidden: false, noSuggest: true }` and misses as
// `undefined`. Cases covered: explicit `ignoreCase` on/off, NFC vs NFD input (the hit
// is always reported in NFC), and an accent-free query ('iphone') matching 'îphone'.
test.each`
word | options | expected
${'avocado'} | ${undefined} | ${{ found: 'avocado', forbidden: false, noSuggest: true }}
${'Avocado'} | ${undefined} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
${'Avocado'} | ${{ ignoreCase: true }} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
${''} | ${undefined} | ${undefined}
${'English'} | ${{ ignoreCase: true }} | ${{ found: 'English', forbidden: false, noSuggest: true }}
${'English'} | ${{ ignoreCase: false }} | ${{ found: 'English', forbidden: false, noSuggest: true }}
${'english'} | ${{ ignoreCase: true }} | ${{ found: 'english', forbidden: false, noSuggest: true }}
${'english'} | ${{ ignoreCase: false }} | ${undefined}
${'îphone'.normalize('NFC')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
${'îphone'.normalize('NFD')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
${'iphone'} | ${undefined} | ${{ found: 'iphone', forbidden: false, noSuggest: true }}
`('find "$word" $options', async ({ word, options, expected }) => {
expect(dict.find(word, options)).toEqual(expected);
});

// `isNoSuggestWord` for `ignoreCase` of true, false and undefined. With
// `ignoreCase: false` the lower-case / accent-free queries ('avocado', 'cafe') are
// expected to miss, while the exact entry 'Café' is still a hit.
test.each`
word | ignoreCase | expected
${''} | ${undefined} | ${false}
${'avocado'} | ${false} | ${false}
${'avocado'} | ${undefined} | ${true}
${'avocado'} | ${true} | ${true}
${'Avocado'} | ${undefined} | ${true}
${'avocadoS'} | ${undefined} | ${true}
${'AvocadoS'} | ${undefined} | ${true}
${'Café'} | ${false} | ${true}
${'cafe'} | ${false} | ${false}
${'cafe'} | ${true} | ${true}
${'îphone'.normalize('NFC')} | ${true} | ${true}
${'îphone'.normalize('NFD')} | ${true} | ${true}
${'english'} | ${undefined} | ${true}
${'English'} | ${undefined} | ${true}
${'grumpy'} | ${undefined} | ${true}
${'Grumpy'} | ${undefined} | ${true}
`('isNoSuggestWord of "$word" ignoreCase: $ignoreCase', async ({ word, ignoreCase, expected }) => {
expect(dict.isNoSuggestWord(word, { ignoreCase })).toEqual(expected);
});

// An ignore-words dictionary never reports a word as forbidden, listed or not.
test.each`
word | expected
${'avocado'} | ${false}
${'Avocado'} | ${false}
${''} | ${false}
${'English'} | ${false}
${'english'} | ${false}
${'avocadoS'} | ${false}
${'AvocadoS'} | ${false}
${'grumpy'} | ${false}
${'Grumpy'} | ${false}
`('isForbidden of "$word"', async ({ word, expected }) => {
expect(dict.isForbidden(word)).toEqual(expected);
});

// `suggest` is expected to return an empty list for every query.
test.each`
word | expected
${'avocado'} | ${[]}
${'Avocado'} | ${[]}
${''} | ${[]}
${'English'} | ${[]}
${'english'} | ${[]}
${'avocadoS'} | ${[]}
${'AvocadoS'} | ${[]}
${'grumpy'} | ${[]}
${'Grumpy'} | ${[]}
`('suggest of "$word"', async ({ word, expected }) => {
expect(dict.suggest(word)).toEqual(expected);
});
});
@@ -0,0 +1,128 @@
import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
import { CompoundWordsMethod, parseDictionaryLines, SuggestionResult } from 'cspell-trie-lib';
import {
FindResult,
HasOptions,
SearchOptions,
SpellingDictionary,
SpellingDictionaryOptions,
SuggestOptions,
} from './SpellingDictionary';
import * as Defaults from './defaults';
import { createSpellingDictionary } from './createSpellingDictionary';

// Unicode normalization form applied to both the stored dictionary words and to
// every looked-up word, so that composed and decomposed spellings compare equal.
const NormalizeForm = 'NFC' as const;

/**
 * A `Set`-backed dictionary of words that are to be ignored.
 *
 * Every hit is reported as `noSuggest: true` and `forbidden: false`; the dictionary
 * never produces suggestions and never flags a word as forbidden.
 *
 * Two lookup sets are kept:
 * - `dict` holds every word exactly as passed to the constructor.
 * - `dictNonStrict` holds the entries of `dict` that start with `~`, with that prefix
 *   removed. It is only consulted when `ignoreCase` is in effect.
 *   (Presumably `~` marks the case/accent-stripped forms emitted by
 *   `parseDictionaryLines` - TODO confirm.)
 */
class IgnoreWordsDictionary implements SpellingDictionary {
    private dict: Set<string>;
    private dictNonStrict: Set<string>;
    readonly containsNoSuggestWords = true;
    readonly options: SpellingDictionaryOptions = {};
    // NOTE(review): 'forbidden' looks like a copy/paste leftover from the forbidden-words
    // dictionary. The value is left untouched because callers may read it - confirm
    // whether it should be 'ignore'.
    readonly type = 'forbidden';

    /**
     * @param name - name of the dictionary
     * @param source - dictionary source
     * @param words - the words to ignore; entries prefixed with `~` additionally
     *   populate the non-strict lookup set (without the prefix).
     */
    constructor(readonly name: string, readonly source: string, words: Iterable<string>) {
        this.dict = new Set(words);
        this.dictNonStrict = new Set(
            pipe(
                this.dict,
                opFilter((w) => w.startsWith('~')),
                opMap((w) => w.slice(1))
            )
        );
    }

    /**
     * Shared lookup used by both `has` and `find`, so the two cannot drift apart.
     *
     * The word is normalized, then tried in this order:
     * 1. as-is against the strict set,
     * 2. lower-cased against the strict set,
     * 3. as-is against the non-strict set (only when ignoring case),
     * 4. lower-cased against the non-strict set (only when ignoring case).
     *
     * @param word - the word to look up
     * @param options - only `ignoreCase` is read; it defaults to `Defaults.ignoreCase`
     * @returns the form of the word that matched, or `undefined` when nothing matched
     */
    private lookup(word: string, options?: HasOptions | SearchOptions): string | undefined {
        const nWord = word.normalize(NormalizeForm);
        if (this.dict.has(nWord)) return nWord;
        const lcWord = nWord.toLowerCase();
        if (this.dict.has(lcWord)) return lcWord;

        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
        if (!ignoreCase) return undefined;

        if (this.dictNonStrict.has(nWord)) return nWord;
        if (this.dictNonStrict.has(lcWord)) return lcWord;
        return undefined;
    }

    /**
     * Check if a word is in the list of ignored words.
     * @param word - the word
     * @param options - options; `ignoreCase` enables the non-strict lookup
     * @returns true if the word (or its lower-case form) is an ignored word
     */
    has(word: string, options?: HasOptions): boolean {
        return this.lookup(word, options) !== undefined;
    }

    /**
     * Same lookup as `has`, but reports which form of the word matched.
     * @param word - the word
     * @param options - options; `ignoreCase` enables the non-strict lookup
     * @returns the match flagged as `noSuggest` and not `forbidden`, or `undefined`
     */
    find(word: string, options?: SearchOptions): FindResult | undefined {
        const found = this.lookup(word, options);
        return found === undefined ? undefined : { found, forbidden: false, noSuggest: true };
    }

    /** Ignored words are never forbidden. */
    isForbidden(_word: string): boolean {
        return false;
    }

    /** Every word this dictionary "has" is a no-suggest word. */
    isNoSuggestWord(word: string, options: HasOptions): boolean {
        return this.has(word, options);
    }

    /** This dictionary never offers suggestions; the result is always empty. */
    suggest(
        word: string,
        numSuggestions?: number,
        compoundMethod?: CompoundWordsMethod,
        numChanges?: number,
        ignoreCase?: boolean
    ): SuggestionResult[];
    suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[];
    suggest() {
        return [];
    }

    /** No-op: there is nothing to add to a suggestion collector. */
    genSuggestions(): void {
        return;
    }

    /** Words are returned unchanged; no replacement map is applied. */
    mapWord(word: string): string {
        return word;
    }

    /** Number of entries in the strict set, which includes the `~`-prefixed entries. */
    get size() {
        return this.dict.size;
    }

    readonly isDictionaryCaseSensitive: boolean = true;

    /** There is no loading step that could fail, so there are never errors to report. */
    getErrors?(): Error[] {
        return [];
    }
}

/**
 * Create a dictionary where all words are to be ignored.
 * Ignored words override forbidden words.
 *
 * The word list is run through `parseDictionaryLines` (with `stripCaseAndAccents`
 * enabled) and each resulting entry is normalized to `NormalizeForm`.
 *
 * @param wordList - list of words
 * @param name - name of dictionary
 * @param source - dictionary source
 * @returns a lightweight `IgnoreWordsDictionary` when no parsed entry contains `*` or `+`;
 *   otherwise a dictionary built by `createSpellingDictionary` with `noSuggest` enabled.
 */
export function createIgnoreWordsDictionary(
    wordList: readonly string[],
    name: string,
    source: string
): SpellingDictionary {
    // Entries containing `*` or `+` cannot be served by a plain Set lookup.
    // (Presumably these are compounding markers - TODO confirm.)
    const specialCharacters = /[*+]/;

    const parsedLines = parseDictionaryLines(wordList, { stripCaseAndAccents: true });
    const words = Array.from(parsedLines, (entry) => entry.normalize(NormalizeForm));

    const needsFullDictionary = words.some((entry) => specialCharacters.test(entry));
    if (!needsFullDictionary) {
        return new IgnoreWordsDictionary(name, source, words);
    }

    return createSpellingDictionary(words, name, source, {
        caseSensitive: true,
        noSuggest: true,
        weightMap: undefined,
        supportNonStrictSearches: true,
    });
}
Expand Up @@ -2,6 +2,7 @@ import * as Trie from 'cspell-trie-lib';
import { SpellingDictionaryOptions } from '.';
import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary';
import { createForbiddenWordsDictionary } from './ForbiddenWordsDictionary';
import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';
import { CompoundWordsMethod } from './SpellingDictionary';
import { createCollection } from './SpellingDictionaryCollection';
import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';
Expand Down Expand Up @@ -29,12 +30,14 @@ describe('Verify using multiple dictionaries', () => {
const wordsG = ['café', 'accent'];

const wordsLegacy = ['error', 'code', 'system', 'ctrl'];
const wordsIgnore = ['ignored'];

// cspell:ignore pinkberry behaviour colour
const wordsNoSug = ['colour', 'behaviour', 'favour', 'pinkberry'];

const dictNoSug = createSpellingDictionary(wordsNoSug, 'words-no-suggest', 'test', opts({ noSuggest: true }));
const dictLegacy = createSpellingDictionary(wordsLegacy, 'legacy-dict', 'test', opts({ useCompounds: true }));
const dictIgnore = createIgnoreWordsDictionary(wordsIgnore, '[ignore]', 'test');

test.each`
word | expected
Expand Down Expand Up @@ -62,6 +65,28 @@ describe('Verify using multiple dictionaries', () => {
expect(dictCollection.has(word)).toEqual(expected);
});

// Words listed in the no-suggest or ignore dictionaries must be reported as
// no-suggest words by the collection, even when the forbidden-words dictionary in
// the same collection also lists them ('behaviour', 'Ignored', 'Colour').
// A word that appears only in the forbidden list ('guava') is not a no-suggest word.
test.each`
word | expected
${'colour'} | ${true}
${'Colour'} | ${true}
${'tree'} | ${false}
${''} | ${false}
${'ignored'} | ${true}
${'Ignored'} | ${true}
${'behaviour'} | ${true}
${'guava'} | ${false}
`('isNoSuggestWord "$word"', ({ word, expected }) => {
    // The factories return dictionaries synchronously (the sibling tests pass their
    // results straight to `createCollection`), so no async/Promise.all is needed.
    const dicts = [
        createSpellingDictionary(wordsA, 'wordsA', 'test', opts()),
        createForbiddenWordsDictionary(['behaviour', 'guava', 'Ignored', 'Colour'], 'flag_words', 'test'),
        dictNoSug,
        dictIgnore,
    ];

    const dictCollection = createCollection(dicts, 'test');
    expect(dictCollection.isNoSuggestWord(word, {})).toEqual(expected);
});

test('has for forbidden word.', () => {
const word = 'guava';
const expected = false;
Expand Down Expand Up @@ -220,6 +245,7 @@ describe('Verify using multiple dictionaries', () => {
${'áccent'} | ${true /* ignore the accent. cspell:disable-line */}
${'a\u0301ccent'} | ${true /* ignore the accent. cspell:disable-line */}
${'applé'} | ${true /* ignore the accent. cspell:disable-line */}
${'ignored'} | ${true}
`('checks has word: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', { dictionaryInformation: { ignore: '\u0300-\u0362' } }),
Expand All @@ -232,6 +258,7 @@ describe('Verify using multiple dictionaries', () => {
caseSensitive: true,
}),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand All @@ -248,6 +275,7 @@ describe('Verify using multiple dictionaries', () => {
${'pinkbug'} | ${{ found: 'pinkbug', forbidden: false, noSuggest: false }}
${'colour'} | ${{ found: 'colour', forbidden: false, noSuggest: true }}
${'behaviour'} | ${{ found: 'behaviour', forbidden: false, noSuggest: true }}
${'ignored'} | ${{ found: 'ignored', forbidden: false, noSuggest: true }}
`('find: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
Expand All @@ -257,6 +285,7 @@ describe('Verify using multiple dictionaries', () => {
createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictNoSug,
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down Expand Up @@ -287,6 +316,7 @@ describe('Verify using multiple dictionaries', () => {
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictNoSug,
dictLegacy,
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down Expand Up @@ -328,6 +358,7 @@ describe('Verify using multiple dictionaries', () => {
${'bug'} | ${[sr('bug', 5)]}
${'blackberry'} | ${[sr('blackberry', 0), sr('black berry', 98)]}
${'stinkbug'} | ${[sr('stink bug', 103), sr('pinkbug', 198)]}
${'ignored'} | ${[]}
`('checks suggestions word: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
Expand All @@ -336,6 +367,7 @@ describe('Verify using multiple dictionaries', () => {
createSpellingDictionary(wordsD, 'wordsD', 'test', undefined),
createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down
@@ -0,0 +1,18 @@
import { createMapper } from '../util/repMap';
import { __testing__ } from './SpellingDictionaryFromTrie';

// `outerWordForms` is reached through the module's `__testing__` export bag.
const { outerWordForms } = __testing__;

// cspell:ignore guenstig günstig

// Verifies the set of word forms `outerWordForms` produces for a word, with and
// without a replacement map.
describe('SpellingDictionaryFromTrie', () => {
// Expectations encoded in the table:
// - no replacement map: only the word itself;
// - 'guenstig' with a German ae/oe/ue/ss map: the original plus the mapped 'günstig';
// - 'günstig': the word as written plus its NFD (decomposed) form.
test.each`
word | repMap | expected
${'hello'} | ${undefined} | ${['hello']}
${'guenstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['guenstig', 'günstig']}
${'günstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['günstig', 'günstig'.normalize('NFD')]}
`('outerWordForms $word', ({ word, repMap, expected }) => {
const mapWord = createMapper(repMap);
// Fall back to an identity mapper in case `createMapper` yields null/undefined.
expect(outerWordForms(word, mapWord ?? ((a) => a))).toEqual(new Set(expected));
});
});