Skip to content

Commit

Permalink
fix: improve replace map used with dictionaries (#3679)
Browse files Browse the repository at this point in the history
* create IgnoreWordsDictionary
* improve search speed.
* improve replace map used with dictionaries.

Related to #3592 

* fix: Add IgnoreWordsDictionary to cspell-dictionary
* fix: Fix in-document directive detection.
* fix: add createIgnoreWordsDictionary to cspell-lib
* set IgnoreWordsDictionary containsNoSuggestWords
  • Loading branch information
Jason3S committed Sep 29, 2022
1 parent 5d34d46 commit 9455ccc
Show file tree
Hide file tree
Showing 17 changed files with 398 additions and 59 deletions.
@@ -0,0 +1,92 @@
import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';

// const oc = expect.objectContaining;

// cspell:ignore êphone îphone geschäft

// Unit tests for the dictionary returned by `createIgnoreWordsDictionary`.
describe('IgnoreWordsDictionary', () => {
// The word list mixes capitalized, lower-case, accented and whitespace-padded entries.
const dictWords = [' English', 'grumpy', 'Avocado', 'avocados', 'Café', ' êphone', 'îphone', 'geschäft'];
const dict = createIgnoreWordsDictionary(dictWords, 'ignore words', 'test');

// `has` is called without options: the empty string is expected to miss, every other row to hit.
test.each`
word | expected
${''} | ${false}
${'avocado'} | ${true}
${'Avocado'} | ${true}
${'english'} | ${true}
${'English'} | ${true}
${'grumpy'} | ${true}
${'Grumpy'} | ${true}
`('has of "$word"', async ({ word, expected }) => {
expect(dict.has(word)).toEqual(expected);
});

// `find` rows: a hit is expected to carry `forbidden: false` and `noSuggest: true`; a miss is `undefined`.
// The NFD input row expects the NFC form back in `found`.
test.each`
word | options | expected
${'avocado'} | ${undefined} | ${{ found: 'avocado', forbidden: false, noSuggest: true }}
${'Avocado'} | ${undefined} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
${'Avocado'} | ${{ ignoreCase: true }} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
${''} | ${undefined} | ${undefined}
${'English'} | ${{ ignoreCase: true }} | ${{ found: 'English', forbidden: false, noSuggest: true }}
${'English'} | ${{ ignoreCase: false }} | ${{ found: 'English', forbidden: false, noSuggest: true }}
${'english'} | ${{ ignoreCase: true }} | ${{ found: 'english', forbidden: false, noSuggest: true }}
${'english'} | ${{ ignoreCase: false }} | ${undefined}
${'îphone'.normalize('NFC')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
${'îphone'.normalize('NFD')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
${'iphone'} | ${undefined} | ${{ found: 'iphone', forbidden: false, noSuggest: true }}
`('find "$word" $options', async ({ word, options, expected }) => {
expect(dict.find(word, options)).toEqual(expected);
});

// `isNoSuggestWord` is exercised with `ignoreCase` set to true, false and undefined.
test.each`
word | ignoreCase | expected
${''} | ${undefined} | ${false}
${'avocado'} | ${false} | ${false}
${'avocado'} | ${undefined} | ${true}
${'avocado'} | ${true} | ${true}
${'Avocado'} | ${undefined} | ${true}
${'avocadoS'} | ${undefined} | ${true}
${'AvocadoS'} | ${undefined} | ${true}
${'Café'} | ${false} | ${true}
${'cafe'} | ${false} | ${false}
${'cafe'} | ${true} | ${true}
${'îphone'.normalize('NFC')} | ${true} | ${true}
${'îphone'.normalize('NFD')} | ${true} | ${true}
${'english'} | ${undefined} | ${true}
${'English'} | ${undefined} | ${true}
${'grumpy'} | ${undefined} | ${true}
${'Grumpy'} | ${undefined} | ${true}
`('isNoSuggestWord of "$word" ignoreCase: $ignoreCase', async ({ word, ignoreCase, expected }) => {
expect(dict.isNoSuggestWord(word, { ignoreCase })).toEqual(expected);
});

// `isForbidden` is expected to be false for every row, including words that are in the list.
test.each`
word | expected
${'avocado'} | ${false}
${'Avocado'} | ${false}
${''} | ${false}
${'English'} | ${false}
${'english'} | ${false}
${'avocadoS'} | ${false}
${'AvocadoS'} | ${false}
${'grumpy'} | ${false}
${'Grumpy'} | ${false}
`('isForbidden of "$word"', async ({ word, expected }) => {
expect(dict.isForbidden(word)).toEqual(expected);
});

// `suggest` is expected to return an empty list for every row.
test.each`
word | expected
${'avocado'} | ${[]}
${'Avocado'} | ${[]}
${''} | ${[]}
${'English'} | ${[]}
${'english'} | ${[]}
${'avocadoS'} | ${[]}
${'AvocadoS'} | ${[]}
${'grumpy'} | ${[]}
${'Grumpy'} | ${[]}
`('suggest of "$word"', async ({ word, expected }) => {
expect(dict.suggest(word)).toEqual(expected);
});
});
@@ -0,0 +1,128 @@
import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
import { CompoundWordsMethod, parseDictionaryLines, SuggestionResult } from 'cspell-trie-lib';
import {
FindResult,
HasOptions,
SearchOptions,
SpellingDictionary,
SpellingDictionaryOptions,
SuggestOptions,
} from './SpellingDictionary';
import * as Defaults from './defaults';
import { createSpellingDictionary } from './createSpellingDictionary';

/** Unicode normalization form applied to dictionary words and to looked-up words in this file. */
const NormalizeForm = 'NFC' as const;

/**
 * Set-backed dictionary of words that are to be ignored.
 *
 * Two sets drive every lookup:
 * - `dict` holds each entry exactly as it was passed to the constructor.
 * - `dictNonStrict` holds the entries that were passed with a leading `~`, stored without
 *   that prefix. It is consulted only when `ignoreCase` is in effect.
 *
 * Matches are reported as `noSuggest` and never as `forbidden`.
 */
class IgnoreWordsDictionary implements SpellingDictionary {
    readonly containsNoSuggestWords = true;
    readonly isDictionaryCaseSensitive: boolean = true;
    readonly options: SpellingDictionaryOptions = {};
    // NOTE(review): 'forbidden' looks carried over from the forbidden-words dictionary;
    // confirm whether a distinct type value is intended for ignore words.
    readonly type = 'forbidden';

    private dict: Set<string>;
    private dictNonStrict: Set<string>;

    /**
     * @param name - name of the dictionary
     * @param source - dictionary source
     * @param words - entries to ignore; entries starting with `~` are additionally indexed
     *   (without the `~`) for case-insensitive lookups.
     */
    constructor(readonly name: string, readonly source: string, words: Iterable<string>) {
        this.dict = new Set(words);
        const nonStrictEntries = pipe(
            this.dict,
            opFilter((entry) => entry.startsWith('~')),
            opMap((entry) => entry.slice(1))
        );
        this.dictNonStrict = new Set(nonStrictEntries);
    }

    /**
     * Check whether a word is in the ignore list.
     *
     * The word is normalized, then tried as-is and lower-cased against the exact entries.
     * When `ignoreCase` applies (from `options`, else `Defaults.ignoreCase`), the same two
     * forms are also tried against the non-strict entries.
     * @param word - the word
     * @param options - options
     * @returns true if the word matches an entry
     */
    has(word: string, options?: HasOptions): boolean {
        const normalized = word.normalize(NormalizeForm);
        const lowered = normalized.toLowerCase();
        if (this.dict.has(normalized) || this.dict.has(lowered)) return true;

        if (!(options?.ignoreCase ?? Defaults.ignoreCase)) return false;
        return this.dictNonStrict.has(normalized) || this.dictNonStrict.has(lowered);
    }

    /**
     * Same lookup order as `has`, but reports which form of the word matched.
     * @returns the match with `forbidden: false` and `noSuggest: true`, or `undefined` on a miss.
     */
    find(word: string, options?: SearchOptions): FindResult | undefined {
        const normalized = word.normalize(NormalizeForm);
        const lowered = normalized.toLowerCase();
        const asResult = (found: string): FindResult => ({ found, forbidden: false, noSuggest: true });

        if (this.dict.has(normalized)) return asResult(normalized);
        if (this.dict.has(lowered)) return asResult(lowered);

        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
        if (ignoreCase) {
            if (this.dictNonStrict.has(normalized)) return asResult(normalized);
            if (this.dictNonStrict.has(lowered)) return asResult(lowered);
        }
        return undefined;
    }

    /** Ignored words are never forbidden. */
    isForbidden(_word: string): boolean {
        return false;
    }

    /** A word is a no-suggest word exactly when `has` finds it. */
    isNoSuggestWord(word: string, options: HasOptions): boolean {
        return this.has(word, options);
    }

    /** This dictionary offers no suggestions; the result is always an empty list. */
    suggest(
        word: string,
        numSuggestions?: number,
        compoundMethod?: CompoundWordsMethod,
        numChanges?: number,
        ignoreCase?: boolean
    ): SuggestionResult[];
    suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[];
    suggest() {
        return [];
    }

    /** Intentionally does nothing: there are no suggestions to generate. */
    genSuggestions(): void {
        return;
    }

    /** Returns the word unchanged. */
    mapWord(word: string): string {
        return word;
    }

    /** Number of entries in the exact set, which includes any `~`-prefixed entries. */
    get size() {
        return this.dict.size;
    }

    /** Always returns an empty list. */
    getErrors?(): Error[] {
        return [];
    }
}

/**
 * Build a dictionary whose words are all to be ignored.
 * Ignored words override forbidden words.
 *
 * The word list is passed through `parseDictionaryLines` with `stripCaseAndAccents` enabled,
 * and each resulting entry is normalized with `NormalizeForm`.
 * @param wordList - list of words
 * @param name - name of dictionary
 * @param source - dictionary source
 * @returns an `IgnoreWordsDictionary`, or, when any parsed entry contains `*` or `+`, a
 *   dictionary built by `createSpellingDictionary` with `noSuggest` enabled.
 */
export function createIgnoreWordsDictionary(
    wordList: readonly string[],
    name: string,
    source: string
): SpellingDictionary {
    const words: string[] = [];
    for (const line of parseDictionaryLines(wordList, { stripCaseAndAccents: true })) {
        words.push(line.normalize(NormalizeForm));
    }

    // Entries containing `*` or `+` are handed to the general dictionary builder
    // (presumably because they carry special meaning there; confirm against cspell-trie-lib).
    const specialCharacters = /[*+]/;
    const needsFullDictionary = words.some((entry) => specialCharacters.test(entry));

    if (!needsFullDictionary) {
        return new IgnoreWordsDictionary(name, source, words);
    }

    return createSpellingDictionary(words, name, source, {
        caseSensitive: true,
        noSuggest: true,
        weightMap: undefined,
        supportNonStrictSearches: true,
    });
}
Expand Up @@ -2,6 +2,7 @@ import * as Trie from 'cspell-trie-lib';
import { SpellingDictionaryOptions } from '.';
import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary';
import { createForbiddenWordsDictionary } from './ForbiddenWordsDictionary';
import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';
import { CompoundWordsMethod } from './SpellingDictionary';
import { createCollection } from './SpellingDictionaryCollection';
import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';
Expand Down Expand Up @@ -29,12 +30,14 @@ describe('Verify using multiple dictionaries', () => {
const wordsG = ['café', 'accent'];

const wordsLegacy = ['error', 'code', 'system', 'ctrl'];
const wordsIgnore = ['ignored'];

// cspell:ignore pinkberry behaviour colour
const wordsNoSug = ['colour', 'behaviour', 'favour', 'pinkberry'];

const dictNoSug = createSpellingDictionary(wordsNoSug, 'words-no-suggest', 'test', opts({ noSuggest: true }));
const dictLegacy = createSpellingDictionary(wordsLegacy, 'legacy-dict', 'test', opts({ useCompounds: true }));
const dictIgnore = createIgnoreWordsDictionary(wordsIgnore, '[ignore]', 'test');

test.each`
word | expected
Expand Down Expand Up @@ -62,6 +65,28 @@ describe('Verify using multiple dictionaries', () => {
expect(dictCollection.has(word)).toEqual(expected);
});

test.each`
word | expected
${'colour'} | ${true}
${'Colour'} | ${true}
${'tree'} | ${false}
${''} | ${false}
${'ignored'} | ${true}
${'Ignored'} | ${true}
${'behaviour'} | ${true}
${'guava'} | ${false}
`('isNoSuggestWord "$word"', async ({ word, expected }) => {
const dicts = await Promise.all([
createSpellingDictionary(wordsA, 'wordsA', 'test', opts()),
createForbiddenWordsDictionary(['behaviour', 'guava', 'Ignored', 'Colour'], 'flag_words', 'test'),
dictNoSug,
dictIgnore,
]);

const dictCollection = createCollection(dicts, 'test');
expect(dictCollection.isNoSuggestWord(word, {})).toEqual(expected);
});

test('has for forbidden word.', () => {
const word = 'guava';
const expected = false;
Expand Down Expand Up @@ -220,6 +245,7 @@ describe('Verify using multiple dictionaries', () => {
${'áccent'} | ${true /* ignore the accent. cspell:disable-line */}
${'a\u0301ccent'} | ${true /* ignore the accent. cspell:disable-line */}
${'applé'} | ${true /* ignore the accent. cspell:disable-line */}
${'ignored'} | ${true}
`('checks has word: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', { dictionaryInformation: { ignore: '\u0300-\u0362' } }),
Expand All @@ -232,6 +258,7 @@ describe('Verify using multiple dictionaries', () => {
caseSensitive: true,
}),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand All @@ -248,6 +275,7 @@ describe('Verify using multiple dictionaries', () => {
${'pinkbug'} | ${{ found: 'pinkbug', forbidden: false, noSuggest: false }}
${'colour'} | ${{ found: 'colour', forbidden: false, noSuggest: true }}
${'behaviour'} | ${{ found: 'behaviour', forbidden: false, noSuggest: true }}
${'ignored'} | ${{ found: 'ignored', forbidden: false, noSuggest: true }}
`('find: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
Expand All @@ -257,6 +285,7 @@ describe('Verify using multiple dictionaries', () => {
createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictNoSug,
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down Expand Up @@ -287,6 +316,7 @@ describe('Verify using multiple dictionaries', () => {
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictNoSug,
dictLegacy,
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down Expand Up @@ -328,6 +358,7 @@ describe('Verify using multiple dictionaries', () => {
${'bug'} | ${[sr('bug', 5)]}
${'blackberry'} | ${[sr('blackberry', 0), sr('black berry', 98)]}
${'stinkbug'} | ${[sr('stink bug', 103), sr('pinkbug', 198)]}
${'ignored'} | ${[]}
`('checks suggestions word: "$word"', ({ word, expected }) => {
const dicts = [
createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
Expand All @@ -336,6 +367,7 @@ describe('Verify using multiple dictionaries', () => {
createSpellingDictionary(wordsD, 'wordsD', 'test', undefined),
createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
dictIgnore,
];

const dictCollection = createCollection(dicts, 'test');
Expand Down
@@ -0,0 +1,18 @@
import { createMapper } from '../util/repMap';
import { __testing__ } from './SpellingDictionaryFromTrie';

const { outerWordForms } = __testing__;

// cspell:ignore guenstig günstig

// Tests `outerWordForms` (exposed through `__testing__`) with and without a replacement map.
describe('SpellingDictionaryFromTrie', () => {
// Each row: the input word, the replacement pairs given to `createMapper`, and the expected word forms.
test.each`
word | repMap | expected
${'hello'} | ${undefined} | ${['hello']}
${'guenstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['guenstig', 'günstig']}
${'günstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['günstig', 'günstig'.normalize('NFD')]}
`('outerWordForms $word', ({ word, repMap, expected }) => {
const mapWord = createMapper(repMap);
// Falls back to an identity mapper when `createMapper` yields a nullish value; the result is compared as a Set.
expect(outerWordForms(word, mapWord ?? ((a) => a))).toEqual(new Set(expected));
});
});

0 comments on commit 9455ccc

Please sign in to comment.