fix: improve replace map used with dictionaries (#3679)

* create IgnoreWordsDictionary
* improve search speed.
* improve replace map used with dictionaries. Related to #3592
* fix: Add IgnoreWordsDictionary to cspell-dictionary
* fix: Fix in document directive detection.
* fix: add createIgnoreWordsDictionary to cspell-lib
* set IgnoreWordsDictionary containsNoSuggestWords
streetsidesoftware · Sep 29, 2022 · 9455ccc · 9455ccc
1 parent 5d34d46
commit 9455ccc
Show file tree

Hide file tree

Showing 17 changed files with 398 additions and 59 deletions.
diff --git a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts
@@ -0,0 +1,92 @@
+import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';
+
+// const oc = expect.objectContaining;
+
+// cspell:ignore êphone îphone geschäft
+
+describe('IgnoreWordsDictionary', () => {
+    const dictWords = ['  English', 'grumpy', 'Avocado', 'avocados', 'Café', ' êphone', 'îphone', 'geschäft']; // note the leading spaces on some entries; the 'English' cases below still expect a match
+    const dict = createIgnoreWordsDictionary(dictWords, 'ignore words', 'test');
+
+    test.each`
+        word         | expected
+        ${''}        | ${false}
+        ${'avocado'} | ${true}
+        ${'Avocado'} | ${true}
+        ${'english'} | ${true}
+        ${'English'} | ${true}
+        ${'grumpy'}  | ${true}
+        ${'Grumpy'}  | ${true}
+    `('has of "$word"', async ({ word, expected }) => { // has() called without options
+        expect(dict.has(word)).toEqual(expected);
+    });
+
+    test.each`
+        word                         | options                  | expected
+        ${'avocado'}                 | ${undefined}             | ${{ found: 'avocado', forbidden: false, noSuggest: true }}
+        ${'Avocado'}                 | ${undefined}             | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
+        ${'Avocado'}                 | ${{ ignoreCase: true }}  | ${{ found: 'Avocado', forbidden: false, noSuggest: true }}
+        ${''}                        | ${undefined}             | ${undefined}
+        ${'English'}                 | ${{ ignoreCase: true }}  | ${{ found: 'English', forbidden: false, noSuggest: true }}
+        ${'English'}                 | ${{ ignoreCase: false }} | ${{ found: 'English', forbidden: false, noSuggest: true }}
+        ${'english'}                 | ${{ ignoreCase: true }}  | ${{ found: 'english', forbidden: false, noSuggest: true }}
+        ${'english'}                 | ${{ ignoreCase: false }} | ${undefined}
+        ${'îphone'.normalize('NFC')} | ${undefined}             | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
+        ${'îphone'.normalize('NFD')} | ${undefined}             | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }}
+        ${'iphone'}                  | ${undefined}             | ${{ found: 'iphone', forbidden: false, noSuggest: true }}
+    `('find "$word" $options', async ({ word, options, expected }) => { // find(): a hit reports the matched form with noSuggest: true and forbidden: false; a miss is undefined
+        expect(dict.find(word, options)).toEqual(expected);
+    });
+
+    test.each`
+        word                         | ignoreCase   | expected
+        ${''}                        | ${undefined} | ${false}
+        ${'avocado'}                 | ${false}     | ${false}
+        ${'avocado'}                 | ${undefined} | ${true}
+        ${'avocado'}                 | ${true}      | ${true}
+        ${'Avocado'}                 | ${undefined} | ${true}
+        ${'avocadoS'}                | ${undefined} | ${true}
+        ${'AvocadoS'}                | ${undefined} | ${true}
+        ${'Café'}                    | ${false}     | ${true}
+        ${'cafe'}                    | ${false}     | ${false}
+        ${'cafe'}                    | ${true}      | ${true}
+        ${'îphone'.normalize('NFC')} | ${true}      | ${true}
+        ${'îphone'.normalize('NFD')} | ${true}      | ${true}
+        ${'english'}                 | ${undefined} | ${true}
+        ${'English'}                 | ${undefined} | ${true}
+        ${'grumpy'}                  | ${undefined} | ${true}
+        ${'Grumpy'}                  | ${undefined} | ${true}
+    `('isNoSuggestWord of "$word" ignoreCase: $ignoreCase', async ({ word, ignoreCase, expected }) => { // with ignoreCase: false the case/accent-less variants ('avocado', 'cafe') no longer match
+        expect(dict.isNoSuggestWord(word, { ignoreCase })).toEqual(expected);
+    });
+
+    test.each`
+        word          | expected
+        ${'avocado'}  | ${false}
+        ${'Avocado'}  | ${false}
+        ${''}         | ${false}
+        ${'English'}  | ${false}
+        ${'english'}  | ${false}
+        ${'avocadoS'} | ${false}
+        ${'AvocadoS'} | ${false}
+        ${'grumpy'}   | ${false}
+        ${'Grumpy'}   | ${false}
+    `('isForbidden of "$word"', async ({ word, expected }) => { // never forbidden, whether or not the word is listed
+        expect(dict.isForbidden(word)).toEqual(expected);
+    });
+
+    test.each`
+        word          | expected
+        ${'avocado'}  | ${[]}
+        ${'Avocado'}  | ${[]}
+        ${''}         | ${[]}
+        ${'English'}  | ${[]}
+        ${'english'}  | ${[]}
+        ${'avocadoS'} | ${[]}
+        ${'AvocadoS'} | ${[]}
+        ${'grumpy'}   | ${[]}
+        ${'Grumpy'}   | ${[]}
+    `('suggest of "$word"', async ({ word, expected }) => { // never produces suggestions
+        expect(dict.suggest(word)).toEqual(expected);
+    });
+});
diff --git a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts
@@ -0,0 +1,128 @@
+import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync';
+import { CompoundWordsMethod, parseDictionaryLines, SuggestionResult } from 'cspell-trie-lib';
+import {
+    FindResult,
+    HasOptions,
+    SearchOptions,
+    SpellingDictionary,
+    SpellingDictionaryOptions,
+    SuggestOptions,
+} from './SpellingDictionary';
+import * as Defaults from './defaults';
+import { createSpellingDictionary } from './createSpellingDictionary';
+
+const NormalizeForm = 'NFC' as const;
+
+class IgnoreWordsDictionary implements SpellingDictionary {
+    private dict: Set<string>; // every entry as given, including the ones starting with `~`
+    private dictNonStrict: Set<string>; // the `~` entries with that prefix removed; only consulted when ignoreCase is on
+    readonly containsNoSuggestWords = true;
+    readonly options: SpellingDictionaryOptions = {};
+    readonly type = 'forbidden'; // NOTE(review): 'forbidden' looks copied from the forbidden-words dictionary — confirm the intended type
+    constructor(readonly name: string, readonly source: string, words: Iterable<string>) {
+        this.dict = new Set(words);
+        this.dictNonStrict = new Set(
+            pipe(
+                this.dict,
+                opFilter((w) => w.startsWith('~')), // keep only the `~` prefixed entries
+                opMap((w) => w.slice(1)) // drop the leading `~`
+            )
+        );
+    }
+
+    /**
+     * Checks whether `word` is in the ignore list: NFC-normalized, tried as-is, then lower-cased.
+     * When `ignoreCase` is on, the non-strict (`~` prefixed) entries are consulted as well.
+     * @param word - the word
+     * @param options - options; `ignoreCase` falls back to `Defaults.ignoreCase` when unset
+     * @returns true if the word is to be ignored
+     */
+    has(word: string, options?: HasOptions): boolean {
+        const nWord = word.normalize(NormalizeForm);
+        if (this.dict.has(nWord)) return true;
+        const lcWord = nWord.toLowerCase();
+        if (this.dict.has(lcWord)) return true; // the lower-cased form is tried regardless of ignoreCase
+        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
+        return ignoreCase && (this.dictNonStrict.has(nWord) || this.dictNonStrict.has(lcWord));
+    }
+
+    /** Same lookup order as `has`, but returns the matched form; hits are always `noSuggest: true`, `forbidden: false` */
+    find(word: string, options?: SearchOptions): FindResult | undefined {
+        const nWord = word.normalize(NormalizeForm);
+        if (this.dict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true };
+        const lcWord = nWord.toLowerCase();
+        if (this.dict.has(lcWord)) return { found: lcWord, forbidden: false, noSuggest: true };
+
+        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
+        if (!ignoreCase) return undefined; // strict search: the non-strict entries are not consulted
+
+        if (this.dictNonStrict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true };
+        return (this.dictNonStrict.has(lcWord) && { found: lcWord, forbidden: false, noSuggest: true }) || undefined;
+    }
+
+    isForbidden(_word: string): boolean {
+        return false; // this dictionary never forbids a word
+    }
+
+    isNoSuggestWord(word: string, options: HasOptions): boolean {
+        return this.has(word, options); // every word this dictionary "has" is a no-suggest word
+    }
+
+    suggest(
+        word: string,
+        numSuggestions?: number,
+        compoundMethod?: CompoundWordsMethod,
+        numChanges?: number,
+        ignoreCase?: boolean
+    ): SuggestionResult[];
+    suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[];
+    suggest() {
+        return []; // no suggestions are ever offered, whatever the arguments
+    }
+    genSuggestions(): void {
+        return; // nothing to generate
+    }
+    mapWord(word: string): string {
+        return word; // the word is returned unchanged
+    }
+    get size() {
+        return this.dict.size; // counts every stored entry, `~` prefixed ones included
+    }
+    readonly isDictionaryCaseSensitive: boolean = true;
+    getErrors?(): Error[] {
+        return [];
+    }
+}
+
+/**
+ * Create a dictionary where all words are to be ignored.
+ * Ignored words override forbidden words.
+ * @param wordList - list of words
+ * @param name - name of dictionary
+ * @param source - dictionary source
+ * @returns a Set-based ignore dictionary, or a general no-suggest dictionary when any word contains `*` or `+`
+ */
+export function createIgnoreWordsDictionary(
+    wordList: readonly string[],
+    name: string,
+    source: string
+): SpellingDictionary {
+    const testSpecialCharacters = /[*+]/;
+
+    const words = [...parseDictionaryLines(wordList, { stripCaseAndAccents: true })].map((w) =>
+        w.normalize(NormalizeForm)
+    ); // parsed entries, each normalized to NFC
+
+    const hasSpecial = words.findIndex((word) => testSpecialCharacters.test(word)) >= 0;
+
+    if (hasSpecial) { // presumably because exact Set lookups cannot honor `*` / `+` in a word — confirm
+        return createSpellingDictionary(words, name, source, {
+            caseSensitive: true,
+            noSuggest: true,
+            weightMap: undefined,
+            supportNonStrictSearches: true,
+        });
+    }
+
+    return new IgnoreWordsDictionary(name, source, words);
+}
diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts
@@ -2,6 +2,7 @@ import * as Trie from 'cspell-trie-lib';
 import { SpellingDictionaryOptions } from '.';
 import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary';
 import { createForbiddenWordsDictionary } from './ForbiddenWordsDictionary';
+import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary';
 import { CompoundWordsMethod } from './SpellingDictionary';
 import { createCollection } from './SpellingDictionaryCollection';
 import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie';
@@ -29,12 +30,14 @@ describe('Verify using multiple dictionaries', () => {
     const wordsG = ['café', 'accent'];
 
     const wordsLegacy = ['error', 'code', 'system', 'ctrl'];
+    const wordsIgnore = ['ignored'];
 
     // cspell:ignore pinkberry behaviour colour
     const wordsNoSug = ['colour', 'behaviour', 'favour', 'pinkberry'];
 
     const dictNoSug = createSpellingDictionary(wordsNoSug, 'words-no-suggest', 'test', opts({ noSuggest: true }));
     const dictLegacy = createSpellingDictionary(wordsLegacy, 'legacy-dict', 'test', opts({ useCompounds: true }));
+    const dictIgnore = createIgnoreWordsDictionary(wordsIgnore, '[ignore]', 'test');
 
     test.each`
         word            | expected
@@ -62,6 +65,28 @@ describe('Verify using multiple dictionaries', () => {
         expect(dictCollection.has(word)).toEqual(expected);
     });
 
+    test.each`
+        word           | expected
+        ${'colour'}    | ${true}
+        ${'Colour'}    | ${true}
+        ${'tree'}      | ${false}
+        ${''}          | ${false}
+        ${'ignored'}   | ${true}
+        ${'Ignored'}   | ${true}
+        ${'behaviour'} | ${true}
+        ${'guava'}     | ${false}
+    `('isNoSuggestWord "$word"', async ({ word, expected }) => {
+        const dicts = await Promise.all([
+            createSpellingDictionary(wordsA, 'wordsA', 'test', opts()),
+            createForbiddenWordsDictionary(['behaviour', 'guava', 'Ignored', 'Colour'], 'flag_words', 'test'),
+            dictNoSug,
+            dictIgnore,
+        ]);
+
+        const dictCollection = createCollection(dicts, 'test');
+        expect(dictCollection.isNoSuggestWord(word, {})).toEqual(expected);
+    });
+
     test('has for forbidden word.', () => {
         const word = 'guava';
         const expected = false;
@@ -220,6 +245,7 @@ describe('Verify using multiple dictionaries', () => {
         ${'áccent'}       | ${true /* ignore the accent. cspell:disable-line */}
         ${'a\u0301ccent'} | ${true /* ignore the accent. cspell:disable-line */}
         ${'applé'}        | ${true /* ignore the accent. cspell:disable-line */}
+        ${'ignored'}      | ${true}
     `('checks has word: "$word"', ({ word, expected }) => {
         const dicts = [
             createSpellingDictionary(wordsA, 'wordsA', 'test', { dictionaryInformation: { ignore: '\u0300-\u0362' } }),
@@ -232,6 +258,7 @@ describe('Verify using multiple dictionaries', () => {
                 caseSensitive: true,
             }),
             createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
+            dictIgnore,
         ];
 
         const dictCollection = createCollection(dicts, 'test');
@@ -248,6 +275,7 @@ describe('Verify using multiple dictionaries', () => {
         ${'pinkbug'}    | ${{ found: 'pinkbug', forbidden: false, noSuggest: false }}
         ${'colour'}     | ${{ found: 'colour', forbidden: false, noSuggest: true }}
         ${'behaviour'}  | ${{ found: 'behaviour', forbidden: false, noSuggest: true }}
+        ${'ignored'}    | ${{ found: 'ignored', forbidden: false, noSuggest: true }}
     `('find: "$word"', ({ word, expected }) => {
         const dicts = [
             createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
@@ -257,6 +285,7 @@ describe('Verify using multiple dictionaries', () => {
             createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
             createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
             dictNoSug,
+            dictIgnore,
         ];
 
         const dictCollection = createCollection(dicts, 'test');
@@ -287,6 +316,7 @@ describe('Verify using multiple dictionaries', () => {
             createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
             dictNoSug,
             dictLegacy,
+            dictIgnore,
         ];
 
         const dictCollection = createCollection(dicts, 'test');
@@ -328,6 +358,7 @@ describe('Verify using multiple dictionaries', () => {
         ${'bug'}        | ${[sr('bug', 5)]}
         ${'blackberry'} | ${[sr('blackberry', 0), sr('black berry', 98)]}
         ${'stinkbug'}   | ${[sr('stink bug', 103), sr('pinkbug', 198)]}
+        ${'ignored'}    | ${[]}
     `('checks suggestions word: "$word"', ({ word, expected }) => {
         const dicts = [
             createSpellingDictionary(wordsA, 'wordsA', 'test', undefined),
@@ -336,6 +367,7 @@ describe('Verify using multiple dictionaries', () => {
             createSpellingDictionary(wordsD, 'wordsD', 'test', undefined),
             createSpellingDictionary(wordsF, 'wordsF', 'test', undefined),
             createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'),
+            dictIgnore,
         ];
 
         const dictCollection = createCollection(dicts, 'test');

diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts
@@ -0,0 +1,18 @@
+import { createMapper } from '../util/repMap';
+import { __testing__ } from './SpellingDictionaryFromTrie';
+
+const { outerWordForms } = __testing__;
+
+// cspell:ignore guenstig günstig
+
+describe('SpellingDictionaryFromTrie', () => {
+    test.each`
+        word          | repMap                                                  | expected
+        ${'hello'}    | ${undefined}                                            | ${['hello']}
+        ${'guenstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['guenstig', 'günstig']}
+        ${'günstig'}  | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['günstig', 'günstig'.normalize('NFD')]}
+    `('outerWordForms $word', ({ word, repMap, expected }) => {
+        const mapWord = createMapper(repMap); // repMap is a list of [from, to] replacement pairs, or undefined
+        expect(outerWordForms(word, mapWord ?? ((a) => a))).toEqual(new Set(expected)); // identity mapper when createMapper returns null/undefined
+    });
+});