From 023fb98568ad3f6f02cf25526139f34ba1d1f524 Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Thu, 29 Sep 2022 16:06:09 +0200 Subject: [PATCH 1/4] fix: Add IgnoreWordsDictionary to cspell-dictionary --- .../IgnoreWordsDictionary.test.ts | 92 +++++++++++++ .../IgnoreWordsDictionary.ts | 127 ++++++++++++++++++ .../SpellingDictionaryFromTrie.test.ts | 18 +++ .../SpellingDictionaryFromTrie.ts | 33 +++-- .../src/SpellingDictionary/charset.ts | 12 -- .../src/SpellingDictionary/defaults.ts | 1 + .../src/SpellingDictionary/index.ts | 1 + .../src/__snapshots__/index.test.ts.snap | 1 + packages/cspell-dictionary/src/index.ts | 1 + .../cspell-dictionary/src/util/repMap.test.ts | 46 +++++-- packages/cspell-dictionary/src/util/repMap.ts | 54 ++++++-- 11 files changed, 339 insertions(+), 47 deletions(-) create mode 100644 packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts create mode 100644 packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts create mode 100644 packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts delete mode 100644 packages/cspell-dictionary/src/SpellingDictionary/charset.ts create mode 100644 packages/cspell-dictionary/src/SpellingDictionary/defaults.ts diff --git a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts new file mode 100644 index 00000000000..946a610dfbe --- /dev/null +++ b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts @@ -0,0 +1,92 @@ +import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary'; + +// const oc = expect.objectContaining; + +// cspell:ignore êphone îphone geschäft + +describe('IgnoreWordsDictionary', () => { + const dictWords = [' English', 'grumpy', 'Avocado', 'avocados', 'Café', ' êphone', 'îphone', 'geschäft']; + const dict = createIgnoreWordsDictionary(dictWords, 'ignore words', 'test'); + + 
test.each` + word | expected + ${''} | ${false} + ${'avocado'} | ${true} + ${'Avocado'} | ${true} + ${'english'} | ${true} + ${'English'} | ${true} + ${'grumpy'} | ${true} + ${'Grumpy'} | ${true} + `('has of "$word"', async ({ word, expected }) => { + expect(dict.has(word)).toEqual(expected); + }); + + test.each` + word | options | expected + ${'avocado'} | ${undefined} | ${{ found: 'avocado', forbidden: false, noSuggest: true }} + ${'Avocado'} | ${undefined} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }} + ${'Avocado'} | ${{ ignoreCase: true }} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }} + ${''} | ${undefined} | ${undefined} + ${'English'} | ${{ ignoreCase: true }} | ${{ found: 'English', forbidden: false, noSuggest: true }} + ${'English'} | ${{ ignoreCase: false }} | ${{ found: 'English', forbidden: false, noSuggest: true }} + ${'english'} | ${{ ignoreCase: true }} | ${{ found: 'english', forbidden: false, noSuggest: true }} + ${'english'} | ${{ ignoreCase: false }} | ${undefined} + ${'îphone'.normalize('NFC')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }} + ${'îphone'.normalize('NFD')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }} + ${'iphone'} | ${undefined} | ${{ found: 'iphone', forbidden: false, noSuggest: true }} + `('find "$word" $options', async ({ word, options, expected }) => { + expect(dict.find(word, options)).toEqual(expected); + }); + + test.each` + word | ignoreCase | expected + ${''} | ${undefined} | ${false} + ${'avocado'} | ${false} | ${false} + ${'avocado'} | ${undefined} | ${true} + ${'avocado'} | ${true} | ${true} + ${'Avocado'} | ${undefined} | ${true} + ${'avocadoS'} | ${undefined} | ${true} + ${'AvocadoS'} | ${undefined} | ${true} + ${'Café'} | ${false} | ${true} + ${'cafe'} | ${false} | ${false} + ${'cafe'} | ${true} | ${true} + ${'îphone'.normalize('NFC')} | ${true} | ${true} + ${'îphone'.normalize('NFD')} | ${true} 
| ${true} + ${'english'} | ${undefined} | ${true} + ${'English'} | ${undefined} | ${true} + ${'grumpy'} | ${undefined} | ${true} + ${'Grumpy'} | ${undefined} | ${true} + `('isNoSuggestWord of "$word" ignoreCase: $ignoreCase', async ({ word, ignoreCase, expected }) => { + expect(dict.isNoSuggestWord(word, { ignoreCase })).toEqual(expected); + }); + + test.each` + word | expected + ${'avocado'} | ${false} + ${'Avocado'} | ${false} + ${''} | ${false} + ${'English'} | ${false} + ${'english'} | ${false} + ${'avocadoS'} | ${false} + ${'AvocadoS'} | ${false} + ${'grumpy'} | ${false} + ${'Grumpy'} | ${false} + `('isForbidden of "$word"', async ({ word, expected }) => { + expect(dict.isForbidden(word)).toEqual(expected); + }); + + test.each` + word | expected + ${'avocado'} | ${[]} + ${'Avocado'} | ${[]} + ${''} | ${[]} + ${'English'} | ${[]} + ${'english'} | ${[]} + ${'avocadoS'} | ${[]} + ${'AvocadoS'} | ${[]} + ${'grumpy'} | ${[]} + ${'Grumpy'} | ${[]} + `('suggest of "$word"', async ({ word, expected }) => { + expect(dict.suggest(word)).toEqual(expected); + }); +}); diff --git a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts new file mode 100644 index 00000000000..f374325b6e3 --- /dev/null +++ b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts @@ -0,0 +1,127 @@ +import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync'; +import { CompoundWordsMethod, parseDictionaryLines, SuggestionResult } from 'cspell-trie-lib'; +import { + FindResult, + HasOptions, + SearchOptions, + SpellingDictionary, + SpellingDictionaryOptions, + SuggestOptions, +} from './SpellingDictionary'; +import * as Defaults from './defaults'; +import { createSpellingDictionary } from './createSpellingDictionary'; + +const NormalizeForm = 'NFC' as const; + +class IgnoreWordsDictionary implements SpellingDictionary { + private dict: Set; + private dictNonStrict: Set; + 
readonly containsNoSuggestWords = false; + readonly options: SpellingDictionaryOptions = {}; + readonly type = 'forbidden'; + constructor(readonly name: string, readonly source: string, words: Iterable) { + this.dict = new Set(words); + this.dictNonStrict = new Set( + pipe( + this.dict, + opFilter((w) => w.startsWith('~')), + opMap((w) => w.slice(1)) + ) + ); + } + + /** + * Checks whether the word is in the ignore list. + * Also tries the lower-case form, and the non-strict entries unless ignoreCase is false. + * @param word - the word + * @param options - options + * @returns true if the word is ignored + */ + has(word: string, options?: HasOptions): boolean { + const nWord = word.normalize(NormalizeForm); + if (this.dict.has(nWord)) return true; + const lcWord = nWord.toLowerCase(); + if (this.dict.has(lcWord)) return true; + const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase; + return ignoreCase && (this.dictNonStrict.has(nWord) || this.dictNonStrict.has(lcWord)); + } + + /** A more detailed search for a word, might take longer than `has` */ + find(word: string, options?: SearchOptions): FindResult | undefined { + const nWord = word.normalize(NormalizeForm); + if (this.dict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true }; + const lcWord = nWord.toLowerCase(); + if (this.dict.has(lcWord)) return { found: lcWord, forbidden: false, noSuggest: true }; + + const ignoreCase = options?.ignoreCase ?? 
Defaults.ignoreCase; + if (!ignoreCase) return undefined; + + if (this.dictNonStrict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true }; + return (this.dictNonStrict.has(lcWord) && { found: lcWord, forbidden: false, noSuggest: true }) || undefined; + } + + isForbidden(_word: string): boolean { + return false; + } + + isNoSuggestWord(word: string, options: HasOptions): boolean { + return this.has(word, options); + } + + suggest( + word: string, + numSuggestions?: number, + compoundMethod?: CompoundWordsMethod, + numChanges?: number, + ignoreCase?: boolean + ): SuggestionResult[]; + suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[]; + suggest() { + return []; + } + genSuggestions(): void { + return; + } + mapWord(word: string): string { + return word; + } + get size() { + return this.dict.size; + } + readonly isDictionaryCaseSensitive: boolean = true; + getErrors?(): Error[] { + return []; + } +} + +/** + * Create a dictionary where all words are to be ignored. + * Ignored words override forbidden words. 
+ * @param wordList - list of words + * @param name - name of dictionary + * @param source - dictionary source + * @returns + */ +export function createIgnoreWordsDictionary( + wordList: readonly string[], + name: string, + source: string +): SpellingDictionary { + const testSpecialCharacters = /[*+]/; + + const words = [...parseDictionaryLines(wordList, { stripCaseAndAccents: true })].map((w) => + w.normalize(NormalizeForm) + ); + const hasSpecial = words.findIndex((word) => testSpecialCharacters.test(word)) >= 0; + + if (hasSpecial) { + return createSpellingDictionary(words, name, source, { + caseSensitive: true, + noSuggest: true, + weightMap: undefined, + supportNonStrictSearches: true, + }); + } + + return new IgnoreWordsDictionary(name, source, words); +} diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts new file mode 100644 index 00000000000..c11ae70112a --- /dev/null +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts @@ -0,0 +1,18 @@ +import { createMapper } from '../util/repMap'; +import { __testing__ } from './SpellingDictionaryFromTrie'; + +const { outerWordForms } = __testing__; + +// cspell:ignore guenstig günstig + +describe('SpellingDictionaryFromTrie', () => { + test.each` + word | repMap | expected + ${'hello'} | ${undefined} | ${['hello']} + ${'guenstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['guenstig', 'günstig']} + ${'günstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['günstig', 'günstig'.normalize('NFD')]} + `('outerWordForms $word', ({ word, repMap, expected }) => { + const mapWord = createMapper(repMap); + expect(outerWordForms(word, mapWord ?? 
((a) => a))).toEqual(new Set(expected)); + }); +}); diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts index c1f3f00490e..059f11cc5bd 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.ts @@ -8,7 +8,6 @@ import type { import { CompoundWordsMethod, importTrie, suggestionCollector, Trie } from 'cspell-trie-lib'; import { createMapper } from '../util/repMap'; import { clean } from '../util/clean'; -import { charsetToRegExp } from './charset'; import { FindResult, HasOptions, @@ -27,6 +26,8 @@ import { wordSuggestFormsArray, } from './SpellingDictionaryMethods'; import { autoCache, createCache01 } from '../util/AutoCache'; +import { pipe, opConcatMap } from '@cspell/cspell-pipe/sync'; +import * as Defaults from './defaults'; const findWordOptionsCaseSensitive: FindWordOptions = Object.freeze({ caseSensitive: true }); const findWordOptionsNotCaseSensitive: FindWordOptions = Object.freeze({ caseSensitive: false }); @@ -40,7 +41,6 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { readonly type = 'SpellingDictionaryFromTrie'; readonly isDictionaryCaseSensitive: boolean; readonly containsNoSuggestWords: boolean; - readonly ignoreCharactersRegExp: RegExp | undefined; private weightMap: WeightMap | undefined; @@ -51,12 +51,11 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { readonly source = 'from trie', size?: number ) { - this.mapWord = createMapper(options.repMap || []); + this.mapWord = createMapper(options.repMap, options.dictionaryInformation?.ignore); this.isDictionaryCaseSensitive = options.caseSensitive ?? 
!trie.isLegacy; this.containsNoSuggestWords = options.noSuggest || false; this._size = size || 0; this.weightMap = options.weightMap || createWeightMapFromDictionaryInformation(options.dictionaryInformation); - this.ignoreCharactersRegExp = charsetToRegExp(this.options.dictionaryInformation?.ignore); } public get size(): number { @@ -95,7 +94,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { useCompounds: HasOptions['useCompounds'] | undefined; ignoreCase: boolean; } { - const { useCompounds = this.options.useCompounds, ignoreCase = true } = hasOptionToSearchOption(hasOptions); + const { useCompounds = this.options.useCompounds, ignoreCase = Defaults.ignoreCase } = + hasOptionToSearchOption(hasOptions); return { useCompounds, ignoreCase }; } @@ -108,12 +108,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { useCompounds: number | boolean | undefined, ignoreCase: boolean ): FindAnyFormResult | undefined { - const outerForms = new Set([word]); - if (this.ignoreCharactersRegExp) { - outerForms.add(word.replace(this.ignoreCharactersRegExp, '')); - outerForms.add(word.normalize('NFD').replace(this.ignoreCharactersRegExp, '')); - outerForms.add(word.normalize('NFC').replace(this.ignoreCharactersRegExp, '')); - } + const outerForms = outerWordForms(word, this.mapWord); + for (const form of outerForms) { const r = this._findAnyForm(form, useCompounds, ignoreCase); if (r) return r; @@ -122,11 +118,10 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary { } private _findAnyForm( - word: string, + mWord: string, useCompounds: number | boolean | undefined, ignoreCase: boolean ): FindAnyFormResult | undefined { - const mWord = this.mapWord(word.normalize('NFC')); const opts: FindWordOptions = ignoreCase ? 
findWordOptionsNotCaseSensitive : findWordOptionsCaseSensitive; const findResult = this.trie.findWord(mWord, opts); if (findResult.found !== false) { @@ -268,3 +263,15 @@ function findCache(fn: FindFunction, size = 2000): FindFunction { return find; } + +function outerWordForms(word: string, mapWord: (word: string) => string): Set { + const forms = pipe( + [word], + opConcatMap((word) => [word, word.normalize('NFC'), word.normalize('NFD')]), + opConcatMap((word) => [word, mapWord(word)]) + ); + + return new Set(forms); +} + +export const __testing__ = { outerWordForms }; diff --git a/packages/cspell-dictionary/src/SpellingDictionary/charset.ts b/packages/cspell-dictionary/src/SpellingDictionary/charset.ts deleted file mode 100644 index 8b4211d829e..00000000000 --- a/packages/cspell-dictionary/src/SpellingDictionary/charset.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { CharacterSet } from '@cspell/cspell-types'; - -export function charsetToRegExp(charset: CharacterSet | undefined): RegExp | undefined { - if (!charset) return undefined; - - try { - const reg = `[${charset.replace(/[\][\\]/g, '\\$&')}]`; - return new RegExp(reg, 'g'); - } catch (e) { - return undefined; - } -} diff --git a/packages/cspell-dictionary/src/SpellingDictionary/defaults.ts b/packages/cspell-dictionary/src/SpellingDictionary/defaults.ts new file mode 100644 index 00000000000..73107f2b929 --- /dev/null +++ b/packages/cspell-dictionary/src/SpellingDictionary/defaults.ts @@ -0,0 +1 @@ +export const ignoreCase = true; diff --git a/packages/cspell-dictionary/src/SpellingDictionary/index.ts b/packages/cspell-dictionary/src/SpellingDictionary/index.ts index eaa9d1c7c0f..45319d14561 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/index.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/index.ts @@ -1,6 +1,7 @@ export { CachingDictionary, createCachingDictionary } from './CachingDictionary'; export { createSpellingDictionary } from './createSpellingDictionary'; export { 
createForbiddenWordsDictionary } from './ForbiddenWordsDictionary'; +export { createIgnoreWordsDictionary } from './IgnoreWordsDictionary'; export type { FindOptions, FindResult, diff --git a/packages/cspell-dictionary/src/__snapshots__/index.test.ts.snap b/packages/cspell-dictionary/src/__snapshots__/index.test.ts.snap index d47f6468792..afafa3c1b69 100644 --- a/packages/cspell-dictionary/src/__snapshots__/index.test.ts.snap +++ b/packages/cspell-dictionary/src/__snapshots__/index.test.ts.snap @@ -5,6 +5,7 @@ exports[`index verify api 1`] = ` "createCachingDictionary", "createCollection", "createForbiddenWordsDictionary", + "createIgnoreWordsDictionary", "createSpellingDictionary", "createSpellingDictionaryFromTrieFile", ] diff --git a/packages/cspell-dictionary/src/index.ts b/packages/cspell-dictionary/src/index.ts index bc652ea8fe3..89a2b858e44 100644 --- a/packages/cspell-dictionary/src/index.ts +++ b/packages/cspell-dictionary/src/index.ts @@ -2,6 +2,7 @@ export { createCachingDictionary, createCollection, createForbiddenWordsDictionary, + createIgnoreWordsDictionary, createSpellingDictionary, createSpellingDictionaryFromTrieFile, SpellingDictionaryCollection, diff --git a/packages/cspell-dictionary/src/util/repMap.test.ts b/packages/cspell-dictionary/src/util/repMap.test.ts index 16e1a7322a9..a42c8f195ee 100644 --- a/packages/cspell-dictionary/src/util/repMap.test.ts +++ b/packages/cspell-dictionary/src/util/repMap.test.ts @@ -1,19 +1,21 @@ -import * as repMap from './repMap'; +import { __testing__, createMapper } from './repMap'; + +const { createMapperRegExp, charsetToRepMap } = __testing__; describe('ReMap Tests', () => { test('empty replace map', () => { - const mapper = repMap.createMapper([]); + const mapper = createMapper([]); expect(mapper('hello')).toBe('hello'); }); test('punctuation replacement', () => { - const mapper = repMap.createMapper([['`', "'"]]); + const mapper = createMapper([['`', "'"]]); expect(mapper('hello')).toBe('hello'); 
expect(mapper('don`t')).toBe("don't"); }); test('multiple replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['a', 'A'], ['b', 'B'], ]); @@ -22,7 +24,7 @@ describe('ReMap Tests', () => { }); test('empty replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['a', 'A'], ['b', 'B'], ['', ''], @@ -32,7 +34,7 @@ describe('ReMap Tests', () => { }); test('regex replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['!|@|#|\\$', '_'], ['a', 'A'], ]); @@ -40,7 +42,7 @@ describe('ReMap Tests', () => { }); test('repeated replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['a', 'A'], ['a', 'X'], ]); @@ -48,7 +50,7 @@ describe('ReMap Tests', () => { }); test('nested regex replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['(!)', '_'], ['((\\$))', '#'], ['a', 'A'], @@ -57,7 +59,7 @@ describe('ReMap Tests', () => { }); test('bad regex replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['(', '_'], ['a', 'A'], ]); @@ -65,7 +67,7 @@ describe('ReMap Tests', () => { }); test('empty regex replacements', () => { - const mapper = repMap.createMapper([ + const mapper = createMapper([ ['', '_'], ['a', 'A'], ]); @@ -81,7 +83,29 @@ describe('ReMap Tests', () => { ${[['ae', 'ä'], ['s{2}', 'ß']]} | ${'strasse'} | ${'straße'} ${[['ae', 'ä'], ['ss', 'ß']]} | ${'STRASSE'} | ${'STRASSE'} `('map with word $map / $word', ({ map, word, expected }) => { - const mapper = repMap.createMapper(map); + const mapper = createMapper(map); expect(mapper(word)).toBe(expected); }); + + test.each` + map | expected + ${[]} | ${/$^/} + ${[['ae', 'ä'], ['s{2}', 'ß']]} | ${/(ae)|(s{2})/g} + ${[['ae', 'ä'], ['ss', 'ß']]} | ${/(ae)|(ss)/g} + `('createMapperRegExp $map', ({ map, expected }) => { + const reg = createMapperRegExp(map); + expect(reg).toEqual(expected); + }); 
+ + test.each` + charset | expected + ${undefined} | ${undefined} + ${''} | ${undefined} + ${'a-z'} | ${[['[a-z]', '']]} + ${'0x300-0x308'} | ${[['[0x300-0x308]', '']]} + ${'0x300-0x308|a-z'} | ${[['[0x300-0x308]', ''], ['[a-z]', '']]} + `('charsetToRepMap $charset', ({ charset, expected }) => { + const reg = charsetToRepMap(charset); + expect(reg).toEqual(expected); + }); }); diff --git a/packages/cspell-dictionary/src/util/repMap.ts b/packages/cspell-dictionary/src/util/repMap.ts index 42370a23aa0..6d0c9170067 100644 --- a/packages/cspell-dictionary/src/util/repMap.ts +++ b/packages/cspell-dictionary/src/util/repMap.ts @@ -1,13 +1,48 @@ -import type { ReplaceMap } from '@cspell/cspell-types'; +import type { CharacterSet, ReplaceMap } from '@cspell/cspell-types'; import { escapeRegEx } from './regexHelper'; export type ReplaceMapper = (src: string) => string; -export function createMapper(repMap: ReplaceMap): ReplaceMapper { +export function createMapper(repMap: ReplaceMap | undefined, ignoreCharset?: string): ReplaceMapper { + if (!repMap && !ignoreCharset) return (a) => a; + repMap = repMap || []; + const charsetMap = charsetToRepMap(ignoreCharset); + if (charsetMap) { + repMap = repMap.concat(charsetMap); + } + const filteredMap = repMap.filter(([match, _]) => !!match); if (!filteredMap.length) { return (a) => a; } + + const regEx = createMapperRegExp(repMap); + const values = repMap.filter(([match, _]) => !!match).map(([_, into]) => into); + + function resolve(m: string, ...matches: unknown[]) { + const index = matches.findIndex((a) => !!a); + return 0 <= index && index < values.length ? 
values[index] : m; + } + + return function (s: string) { + return s.replace(regEx, resolve); + }; +} + +function charsetToRepMap(charset: CharacterSet | undefined, replaceWith = ''): ReplaceMap | undefined { + if (!charset) return undefined; + + return charset + .split('|') + .map((chars) => `[${chars.replace(/[\][\\]/g, '\\$&')}]`) + .map((map) => [map, replaceWith]); +} + +function createMapperRegExp(repMap: ReplaceMap): RegExp { + const filteredMap = repMap.filter(([match, _]) => !!match); + if (!filteredMap.length) { + return /$^/; + } const regExStr = filteredMap .map(([from, _]) => from) // make sure it compiles into a regex @@ -26,14 +61,11 @@ export function createMapper(repMap: ReplaceMap): ReplaceMapper { .join('|'); const regEx = new RegExp(regExStr, 'g'); - const values = repMap.filter(([match, _]) => !!match).map(([_, into]) => into); - function resolve(m: string, ...matches: unknown[]) { - const index = matches.findIndex((a) => !!a); - return 0 <= index && index < values.length ? values[index] : m; - } - - return function (s: string) { - return s.replace(regEx, resolve); - }; + return regEx; } + +export const __testing__ = { + charsetToRepMap, + createMapperRegExp, +}; From 3593ed36c2c4eed6e9a3894687e75180e577621c Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Thu, 29 Sep 2022 16:07:17 +0200 Subject: [PATCH 2/4] fix: Fix in document directive detection. 
--- packages/cspell-lib/src/Settings/InDocSettings.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/cspell-lib/src/Settings/InDocSettings.ts b/packages/cspell-lib/src/Settings/InDocSettings.ts index cb4887fa167..1422f55df5b 100644 --- a/packages/cspell-lib/src/Settings/InDocSettings.ts +++ b/packages/cspell-lib/src/Settings/InDocSettings.ts @@ -8,9 +8,9 @@ import { mergeInDocSettings } from './CSpellSettingsServer'; // cspell:ignore gimuy const regExMatchRegEx = /\/.*\/[gimuy]*/; -const regExCSpellInDocDirective = /(?:spell-?checker|c?spell)::?(.*)/gi; -const regExCSpellDirectiveKey = /(?<=(?:spell-?checker|c?spell)::?)(?!:)(.*)/i; -const regExInFileSettings = [regExCSpellInDocDirective, /(LocalWords:?.*)/g]; +const regExCSpellInDocDirective = /\b(?:spell-?checker|c?spell)::?(.*)/gi; +const regExCSpellDirectiveKey = /(?<=\b(?:spell-?checker|c?spell)::?)(?!:)(.*)/i; +const regExInFileSettings = [regExCSpellInDocDirective, /\b(LocalWords:?.*)/g]; export type CSpellUserSettingsKeys = keyof CSpellUserSettings; From 81792f564149be44f78ce8cc84078e8237ca5560 Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Thu, 29 Sep 2022 16:08:00 +0200 Subject: [PATCH 3/4] fix: add createIgnoreWordsDictionary to cspell-lib --- .../src/SpellingDictionary/Dictionaries.ts | 12 ++++-------- .../src/SpellingDictionary/SpellingDictionary.ts | 3 ++- .../createSpellingDictionary.ts | 15 +++++++++++++++ .../SpellingDictionaryLibOld/index.ts | 4 +++- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/packages/cspell-lib/src/SpellingDictionary/Dictionaries.ts b/packages/cspell-lib/src/SpellingDictionary/Dictionaries.ts index ddc0e58f074..45b0f2830a8 100644 --- a/packages/cspell-lib/src/SpellingDictionary/Dictionaries.ts +++ b/packages/cspell-lib/src/SpellingDictionary/Dictionaries.ts @@ -33,7 +33,8 @@ function _getDictionaryInternal( spellDictionaries: SpellingDictionary[] ): SpellingDictionaryCollection { const { words = emptyWords, userWords = 
emptyWords, flagWords = emptyWords, ignoreWords = emptyWords } = settings; - const { createSpellingDictionary, createCollection, createForbiddenWordsDictionary } = getSpellDictInterface(); + const { createSpellingDictionary, createIgnoreWordsDictionary, createCollection, createForbiddenWordsDictionary } = + getSpellDictInterface(); const settingsWordsDictionary = createSpellingDictionary(words, '[words]', 'From Settings `words`', { caseSensitive: true, @@ -45,15 +46,10 @@ function _getDictionaryInternal( weightMap: undefined, }) : undefined; - const ignoreWordsDictionary = createSpellingDictionary( + const ignoreWordsDictionary = createIgnoreWordsDictionary( ignoreWords, '[ignoreWords]', - 'From Settings `ignoreWords`', - { - caseSensitive: true, - noSuggest: true, - weightMap: undefined, - } + 'From Settings `ignoreWords`' ); const flagWordsDictionary = createForbiddenWordsDictionary(flagWords, '[flagWords]', 'From Settings `flagWords`', { weightMap: undefined, diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts index 14c2053641e..91fe43bb558 100644 --- a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts +++ b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts @@ -7,6 +7,7 @@ const SpellingDictionaryModule = { createCollection: cspellDictModule.createCollection, createForbiddenWordsDictionary: cspellDictModule.createForbiddenWordsDictionary, createSpellingDictionary: cspellDictModule.createSpellingDictionary, + createIgnoreWordsDictionary: cspellDictModule.createIgnoreWordsDictionary, } as const; type SpellDictInterface = typeof SpellingDictionaryModule | typeof SpellingDictionaryLibOld; @@ -28,7 +29,7 @@ export type { } from 'cspell-dictionary'; export function getSpellDictInterface(): SpellDictInterface { - const useModule = getSystemFeatureFlags().getFlagBool(flagUseCSpellDictionary) ?? 
true; + const useModule = getSystemFeatureFlags().getFlagBool(flagUseCSpellDictionary) ?? false; return useModule ? SpellingDictionaryModule : SpellingDictionaryLibOld; } diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/createSpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/createSpellingDictionary.ts index 5bbc7348538..f3fe7dc1a93 100644 --- a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/createSpellingDictionary.ts +++ b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/createSpellingDictionary.ts @@ -100,3 +100,18 @@ export function createFailedToLoadDictionary(error: SpellingDictionaryLoadError) getErrors: () => errors, }; } + +export function createIgnoreWordsDictionary( + wordList: readonly string[], + name: string, + source: string +): SpellingDictionary { + // console.log('createIgnoreWordsDictionary %o', wordList); + + return createSpellingDictionary(wordList, name, source, { + caseSensitive: true, + noSuggest: true, + weightMap: undefined, + supportNonStrictSearches: true, + }); +} diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/index.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/index.ts index ba60607a1b9..9bf1f0b9765 100644 --- a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/index.ts +++ b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionaryLibOld/index.ts @@ -2,14 +2,16 @@ export * from './SpellingDictionary'; import { createCollection } from './SpellingDictionaryCollection'; import { - createSpellingDictionary, createFailedToLoadDictionary, createForbiddenWordsDictionary, + createIgnoreWordsDictionary, + createSpellingDictionary, } from './createSpellingDictionary'; export const SpellingDictionaryLibOld = { createCollection, createFailedToLoadDictionary, createForbiddenWordsDictionary, + createIgnoreWordsDictionary, createSpellingDictionary, } as 
const; From 91356560bac161436aaf813d9c174163faf2fd4b Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Thu, 29 Sep 2022 16:40:51 +0200 Subject: [PATCH 4/4] set IgnoreWordsDictionary containsNoSuggestWords --- .../IgnoreWordsDictionary.ts | 3 +- .../SpellingDictionaryCollection.test.ts | 32 +++++++++++++++++++ .../SpellingDictionary/SpellingDictionary.ts | 2 +- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts index f374325b6e3..51422e384b6 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts @@ -16,7 +16,7 @@ const NormalizeForm = 'NFC' as const; class IgnoreWordsDictionary implements SpellingDictionary { private dict: Set; private dictNonStrict: Set; - readonly containsNoSuggestWords = false; + readonly containsNoSuggestWords = true; readonly options: SpellingDictionaryOptions = {}; readonly type = 'forbidden'; constructor(readonly name: string, readonly source: string, words: Iterable) { @@ -112,6 +112,7 @@ export function createIgnoreWordsDictionary( const words = [...parseDictionaryLines(wordList, { stripCaseAndAccents: true })].map((w) => w.normalize(NormalizeForm) ); + const hasSpecial = words.findIndex((word) => testSpecialCharacters.test(word)) >= 0; if (hasSpecial) { diff --git a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts index b6ac1805776..a9ceb480574 100644 --- a/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts +++ b/packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryCollection.test.ts @@ -2,6 +2,7 @@ import * as Trie from 'cspell-trie-lib'; import { SpellingDictionaryOptions } from '.'; import { 
createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary'; import { createForbiddenWordsDictionary } from './ForbiddenWordsDictionary'; +import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary'; import { CompoundWordsMethod } from './SpellingDictionary'; import { createCollection } from './SpellingDictionaryCollection'; import { SpellingDictionaryFromTrie } from './SpellingDictionaryFromTrie'; @@ -29,12 +30,14 @@ describe('Verify using multiple dictionaries', () => { const wordsG = ['café', 'accent']; const wordsLegacy = ['error', 'code', 'system', 'ctrl']; + const wordsIgnore = ['ignored']; // cspell:ignore pinkberry behaviour colour const wordsNoSug = ['colour', 'behaviour', 'favour', 'pinkberry']; const dictNoSug = createSpellingDictionary(wordsNoSug, 'words-no-suggest', 'test', opts({ noSuggest: true })); const dictLegacy = createSpellingDictionary(wordsLegacy, 'legacy-dict', 'test', opts({ useCompounds: true })); + const dictIgnore = createIgnoreWordsDictionary(wordsIgnore, '[ignore]', 'test'); test.each` word | expected @@ -62,6 +65,28 @@ describe('Verify using multiple dictionaries', () => { expect(dictCollection.has(word)).toEqual(expected); }); + test.each` + word | expected + ${'colour'} | ${true} + ${'Colour'} | ${true} + ${'tree'} | ${false} + ${''} | ${false} + ${'ignored'} | ${true} + ${'Ignored'} | ${true} + ${'behaviour'} | ${true} + ${'guava'} | ${false} + `('isNoSuggestWord "$word"', async ({ word, expected }) => { + const dicts = await Promise.all([ + createSpellingDictionary(wordsA, 'wordsA', 'test', opts()), + createForbiddenWordsDictionary(['behaviour', 'guava', 'Ignored', 'Colour'], 'flag_words', 'test'), + dictNoSug, + dictIgnore, + ]); + + const dictCollection = createCollection(dicts, 'test'); + expect(dictCollection.isNoSuggestWord(word, {})).toEqual(expected); + }); + test('has for forbidden word.', () => { const word = 'guava'; const expected = false; @@ -220,6 +245,7 @@ describe('Verify 
using multiple dictionaries', () => { ${'áccent'} | ${true /* ignore the accent. cspell:disable-line */} ${'a\u0301ccent'} | ${true /* ignore the accent. cspell:disable-line */} ${'applé'} | ${true /* ignore the accent. cspell:disable-line */} + ${'ignored'} | ${true} `('checks has word: "$word"', ({ word, expected }) => { const dicts = [ createSpellingDictionary(wordsA, 'wordsA', 'test', { dictionaryInformation: { ignore: '\u0300-\u0362' } }), @@ -232,6 +258,7 @@ describe('Verify using multiple dictionaries', () => { caseSensitive: true, }), createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'), + dictIgnore, ]; const dictCollection = createCollection(dicts, 'test'); @@ -248,6 +275,7 @@ describe('Verify using multiple dictionaries', () => { ${'pinkbug'} | ${{ found: 'pinkbug', forbidden: false, noSuggest: false }} ${'colour'} | ${{ found: 'colour', forbidden: false, noSuggest: true }} ${'behaviour'} | ${{ found: 'behaviour', forbidden: false, noSuggest: true }} + ${'ignored'} | ${{ found: 'ignored', forbidden: false, noSuggest: true }} `('find: "$word"', ({ word, expected }) => { const dicts = [ createSpellingDictionary(wordsA, 'wordsA', 'test', undefined), @@ -257,6 +285,7 @@ describe('Verify using multiple dictionaries', () => { createSpellingDictionary(wordsF, 'wordsF', 'test', undefined), createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'), dictNoSug, + dictIgnore, ]; const dictCollection = createCollection(dicts, 'test'); @@ -287,6 +316,7 @@ describe('Verify using multiple dictionaries', () => { createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'), dictNoSug, dictLegacy, + dictIgnore, ]; const dictCollection = createCollection(dicts, 'test'); @@ -328,6 +358,7 @@ describe('Verify using multiple dictionaries', () => { ${'bug'} | ${[sr('bug', 5)]} ${'blackberry'} | ${[sr('blackberry', 0), sr('black berry', 98)]} ${'stinkbug'} | ${[sr('stink bug', 103), sr('pinkbug', 198)]} + ${'ignored'} | ${[]} `('checks suggestions word: 
"$word"', ({ word, expected }) => { const dicts = [ createSpellingDictionary(wordsA, 'wordsA', 'test', undefined), @@ -336,6 +367,7 @@ describe('Verify using multiple dictionaries', () => { createSpellingDictionary(wordsD, 'wordsD', 'test', undefined), createSpellingDictionary(wordsF, 'wordsF', 'test', undefined), createForbiddenWordsDictionary(['Avocado'], 'flag_words', 'test'), + dictIgnore, ]; const dictCollection = createCollection(dicts, 'test'); diff --git a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts index 91fe43bb558..3308dc4d427 100644 --- a/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts +++ b/packages/cspell-lib/src/SpellingDictionary/SpellingDictionary.ts @@ -29,7 +29,7 @@ export type { } from 'cspell-dictionary'; export function getSpellDictInterface(): SpellDictInterface { - const useModule = getSystemFeatureFlags().getFlagBool(flagUseCSpellDictionary) ?? false; + const useModule = getSystemFeatureFlags().getFlagBool(flagUseCSpellDictionary) ?? true; return useModule ? SpellingDictionaryModule : SpellingDictionaryLibOld; }