Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: improve replace map used with dictionaries (#3679)
* create IgnoreWordsDictionary * improve search speed. * improve replace map used with dictionaries. Related to #3592 * fix: Add IgnoreWordsDictionary to cspell-dictionary * fix: Fix in document directive detection. * fix: add createIgnoreWordsDictionary to cspell-lib * set IgnoreWordsDictionary containsNoSuggestWords
- Loading branch information
Showing
17 changed files
with
398 additions
and
59 deletions.
There are no files selected for viewing
92 changes: 92 additions & 0 deletions
92
packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import { createIgnoreWordsDictionary } from './IgnoreWordsDictionary'; | ||
|
||
// const oc = expect.objectContaining; | ||
|
||
// cspell:ignore êphone îphone geschäft | ||
|
||
// Exercises the dictionary returned by createIgnoreWordsDictionary:
// has, find, isNoSuggestWord, isForbidden and suggest.
describe('IgnoreWordsDictionary', () => { | ||
const dictWords = [' English', 'grumpy', 'Avocado', 'avocados', 'Café', ' êphone', 'îphone', 'geschäft']; | ||
const dict = createIgnoreWordsDictionary(dictWords, 'ignore words', 'test'); | ||
|
||
// has(): every listed word is expected to be reported as present, except the empty string.
test.each` | ||
word | expected | ||
${''} | ${false} | ||
${'avocado'} | ${true} | ||
${'Avocado'} | ${true} | ||
${'english'} | ${true} | ||
${'English'} | ${true} | ||
${'grumpy'} | ${true} | ||
${'Grumpy'} | ${true} | ||
`('has of "$word"', async ({ word, expected }) => { | ||
expect(dict.has(word)).toEqual(expected); | ||
}); | ||
|
||
// find(): a hit is expected to carry the matched word with forbidden: false and noSuggest: true;
// a miss is expected to be undefined.
test.each` | ||
word | options | expected | ||
${'avocado'} | ${undefined} | ${{ found: 'avocado', forbidden: false, noSuggest: true }} | ||
${'Avocado'} | ${undefined} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }} | ||
${'Avocado'} | ${{ ignoreCase: true }} | ${{ found: 'Avocado', forbidden: false, noSuggest: true }} | ||
${''} | ${undefined} | ${undefined} | ||
${'English'} | ${{ ignoreCase: true }} | ${{ found: 'English', forbidden: false, noSuggest: true }} | ||
${'English'} | ${{ ignoreCase: false }} | ${{ found: 'English', forbidden: false, noSuggest: true }} | ||
${'english'} | ${{ ignoreCase: true }} | ${{ found: 'english', forbidden: false, noSuggest: true }} | ||
${'english'} | ${{ ignoreCase: false }} | ${undefined} | ||
${'îphone'.normalize('NFC')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }} | ||
${'îphone'.normalize('NFD')} | ${undefined} | ${{ found: 'îphone'.normalize('NFC'), forbidden: false, noSuggest: true }} | ||
${'iphone'} | ${undefined} | ${{ found: 'iphone', forbidden: false, noSuggest: true }} | ||
`('find "$word" $options', async ({ word, options, expected }) => { | ||
expect(dict.find(word, options)).toEqual(expected); | ||
}); | ||
|
||
// isNoSuggestWord(): always called with an explicit { ignoreCase } option;
// the `undefined` rows pass ignoreCase: undefined.
test.each` | ||
word | ignoreCase | expected | ||
${''} | ${undefined} | ${false} | ||
${'avocado'} | ${false} | ${false} | ||
${'avocado'} | ${undefined} | ${true} | ||
${'avocado'} | ${true} | ${true} | ||
${'Avocado'} | ${undefined} | ${true} | ||
${'avocadoS'} | ${undefined} | ${true} | ||
${'AvocadoS'} | ${undefined} | ${true} | ||
${'Café'} | ${false} | ${true} | ||
${'cafe'} | ${false} | ${false} | ||
${'cafe'} | ${true} | ${true} | ||
${'îphone'.normalize('NFC')} | ${true} | ${true} | ||
${'îphone'.normalize('NFD')} | ${true} | ${true} | ||
${'english'} | ${undefined} | ${true} | ||
${'English'} | ${undefined} | ${true} | ||
${'grumpy'} | ${undefined} | ${true} | ||
${'Grumpy'} | ${undefined} | ${true} | ||
`('isNoSuggestWord of "$word" ignoreCase: $ignoreCase', async ({ word, ignoreCase, expected }) => { | ||
expect(dict.isNoSuggestWord(word, { ignoreCase })).toEqual(expected); | ||
}); | ||
|
||
// isForbidden(): expected to be false for every word, including words that are in the list.
test.each` | ||
word | expected | ||
${'avocado'} | ${false} | ||
${'Avocado'} | ${false} | ||
${''} | ${false} | ||
${'English'} | ${false} | ||
${'english'} | ${false} | ||
${'avocadoS'} | ${false} | ||
${'AvocadoS'} | ${false} | ||
${'grumpy'} | ${false} | ||
${'Grumpy'} | ${false} | ||
`('isForbidden of "$word"', async ({ word, expected }) => { | ||
expect(dict.isForbidden(word)).toEqual(expected); | ||
}); | ||
|
||
// suggest(): expected to return an empty list for every word.
test.each` | ||
word | expected | ||
${'avocado'} | ${[]} | ||
${'Avocado'} | ${[]} | ||
${''} | ${[]} | ||
${'English'} | ${[]} | ||
${'english'} | ${[]} | ||
${'avocadoS'} | ${[]} | ||
${'AvocadoS'} | ${[]} | ||
${'grumpy'} | ${[]} | ||
${'Grumpy'} | ${[]} | ||
`('suggest of "$word"', async ({ word, expected }) => { | ||
expect(dict.suggest(word)).toEqual(expected); | ||
}); | ||
}); |
128 changes: 128 additions & 0 deletions
128
packages/cspell-dictionary/src/SpellingDictionary/IgnoreWordsDictionary.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
import { opFilter, opMap, pipe } from '@cspell/cspell-pipe/sync'; | ||
import { CompoundWordsMethod, parseDictionaryLines, SuggestionResult } from 'cspell-trie-lib'; | ||
import { | ||
FindResult, | ||
HasOptions, | ||
SearchOptions, | ||
SpellingDictionary, | ||
SpellingDictionaryOptions, | ||
SuggestOptions, | ||
} from './SpellingDictionary'; | ||
import * as Defaults from './defaults'; | ||
import { createSpellingDictionary } from './createSpellingDictionary'; | ||
|
||
// Unicode normalization form applied to words before they are stored in or looked up from the ignore list.
const NormalizeForm = 'NFC' as const; | ||
|
||
/**
 * A dictionary of words that should be ignored by the spell checker.
 *
 * Lookups are served from two sets:
 * - `dict` holds every entry exactly as it was given to the constructor.
 * - `dictNonStrict` holds the entries that started with `~`, stored without
 *   that prefix. It is consulted only when a lookup is case-insensitive.
 *
 * NOTE(review): the `~` entries are presumably the case/accent-stripped forms
 * emitted by `parseDictionaryLines` with `stripCaseAndAccents` — confirm
 * against cspell-trie-lib.
 *
 * Every match is reported as `noSuggest` and never as `forbidden`.
 */
class IgnoreWordsDictionary implements SpellingDictionary {
    private readonly dict: Set<string>;
    private readonly dictNonStrict: Set<string>;
    readonly containsNoSuggestWords = true;
    readonly options: SpellingDictionaryOptions = {};
    // This dictionary ignores words; it never forbids them (see `isForbidden`).
    readonly type = 'ignore';
    readonly isDictionaryCaseSensitive: boolean = true;

    /**
     * @param name - name of the dictionary
     * @param source - where the dictionary came from
     * @param words - entries to ignore; entries starting with `~` are also
     *   indexed, without the prefix, for case-insensitive lookups
     */
    constructor(readonly name: string, readonly source: string, words: Iterable<string>) {
        this.dict = new Set(words);
        this.dictNonStrict = new Set(
            pipe(
                this.dict,
                opFilter((w) => w.startsWith('~')),
                opMap((w) => w.slice(1))
            )
        );
    }

    /**
     * Check whether a word is in the ignore list.
     *
     * The word is normalized, then looked up as-is and in lower case in the
     * exact set. The `~`-derived set is consulted only when `ignoreCase` is in
     * effect.
     * @param word - the word to look up
     * @param options - `ignoreCase` falls back to `Defaults.ignoreCase` when omitted
     * @returns true if the word should be ignored
     */
    has(word: string, options?: HasOptions): boolean {
        const nWord = word.normalize(NormalizeForm);
        if (this.dict.has(nWord)) return true;
        const lcWord = nWord.toLowerCase();
        if (this.dict.has(lcWord)) return true;
        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
        return ignoreCase && (this.dictNonStrict.has(nWord) || this.dictNonStrict.has(lcWord));
    }

    /**
     * Same lookup order as `has`, but reports which form of the word matched.
     * @param word - the word to look up
     * @param options - `ignoreCase` falls back to `Defaults.ignoreCase` when omitted
     * @returns the matched form flagged as `noSuggest` and not `forbidden`,
     *   or `undefined` when the word is not in the list
     */
    find(word: string, options?: SearchOptions): FindResult | undefined {
        const nWord = word.normalize(NormalizeForm);
        if (this.dict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true };
        const lcWord = nWord.toLowerCase();
        if (this.dict.has(lcWord)) return { found: lcWord, forbidden: false, noSuggest: true };

        const ignoreCase = options?.ignoreCase ?? Defaults.ignoreCase;
        if (!ignoreCase) return undefined;

        if (this.dictNonStrict.has(nWord)) return { found: nWord, forbidden: false, noSuggest: true };
        return this.dictNonStrict.has(lcWord) ? { found: lcWord, forbidden: false, noSuggest: true } : undefined;
    }

    /** An ignore list never forbids a word. */
    isForbidden(_word: string): boolean {
        return false;
    }

    /** Every word in this dictionary is a no-suggest word, so this is the same test as `has`. */
    isNoSuggestWord(word: string, options: HasOptions): boolean {
        return this.has(word, options);
    }

    /** This dictionary never offers suggestions; the result is always empty. */
    suggest(
        word: string,
        numSuggestions?: number,
        compoundMethod?: CompoundWordsMethod,
        numChanges?: number,
        ignoreCase?: boolean
    ): SuggestionResult[];
    suggest(word: string, suggestOptions: SuggestOptions): SuggestionResult[];
    suggest(): SuggestionResult[] {
        return [];
    }

    /** No suggestions are generated. */
    genSuggestions(): void {
        return;
    }

    /** Words are returned unchanged. */
    mapWord(word: string): string {
        return word;
    }

    /** Number of entries in the exact set (the `~`-prefixed entries are counted as given). */
    get size() {
        return this.dict.size;
    }

    /** This dictionary records no load errors. */
    getErrors?(): Error[] {
        return [];
    }
}
|
||
/**
 * Build a dictionary in which every word is to be ignored.
 * Ignored words override forbidden words.
 *
 * The word list is run through `parseDictionaryLines` (with
 * `stripCaseAndAccents` enabled) and each resulting entry is normalized.
 * If any entry contains `*` or `+`, the list is handed to
 * `createSpellingDictionary` as a case-sensitive, no-suggest dictionary;
 * otherwise a set-based `IgnoreWordsDictionary` is returned.
 * NOTE(review): `*` and `+` are presumably dictionary-file markers that the
 * set-based lookup cannot honour — confirm against cspell-trie-lib.
 *
 * @param wordList - list of words
 * @param name - name of dictionary
 * @param source - dictionary source
 * @returns a dictionary that reports the given words as ignored
 */
export function createIgnoreWordsDictionary(
    wordList: readonly string[],
    name: string,
    source: string
): SpellingDictionary {
    const specialCharacters = /[*+]/;

    const normalized: string[] = [];
    for (const entry of parseDictionaryLines(wordList, { stripCaseAndAccents: true })) {
        normalized.push(entry.normalize(NormalizeForm));
    }

    const containsSpecial = normalized.some((entry) => specialCharacters.test(entry));
    if (!containsSpecial) {
        return new IgnoreWordsDictionary(name, source, normalized);
    }

    return createSpellingDictionary(normalized, name, source, {
        caseSensitive: true,
        noSuggest: true,
        weightMap: undefined,
        supportNonStrictSearches: true,
    });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
packages/cspell-dictionary/src/SpellingDictionary/SpellingDictionaryFromTrie.test.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import { createMapper } from '../util/repMap'; | ||
import { __testing__ } from './SpellingDictionaryFromTrie'; | ||
|
||
// Pull the helper under test out of the module's `__testing__` export.
const { outerWordForms } = __testing__; | ||
|
||
// cspell:ignore guenstig günstig | ||
|
||
// Checks the set of word forms that outerWordForms produces for a word under a given replacement map.
describe('SpellingDictionaryFromTrie', () => { | ||
// Each row: input word, replacement pairs passed to createMapper, and the forms expected in the resulting Set.
test.each` | ||
word | repMap | expected | ||
${'hello'} | ${undefined} | ${['hello']} | ||
${'guenstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['guenstig', 'günstig']} | ||
${'günstig'} | ${[['ae', 'ä'], ['oe', 'ö'], ['ue', 'ü'], ['ss', 'ß']]} | ${['günstig', 'günstig'.normalize('NFD')]} | ||
`('outerWordForms $word', ({ word, repMap, expected }) => { | ||
const mapWord = createMapper(repMap); | ||
// Fall back to an identity mapper when createMapper yields null/undefined.
expect(outerWordForms(word, mapWord ?? ((a) => a))).toEqual(new Set(expected)); | ||
}); | ||
}); |
Oops, something went wrong.