diff --git a/packages/cspell-trie-lib/src/lib/find.dutch.test.ts b/packages/cspell-trie-lib/src/lib/find.dutch.test.ts index 99fe954e14a..143daee0848 100644 --- a/packages/cspell-trie-lib/src/lib/find.dutch.test.ts +++ b/packages/cspell-trie-lib/src/lib/find.dutch.test.ts @@ -22,7 +22,7 @@ describe('Validate findWord', () => { matchCase: true, compoundMode: 'none', }) - ).toEqual(frFound('aanvaardbaard', { forbidden: true })); + ).toEqual({ ...frFound(false), forbidden: true }); expect(findWord(trie, 'code', { matchCase: true, compoundMode: 'none' })).toEqual({ found: 'code', @@ -40,10 +40,14 @@ describe('Validate findWord', () => { }); const tests: [string, PartialFindOptions, FindFullResult][] = [ - ['Code', { matchCase: true, compoundMode: 'none' }, frNotFound()], - ['code', { matchCase: true, compoundMode: 'none' }, frFound('code')], - ['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound()], - ['cafe', { matchCase: false, compoundMode: 'none' }, frFound('cafe', { caseMatched: false })], + ['Code', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })], + ['code', { matchCase: true, compoundMode: 'none' }, frFound('code', { forbidden: false })], + ['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })], + [ + 'cafe', + { matchCase: false, compoundMode: 'none' }, + frFound('cafe', { caseMatched: false, forbidden: undefined }), + ], // Compounding enabled, but matching whole words (compounding not used). ['Code', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)], @@ -84,7 +88,7 @@ describe('Validate findWord', () => { compoundMode: 'compound', })); expect(r2.found).toEqual(word); - expect(r2.forbidden).toBe(false); + expect(r2.forbidden).toBeFalsy(); }); test.each(sampleWords())('Find Word case insensitive: %s', async (word) => { @@ -94,7 +98,7 @@ describe('Validate findWord', () => { compoundMode: 'compound', }); expect(r.found).toEqual(normalizeWordToLowercase(word)); - expect(r.forbidden).toBe(false); + expect(r.forbidden).toBeFalsy(); }); test.each(sampleMisspellings())(`Check misspelled words: %s`, async (word) => { @@ -113,7 +117,7 @@ describe('Validate findWord', () => { compoundMode: 'compound', })); expect(r2.found).toEqual(false); - expect(r2.forbidden).toBe(false); + expect(r2.forbidden).toBeFalsy(); }); test.each(sampleMisspellings())(`Check misspelled words case insensitive: %s`, async (word) => { @@ -123,7 +127,7 @@ describe('Validate findWord', () => { compoundMode: 'compound', }); expect(r.found).toEqual(false); - expect(r.forbidden).toBe(false); + expect(r.forbidden).toBeFalsy(); }); }); @@ -192,14 +196,14 @@ function processText(text: string): string[] { } function testCompound(word: string, found = true): [string, PartialFindOptions, FindFullResult] { - return [word, { matchCase: true, compoundMode: 'compound' }, frCompoundFound(found && word)]; + return [word, { matchCase: true, compoundMode: 'compound' }, frCompoundFound(found && word, { forbidden: false })]; } type PartialFindFullResult = Partial; function fr({ found = false, - forbidden = false, + forbidden = undefined, compoundUsed = false, caseMatched = true, }: PartialFindFullResult): FindFullResult { diff --git a/packages/cspell-trie-lib/src/lib/find.test.ts b/packages/cspell-trie-lib/src/lib/find.test.ts index c1480343c33..10db976b50b 100644 --- a/packages/cspell-trie-lib/src/lib/find.test.ts +++ b/packages/cspell-trie-lib/src/lib/find.test.ts @@ -14,37 +14,36 @@ const findLegacyCompoundWord = __testing__.findLegacyCompoundWord; describe('Validate findWord', () => { const trie = dictionary().root; - test('find exact words preserve case', () => { + // cspell:ignore bluemsg + test.each` + word | opts | expected + ${'blueerror'} | ${{ matchCase: true, compoundMode: 'none' }} | ${{ found: false, compoundUsed: false, forbidden: true, caseMatched: true }} + ${'blueerror'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: false, compoundUsed: true, forbidden: undefined, caseMatched: true }} + ${'blueCode'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: 'blueCode', compoundUsed: true, forbidden: true, caseMatched: true }} + ${'bluecode'} | ${{ matchCase: false, compoundMode: 'compound' }} | ${{ found: 'bluecode', compoundUsed: true, forbidden: true, caseMatched: false }} + ${'bluemsg'} | ${{ matchCase: false, compoundMode: 'compound' }} | ${{ found: 'bluemsg', compoundUsed: true, forbidden: false, caseMatched: true }} + ${'code'} | ${{ matchCase: true, compoundMode: 'none' }} | ${{ found: 'code', compoundUsed: false, forbidden: false, caseMatched: true }} + ${'code'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: 'code', compoundUsed: false, forbidden: undefined, caseMatched: true }} + `('find exact words preserve case "$word" $opts', ({ word, opts, expected }) => { // Code is not allowed as a full word. - expect( - findWord(trie, 'blueerror', { - matchCase: true, - compoundMode: 'none', - }) - ).toEqual({ found: 'blueerror', compoundUsed: false, forbidden: true, caseMatched: true }); - - expect(findWord(trie, 'code', { matchCase: true, compoundMode: 'none' })).toEqual({ - found: 'code', - compoundUsed: false, - forbidden: false, - caseMatched: true, - }); - - expect( - findWord(trie, 'code', { - matchCase: true, - compoundMode: 'compound', - }) - ).toEqual(expect.objectContaining({ found: 'code', compoundUsed: false, forbidden: false, caseMatched: true })); + expect(findWord(trie, word, opts)).toEqual(expected); }); const tests: [string, PartialFindOptions, FindFullResult][] = [ - ['errorCodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorCodes')], - ['errorcodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorcodes')], - ['Code', { matchCase: true, compoundMode: 'none' }, frNotFound()], - ['code', { matchCase: true, compoundMode: 'none' }, frFound('code')], - ['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound()], - ['café', { matchCase: true, compoundMode: 'none' }, frFound('café')], + [ + 'errorCodes', + { matchCase: false, compoundMode: 'compound' }, + frCompoundFound('errorCodes', { forbidden: false }), + ], + [ + 'errorcodes', + { matchCase: false, compoundMode: 'compound' }, + frCompoundFound('errorcodes', { forbidden: false, caseMatched: false }), + ], + ['Code', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })], + ['code', { matchCase: true, compoundMode: 'none' }, frFound('code', { forbidden: false })], + ['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })], + ['café', { matchCase: true, compoundMode: 'none' }, frFound('café', { forbidden: false })], // non-normalized words ['café', { matchCase: false, compoundMode: 'none' }, frFound('café')], @@ -53,10 +52,10 @@ describe('Validate findWord', () => { ['Code', { matchCase: false, compoundMode: 'none' }, frNotFound()], // It will find the special characters. Might not be desired. - ['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+')], - ['+Code+', { matchCase: true, compoundMode: 'none' }, frFound('+Code+')], - ['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+')], - ['~+code+', { matchCase: true, compoundMode: 'none' }, frFound('~+code+')], + ['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+', { forbidden: false })], + ['+Code+', { matchCase: true, compoundMode: 'none' }, frFound('+Code+', { forbidden: false })], + ['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+', { forbidden: false })], + ['~+code+', { matchCase: true, compoundMode: 'none' }, frFound('~+code+', { forbidden: false })], // Compounding enabled, but matching whole words (compounding not used). ['Code', { matchCase: true, compoundMode: 'compound' }, frFound(false)], @@ -64,15 +63,27 @@ describe('Validate findWord', () => { ['cafe', { matchCase: true, compoundMode: 'compound' }, frFound(false)], ['cafe', { matchCase: false, compoundMode: 'compound' }, frFound('cafe', { caseMatched: false })], - ['errorCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound('errorCodes')], - ['errorCodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorCodes')], + [ + 'errorCodes', + { matchCase: true, compoundMode: 'compound' }, + frCompoundFound('errorCodes', { forbidden: false }), + ], + [ + 'errorCodes', + { matchCase: false, compoundMode: 'compound' }, + frCompoundFound('errorCodes', { forbidden: false }), + ], ['errorsCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)], ['errorsCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)], - ['codeErrors', { matchCase: true, compoundMode: 'compound' }, frCompoundFound('codeErrors')], + [ + 'codeErrors', + { matchCase: true, compoundMode: 'compound' }, + frCompoundFound('codeErrors', { forbidden: false }), + ], [ 'codeCodeCodeCodeError', { matchCase: true, compoundMode: 'compound' }, - frCompoundFound('codeCodeCodeCodeError'), + frCompoundFound('codeCodeCodeCodeError', { forbidden: false }), ], // Legacy compounding @@ -82,14 +93,14 @@ describe('Validate findWord', () => { ['cafe', { matchCase: true, compoundMode: 'legacy' }, frFound(false)], ['cafe', { matchCase: false, compoundMode: 'legacy' }, frFound('cafe', { caseMatched: false })], ['codeErrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound(false)], - ['errmsg', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('errmsg')], - ['errmsgerr', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('errmsgerr')], - ['code+Errors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code+Errors')], - ['codeerrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('codeerrors')], + ['errmsg', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('err+msg')], + ['errmsgerr', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('err+msg+err')], + ['code+Errors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code++Errors')], + ['codeerrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code+errors')], ]; test.each(tests)('%s %j %j', (word, options, exResult) => { - expect(findWord(trie, word, options)).toEqual(expect.objectContaining(exResult)); + expect(findWord(trie, word, options)).toEqual(oc(exResult)); }); }); @@ -149,7 +160,7 @@ type PartialFindFullResult = Partial; function fr({ found = false, - forbidden = false, + forbidden = undefined, compoundUsed = false, caseMatched = true, }: PartialFindFullResult): FindFullResult { @@ -178,7 +189,7 @@ function frCompoundFound(found: string | false, r: PartialFindFullResult = {}): return frFound(found, { ...r, compoundUsed }); } -// cspell:ignore blueerror +// cspell:ignore blueerror bluecode function dictionary(): Trie { // camel case dictionary return parseDictionary(` @@ -197,6 +208,8 @@ function dictionary(): Trie { *msg* blue* !blueerror + ~!bluecode + !blueCode `); } @@ -282,3 +295,7 @@ const sampleWords = [ 'joyrode', 'joystick', ]; + +function oc(t: Partial): T { + return expect.objectContaining(t); +} diff --git a/packages/cspell-trie-lib/src/lib/find.ts b/packages/cspell-trie-lib/src/lib/find.ts index 88472655c63..95f18591819 100644 --- a/packages/cspell-trie-lib/src/lib/find.ts +++ b/packages/cspell-trie-lib/src/lib/find.ts @@ -2,7 +2,23 @@ import { TrieNode, FLAG_WORD } from './TrieNode'; import { mergeDefaults } from './util'; import { FORBID_PREFIX, COMPOUND_FIX, CASE_INSENSITIVE_PREFIX } from './constants'; -export type CompoundModes = 'none' | 'compound' | 'legacy'; +/** + * No compounding allowed. + */ +export type CompoundModeNone = 'none'; + +/** + * Allow natural compounding in the dictionary + * using compound prefixes + */ +export type CompoundModesCompound = 'compound'; + +/** + * Allow all possible compounds -- Very slow. + */ +export type CompoundModesLegacy = 'legacy'; + +export type CompoundModes = CompoundModeNone | CompoundModesCompound | CompoundModesLegacy; const defaultLegacyMinCompoundLength = 3; export interface FindOptions { @@ -27,7 +43,13 @@ export interface FindResult { } export interface FindFullResult extends FindResult { - forbidden: boolean; + /** + * Is the word explicitly forbidden. + * - `true` - word is in the forbidden list. + * - `false` - word is not in the forbidden list. + * - `undefined` - unknown - was not checked. + * */ + forbidden: boolean | undefined; } export interface FindFullNodeResult extends FindNodeResult, FindFullResult {} @@ -88,28 +110,23 @@ function _findWordNode(root: TrieNode, word: string, options: FindOptions): Find function __findCompound(): FindFullNodeResult { const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix); - let forbidden = false; + const result: FindFullNodeResult = { ...f }; if (f.found !== false && f.compoundUsed) { // If case was ignored when searching for the word, then check the forbidden // in the ignore case forbidden list. const r = !f.caseMatched ? walk(root, options.caseInsensitivePrefix) : root; - forbidden = isForbiddenWord(r, word, options.forbidPrefix); + result.forbidden = isForbiddenWord(r, word, options.forbidPrefix); } - const result: FindFullNodeResult = { ...f, forbidden }; return result; } function __findExact(): FindFullNodeResult { const n = walk(root, word); const isFound = isEndOfWordNode(n); - let forbidden = false; - if (!isFound) { - forbidden = isForbiddenWord(root, word, options.forbidPrefix); - } const result: FindFullNodeResult = { - found: (isFound || forbidden) && word, + found: isFound && word, compoundUsed: false, - forbidden, + forbidden: isForbiddenWord(root, word, options.forbidPrefix), node: n, caseMatched: true, }; @@ -131,10 +148,7 @@ export function findLegacyCompound(root: TrieNode, word: string, options: FindOp if (!options.matchCase) { roots.push(walk(root, options.caseInsensitivePrefix)); } - const f = findLegacyCompoundNode(roots, word, options.legacyMinCompoundLength); - const forbidden = false; - const result: FindFullNodeResult = { ...f, forbidden }; - return result; + return findLegacyCompoundNode(roots, word, options.legacyMinCompoundLength); } interface FindCompoundChain { @@ -165,7 +179,7 @@ export function findCompoundNode( for (i = 0; i < prefix.length && r; ++i) { r = r.c?.get(prefix[i]); } - const caseMatched = s.caseMatched && prefix !== ignoreCasePrefix; + const caseMatched = s.caseMatched && prefix[0] !== ignoreCasePrefix; return { n: s.n, compoundPrefix: prefix === compoundPrefix ? possibleCompoundPrefix : '', @@ -220,7 +234,7 @@ export function findCompoundNode( } const found = (i && i === word.length && word) || false; - const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: false, caseMatched }; + const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: undefined, caseMatched }; return result; } @@ -238,9 +252,9 @@ function findCompoundWord( ); // Was it a word? if (!node || !node.f) { - return { found: false, compoundUsed, node, forbidden: false, caseMatched }; + return { found: false, compoundUsed, node, forbidden: undefined, caseMatched }; } - return { found, compoundUsed, node, forbidden: false, caseMatched }; + return { found, compoundUsed, node, forbidden: undefined, caseMatched }; } export function findWordExact(root: TrieNode | undefined, word: string): boolean { @@ -337,8 +351,25 @@ function findLegacyCompoundNode( } } - const found = (i && i === word.length && word) || false; - const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: false, caseMatched }; + function extractWord(): string | false { + if (!word || i < word.length) return false; + + const letters: string[] = []; + + let subLen = 0; + for (let j = 0; j < i; ++j) { + const { subLength } = stack[j]; + if (subLength < subLen) { + letters.push('+'); + } + letters.push(word[j]); + subLen = subLength; + } + return letters.join(''); + } + + const found = extractWord(); + const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: undefined, caseMatched }; return result; }