Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: clean up find results #1550

Merged
merged 2 commits into from Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 15 additions & 11 deletions packages/cspell-trie-lib/src/lib/find.dutch.test.ts
Expand Up @@ -22,7 +22,7 @@ describe('Validate findWord', () => {
matchCase: true,
compoundMode: 'none',
})
).toEqual(frFound('aanvaardbaard', { forbidden: true }));
).toEqual({ ...frFound(false), forbidden: true });

expect(findWord(trie, 'code', { matchCase: true, compoundMode: 'none' })).toEqual({
found: 'code',
Expand All @@ -40,10 +40,14 @@ describe('Validate findWord', () => {
});

const tests: [string, PartialFindOptions, FindFullResult][] = [
['Code', { matchCase: true, compoundMode: 'none' }, frNotFound()],
['code', { matchCase: true, compoundMode: 'none' }, frFound('code')],
['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound()],
['cafe', { matchCase: false, compoundMode: 'none' }, frFound('cafe', { caseMatched: false })],
['Code', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })],
['code', { matchCase: true, compoundMode: 'none' }, frFound('code', { forbidden: false })],
['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })],
[
'cafe',
{ matchCase: false, compoundMode: 'none' },
frFound('cafe', { caseMatched: false, forbidden: undefined }),
],

// Compounding enabled, but matching whole words (compounding not used).
['Code', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)],
Expand Down Expand Up @@ -84,7 +88,7 @@ describe('Validate findWord', () => {
compoundMode: 'compound',
}));
expect(r2.found).toEqual(word);
expect(r2.forbidden).toBe(false);
expect(r2.forbidden).toBeFalsy();
});

test.each(sampleWords())('Find Word case insensitive: %s', async (word) => {
Expand All @@ -94,7 +98,7 @@ describe('Validate findWord', () => {
compoundMode: 'compound',
});
expect(r.found).toEqual(normalizeWordToLowercase(word));
expect(r.forbidden).toBe(false);
expect(r.forbidden).toBeFalsy();
});

test.each(sampleMisspellings())(`Check misspelled words: %s`, async (word) => {
Expand All @@ -113,7 +117,7 @@ describe('Validate findWord', () => {
compoundMode: 'compound',
}));
expect(r2.found).toEqual(false);
expect(r2.forbidden).toBe(false);
expect(r2.forbidden).toBeFalsy();
});

test.each(sampleMisspellings())(`Check misspelled words case insensitive: %s`, async (word) => {
Expand All @@ -123,7 +127,7 @@ describe('Validate findWord', () => {
compoundMode: 'compound',
});
expect(r.found).toEqual(false);
expect(r.forbidden).toBe(false);
expect(r.forbidden).toBeFalsy();
});
});

Expand Down Expand Up @@ -192,14 +196,14 @@ function processText(text: string): string[] {
}

function testCompound(word: string, found = true): [string, PartialFindOptions, FindFullResult] {
return [word, { matchCase: true, compoundMode: 'compound' }, frCompoundFound(found && word)];
return [word, { matchCase: true, compoundMode: 'compound' }, frCompoundFound(found && word, { forbidden: false })];
}

type PartialFindFullResult = Partial<FindFullResult>;

function fr({
found = false,
forbidden = false,
forbidden = undefined,
compoundUsed = false,
caseMatched = true,
}: PartialFindFullResult): FindFullResult {
Expand Down
101 changes: 59 additions & 42 deletions packages/cspell-trie-lib/src/lib/find.test.ts
Expand Up @@ -14,37 +14,36 @@ const findLegacyCompoundWord = __testing__.findLegacyCompoundWord;
describe('Validate findWord', () => {
const trie = dictionary().root;

test('find exact words preserve case', () => {
// cspell:ignore bluemsg
test.each`
word | opts | expected
${'blueerror'} | ${{ matchCase: true, compoundMode: 'none' }} | ${{ found: false, compoundUsed: false, forbidden: true, caseMatched: true }}
${'blueerror'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: false, compoundUsed: true, forbidden: undefined, caseMatched: true }}
${'blueCode'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: 'blueCode', compoundUsed: true, forbidden: true, caseMatched: true }}
${'bluecode'} | ${{ matchCase: false, compoundMode: 'compound' }} | ${{ found: 'bluecode', compoundUsed: true, forbidden: true, caseMatched: false }}
${'bluemsg'} | ${{ matchCase: false, compoundMode: 'compound' }} | ${{ found: 'bluemsg', compoundUsed: true, forbidden: false, caseMatched: true }}
${'code'} | ${{ matchCase: true, compoundMode: 'none' }} | ${{ found: 'code', compoundUsed: false, forbidden: false, caseMatched: true }}
${'code'} | ${{ matchCase: true, compoundMode: 'compound' }} | ${{ found: 'code', compoundUsed: false, forbidden: undefined, caseMatched: true }}
`('find exact words preserve case "$word" $opts', ({ word, opts, expected }) => {
// Code is not allowed as a full word.
expect(
findWord(trie, 'blueerror', {
matchCase: true,
compoundMode: 'none',
})
).toEqual({ found: 'blueerror', compoundUsed: false, forbidden: true, caseMatched: true });

expect(findWord(trie, 'code', { matchCase: true, compoundMode: 'none' })).toEqual({
found: 'code',
compoundUsed: false,
forbidden: false,
caseMatched: true,
});

expect(
findWord(trie, 'code', {
matchCase: true,
compoundMode: 'compound',
})
).toEqual(expect.objectContaining({ found: 'code', compoundUsed: false, forbidden: false, caseMatched: true }));
expect(findWord(trie, word, opts)).toEqual(expected);
});

const tests: [string, PartialFindOptions, FindFullResult][] = [
['errorCodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorCodes')],
['errorcodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorcodes')],
['Code', { matchCase: true, compoundMode: 'none' }, frNotFound()],
['code', { matchCase: true, compoundMode: 'none' }, frFound('code')],
['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound()],
['café', { matchCase: true, compoundMode: 'none' }, frFound('café')],
[
'errorCodes',
{ matchCase: false, compoundMode: 'compound' },
frCompoundFound('errorCodes', { forbidden: false }),
],
[
'errorcodes',
{ matchCase: false, compoundMode: 'compound' },
frCompoundFound('errorcodes', { forbidden: false, caseMatched: false }),
],
['Code', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })],
['code', { matchCase: true, compoundMode: 'none' }, frFound('code', { forbidden: false })],
['cafe', { matchCase: true, compoundMode: 'none' }, frNotFound({ forbidden: false })],
['café', { matchCase: true, compoundMode: 'none' }, frFound('café', { forbidden: false })],

// non-normalized words
['café', { matchCase: false, compoundMode: 'none' }, frFound('café')],
Expand All @@ -53,26 +52,38 @@ describe('Validate findWord', () => {
['Code', { matchCase: false, compoundMode: 'none' }, frNotFound()],

// It will find the special characters. Might not be desired.
['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+')],
['+Code+', { matchCase: true, compoundMode: 'none' }, frFound('+Code+')],
['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+')],
['~+code+', { matchCase: true, compoundMode: 'none' }, frFound('~+code+')],
['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+', { forbidden: false })],
['+Code+', { matchCase: true, compoundMode: 'none' }, frFound('+Code+', { forbidden: false })],
['code+', { matchCase: true, compoundMode: 'none' }, frFound('code+', { forbidden: false })],
['~+code+', { matchCase: true, compoundMode: 'none' }, frFound('~+code+', { forbidden: false })],

// Compounding enabled, but matching whole words (compounding not used).
['Code', { matchCase: true, compoundMode: 'compound' }, frFound(false)],
['code', { matchCase: true, compoundMode: 'compound' }, frFound('code')],
['cafe', { matchCase: true, compoundMode: 'compound' }, frFound(false)],
['cafe', { matchCase: false, compoundMode: 'compound' }, frFound('cafe', { caseMatched: false })],

['errorCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound('errorCodes')],
['errorCodes', { matchCase: false, compoundMode: 'compound' }, frCompoundFound('errorCodes')],
[
'errorCodes',
{ matchCase: true, compoundMode: 'compound' },
frCompoundFound('errorCodes', { forbidden: false }),
],
[
'errorCodes',
{ matchCase: false, compoundMode: 'compound' },
frCompoundFound('errorCodes', { forbidden: false }),
],
['errorsCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)],
['errorsCodes', { matchCase: true, compoundMode: 'compound' }, frCompoundFound(false)],
['codeErrors', { matchCase: true, compoundMode: 'compound' }, frCompoundFound('codeErrors')],
[
'codeErrors',
{ matchCase: true, compoundMode: 'compound' },
frCompoundFound('codeErrors', { forbidden: false }),
],
[
'codeCodeCodeCodeError',
{ matchCase: true, compoundMode: 'compound' },
frCompoundFound('codeCodeCodeCodeError'),
frCompoundFound('codeCodeCodeCodeError', { forbidden: false }),
],

// Legacy compounding
Expand All @@ -82,14 +93,14 @@ describe('Validate findWord', () => {
['cafe', { matchCase: true, compoundMode: 'legacy' }, frFound(false)],
['cafe', { matchCase: false, compoundMode: 'legacy' }, frFound('cafe', { caseMatched: false })],
['codeErrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound(false)],
['errmsg', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('errmsg')],
['errmsgerr', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('errmsgerr')],
['code+Errors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code+Errors')],
['codeerrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('codeerrors')],
['errmsg', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('err+msg')],
['errmsgerr', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('err+msg+err')],
['code+Errors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code++Errors')],
['codeerrors', { matchCase: true, compoundMode: 'legacy' }, frCompoundFound('code+errors')],
];

test.each(tests)('%s %j %j', (word, options, exResult) => {
expect(findWord(trie, word, options)).toEqual(expect.objectContaining(exResult));
expect(findWord(trie, word, options)).toEqual(oc(exResult));
});
});

Expand Down Expand Up @@ -149,7 +160,7 @@ type PartialFindFullResult = Partial<FindFullResult>;

function fr({
found = false,
forbidden = false,
forbidden = undefined,
compoundUsed = false,
caseMatched = true,
}: PartialFindFullResult): FindFullResult {
Expand Down Expand Up @@ -178,7 +189,7 @@ function frCompoundFound(found: string | false, r: PartialFindFullResult = {}):
return frFound(found, { ...r, compoundUsed });
}

// cspell:ignore blueerror
// cspell:ignore blueerror bluecode
function dictionary(): Trie {
// camel case dictionary
return parseDictionary(`
Expand All @@ -197,6 +208,8 @@ function dictionary(): Trie {
*msg*
blue*
!blueerror
~!bluecode
!blueCode
`);
}

Expand Down Expand Up @@ -282,3 +295,7 @@ const sampleWords = [
'joyrode',
'joystick',
];

function oc<T>(t: Partial<T>): T {
return expect.objectContaining(t);
}
73 changes: 52 additions & 21 deletions packages/cspell-trie-lib/src/lib/find.ts
Expand Up @@ -2,7 +2,23 @@ import { TrieNode, FLAG_WORD } from './TrieNode';
import { mergeDefaults } from './util';
import { FORBID_PREFIX, COMPOUND_FIX, CASE_INSENSITIVE_PREFIX } from './constants';

export type CompoundModes = 'none' | 'compound' | 'legacy';
/**
* No compounding allowed.
*/
export type CompoundModeNone = 'none';

/**
* Allow natural compounding in the dictionary
* using compound prefixes
*/
export type CompoundModesCompound = 'compound';

/**
* Allow all possible compounds -- Very slow.
*/
export type CompoundModesLegacy = 'legacy';

export type CompoundModes = CompoundModeNone | CompoundModesCompound | CompoundModesLegacy;

const defaultLegacyMinCompoundLength = 3;
export interface FindOptions {
Expand All @@ -27,7 +43,13 @@ export interface FindResult {
}

export interface FindFullResult extends FindResult {
forbidden: boolean;
/**
* Is the word explicitly forbidden.
* - `true` - word is in the forbidden list.
* - `false` - word is not in the forbidden list.
* - `undefined` - unknown - was not checked.
* */
forbidden: boolean | undefined;
}

export interface FindFullNodeResult extends FindNodeResult, FindFullResult {}
Expand Down Expand Up @@ -88,28 +110,23 @@ function _findWordNode(root: TrieNode, word: string, options: FindOptions): Find

function __findCompound(): FindFullNodeResult {
const f = findCompoundWord(root, word, compoundPrefix, ignoreCasePrefix);
let forbidden = false;
const result: FindFullNodeResult = { ...f };
if (f.found !== false && f.compoundUsed) {
// If case was ignored when searching for the word, then check the forbidden
// in the ignore case forbidden list.
const r = !f.caseMatched ? walk(root, options.caseInsensitivePrefix) : root;
forbidden = isForbiddenWord(r, word, options.forbidPrefix);
result.forbidden = isForbiddenWord(r, word, options.forbidPrefix);
}
const result: FindFullNodeResult = { ...f, forbidden };
return result;
}

function __findExact(): FindFullNodeResult {
const n = walk(root, word);
const isFound = isEndOfWordNode(n);
let forbidden = false;
if (!isFound) {
forbidden = isForbiddenWord(root, word, options.forbidPrefix);
}
const result: FindFullNodeResult = {
found: (isFound || forbidden) && word,
found: isFound && word,
compoundUsed: false,
forbidden,
forbidden: isForbiddenWord(root, word, options.forbidPrefix),
node: n,
caseMatched: true,
};
Expand All @@ -131,10 +148,7 @@ export function findLegacyCompound(root: TrieNode, word: string, options: FindOp
if (!options.matchCase) {
roots.push(walk(root, options.caseInsensitivePrefix));
}
const f = findLegacyCompoundNode(roots, word, options.legacyMinCompoundLength);
const forbidden = false;
const result: FindFullNodeResult = { ...f, forbidden };
return result;
return findLegacyCompoundNode(roots, word, options.legacyMinCompoundLength);
}

interface FindCompoundChain {
Expand Down Expand Up @@ -165,7 +179,7 @@ export function findCompoundNode(
for (i = 0; i < prefix.length && r; ++i) {
r = r.c?.get(prefix[i]);
}
const caseMatched = s.caseMatched && prefix !== ignoreCasePrefix;
const caseMatched = s.caseMatched && prefix[0] !== ignoreCasePrefix;
return {
n: s.n,
compoundPrefix: prefix === compoundPrefix ? possibleCompoundPrefix : '',
Expand Down Expand Up @@ -220,7 +234,7 @@ export function findCompoundNode(
}

const found = (i && i === word.length && word) || false;
const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: false, caseMatched };
const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: undefined, caseMatched };
return result;
}

Expand All @@ -238,9 +252,9 @@ function findCompoundWord(
);
// Was it a word?
if (!node || !node.f) {
return { found: false, compoundUsed, node, forbidden: false, caseMatched };
return { found: false, compoundUsed, node, forbidden: undefined, caseMatched };
}
return { found, compoundUsed, node, forbidden: false, caseMatched };
return { found, compoundUsed, node, forbidden: undefined, caseMatched };
}

export function findWordExact(root: TrieNode | undefined, word: string): boolean {
Expand Down Expand Up @@ -337,8 +351,25 @@ function findLegacyCompoundNode(
}
}

const found = (i && i === word.length && word) || false;
const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: false, caseMatched };
function extractWord(): string | false {
if (!word || i < word.length) return false;

const letters: string[] = [];

let subLen = 0;
for (let j = 0; j < i; ++j) {
const { subLength } = stack[j];
if (subLength < subLen) {
letters.push('+');
}
letters.push(word[j]);
subLen = subLength;
}
return letters.join('');
}

const found = extractWord();
const result: FindFullNodeResult = { found, compoundUsed, node, forbidden: undefined, caseMatched };
return result;
}

Expand Down