Skip to content

Commit

Permalink
fix: Fix suggestions when working with case aware dictionaries. (#1674)
Browse files Browse the repository at this point in the history
* fix: Be able to filter suggestions as part of the search process.

* fix: Update the walker to understand compound Tries with case insensitive

* dev: add isDefined filter function

* dev: clean up trie to use the collector's filter function.

* refactor: refactor the collector for consistency.

* Update launch.json

* fix: adjust to the renamed options and better suggestions.
  • Loading branch information
Jason3S committed Sep 11, 2021
1 parent 3bd5ca7 commit 0ba056d
Show file tree
Hide file tree
Showing 16 changed files with 312 additions and 167 deletions.
15 changes: 11 additions & 4 deletions packages/cspell-trie-lib/.vscode/launch.json
Expand Up @@ -4,12 +4,16 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
{
"type": "node",
"request": "launch",
"name": "trie-lib: Jest current-file",
"program": "${workspaceFolder}/../../node_modules/.bin/jest",
"args": ["--runInBand", "${file}"],
"cwd": "${workspaceFolder}",
"args": [
"--runInBand",
"${fileBasename}"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"disableOptimisticBPs": true,
Expand All @@ -22,7 +26,10 @@
"request": "launch",
"name": "trie-lib: Jest All",
"program": "${workspaceFolder}/../../node_modules/.bin/jest",
"args": ["--runInBand"],
"cwd": "${workspaceFolder}",
"args": [
"--runInBand"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"disableOptimisticBPs": true,
Expand All @@ -31,4 +38,4 @@
}
}
]
}
}
35 changes: 30 additions & 5 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts
Expand Up @@ -75,9 +75,34 @@ describe('Validate SimpleDictionaryParser', () => {
expect(result).toEqual(words);
});

function toL(a: string): string {
return a.toLowerCase();
}
// cspell:ignore begin* *middle* *end
const bme1 = [
'beginmiddleend',
'beginmiddleEnd',
'beginMiddleend',
'Beginmiddleend',
'beginMiddleEnd',
'BeginmiddleEnd',
'BeginMiddleend',
'BeginMiddleEnd',
'beginend',
'beginEnd',
'Beginend',
];

const bme2 = [
'BeginmiddleEnd',
'beginmiddleEnd',
'Beginmiddleend',
'BeginMiddleEnd',
'beginmiddleend',
'beginMiddleEnd',
'BeginMiddleend',
'beginMiddleend',
'Beginend',
'beginend',
'BeginEnd',
];

// cspell:ignore Midle beginmidleend gescháft cafë resumé
test.each`
Expand All @@ -86,8 +111,8 @@ describe('Validate SimpleDictionaryParser', () => {
${'begin'} | ${false} | ${false} | ${['Begin']}
${'BeginEn'} | ${false} | ${false} | ${['BeginEnd', 'Begin']}
${'BeginMidleEnd'} | ${false} | ${false} | ${['BeginMiddleEnd', 'BeginEnd']}
${'beginmidleend'} | ${true} | ${false} | ${[toL('BeginMiddleEnd'), 'BeginMiddleEnd', toL('BeginEnd'), 'BeginEnd']}
${'BeginmidleEnd'} | ${true} | ${false} | ${['BeginMiddleEnd', toL('BeginMiddleEnd'), toL('BeginEnd'), 'BeginEnd']}
${'beginmidleend'} | ${true} | ${false} | ${bme1}
${'BeginmidleEnd'} | ${true} | ${false} | ${bme2}
${'cafe'} | ${false} | ${false} | ${['Café']}
${'cafe'} | ${true} | ${true} | ${['cafe', 'café', 'Café']}
${'cafë'} | ${true} | ${false} | ${['cafe', 'café', 'Café']}
Expand Down
8 changes: 5 additions & 3 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts
Expand Up @@ -18,17 +18,19 @@ export interface ParseDictionaryOptions {
caseInsensitivePrefix: string;
/**
* Start of a single-line comment.
* @default "#"
*/
commentCharacter: string;

/**
* if word starts with prefix, do not strip case or accents.
* Prefix is not stored.
* If word starts with prefix, do not strip case or accents.
* @default false;
*/
keepExactPrefix: string;

/**
* Tell the parser to NOT automatically strip case and accents.
* Tell the parser to automatically strip case and accents.
* @default true
*/
stripCaseAndAccents: boolean;
}
Expand Down
38 changes: 35 additions & 3 deletions packages/cspell-trie-lib/src/lib/genSuggestionsOptions.ts
Expand Up @@ -6,15 +6,17 @@ export interface GenSuggestionOptionsStrict {
* @default CompoundWordsMethod.NONE
*/
compoundMethod?: CompoundWordsMethod;

/**
* ignore case when searching.
*/
ignoreCase: boolean;

/**
* Maximum number of "edits" allowed.
* 3 is a good number. Above 5 can be very slow.
*/
maxNumChanges: number;
changeLimit: number;
}

export type GenSuggestionOptions = Partial<GenSuggestionOptionsStrict>;
Expand All @@ -24,23 +26,31 @@ export interface SuggestionOptionsStrict extends GenSuggestionOptionsStrict {
* Maximum number of suggestions to make.
*/
numSuggestions: number;

/**
* Allow ties when making suggestions.
* if `true` it is possible to have more than `numSuggestions`.
*/
allowTies: boolean;

/**
* Time alloted in milliseconds to generate suggestions.
*/
timeout: number;

/**
* Optional filter function.
* return true to keep the candidate.
*/
filter?: (word: string, cost: number) => boolean;
}

export type SuggestionOptions = Partial<SuggestionOptionsStrict>;

export const defaultGenSuggestionOptions: GenSuggestionOptionsStrict = {
compoundMethod: CompoundWordsMethod.NONE,
ignoreCase: true,
maxNumChanges: 5,
changeLimit: 5,
};

export const defaultSuggestionOptions: SuggestionOptionsStrict = {
Expand All @@ -50,14 +60,36 @@ export const defaultSuggestionOptions: SuggestionOptionsStrict = {
timeout: 5000,
};

type KeyMapOfGenSuggestionOptionsStrict = {
[K in keyof GenSuggestionOptionsStrict]: K;
};

type KeyMapOfSuggestionOptionsStrict = {
[K in keyof SuggestionOptionsStrict]: K;
};

const keyMapOfGenSuggestionOptionsStrict: KeyMapOfGenSuggestionOptionsStrict = {
changeLimit: 'changeLimit',
compoundMethod: 'compoundMethod',
ignoreCase: 'ignoreCase',
} as const;

const keyMapOfSuggestionOptionsStrict: KeyMapOfSuggestionOptionsStrict = {
...keyMapOfGenSuggestionOptionsStrict,
allowTies: 'allowTies',
filter: 'filter',
numSuggestions: 'numSuggestions',
timeout: 'timeout',
};

/**
* Create suggestion options using composition.
* @param opts - partial options.
* @returns Options - with defaults.
*/
export function createSuggestionOptions(...opts: SuggestionOptions[]): SuggestionOptionsStrict {
const options = { ...defaultSuggestionOptions };
const keys = Object.keys(defaultSuggestionOptions) as (keyof SuggestionOptions)[];
const keys = Object.keys(keyMapOfSuggestionOptionsStrict) as (keyof SuggestionOptions)[];
for (const opt of opts) {
for (const key of keys) {
assign(options, opt, key);
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/suggest-en-a-star.test.ts
Expand Up @@ -219,11 +219,11 @@ describe('Validate English Suggestions', () => {
});

function sugGenOptsFromCollector(collector: SuggestionCollector, compoundMethod?: CompoundWordsMethod) {
const { ignoreCase, maxNumChanges } = collector;
const { ignoreCase, changeLimit } = collector;
const ops: GenSuggestionOptionsStrict = {
compoundMethod,
ignoreCase,
maxNumChanges,
changeLimit,
};
return ops;
}
Expand Down
49 changes: 22 additions & 27 deletions packages/cspell-trie-lib/src/lib/suggest-nl.test.ts
Expand Up @@ -22,35 +22,22 @@ describe('Validate Dutch Suggestions', () => {
);

// cspell:ignore burtbewoners burgbewoners
test(
'Tests suggestions "burtbewoners"',
async () => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('burtbewoners', { numSuggestions: 5 });
const suggestions = results.map((s) => s.word);
expect(suggestions).toEqual(expect.arrayContaining(['buurtbewoners', 'burgbewoners']));
},
timeout
);

// cspell:ignore buurtbwoners
test(
'Tests suggestions "buurtbwoners"',
async () => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('buurtbwoners', { numSuggestions: 1 });
const suggestions = results.map((s) => s.word);
expect(suggestions).toEqual(expect.arrayContaining(['buurtbewoners']));
},
timeout
);
// cspell:ignore buurtbwoners buurtbewoner

test(
'Tests suggestions "buurtbewoners" with cost',
async () => {
test.each`
word | numSuggestions | expected
${'Mexico-Stad'} | ${2} | ${[sr('Mexico-Stad', 0), sr('mexico-stad', 2)]}
${'mexico-stad'} | ${2} | ${[sr('mexico-stad', 0), sr('Mexico-Stad', 2)]}
${'buurtbewoners'} | ${3} | ${[sr('buurtbewoners', 0), sr('buurtbewoners-', 86), sr('buurtbewoner', 88)]}
${'burtbewoners'} | ${2} | ${ac(sr('burgbewoners', 96), sr('buurtbewoners', 97))}
${'buurtbwoners'} | ${1} | ${[sr('buurtbewoners', 93)]}
${'buurtbewoners'} | ${1} | ${[sr('buurtbewoners', 0)]}
`(
'Tests suggestions $word',
async ({ word, numSuggestions, expected }) => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('buurtbewoners', { numSuggestions: 1 });
expect(results).toEqual([{ word: 'buurtbewoners', cost: 0 }]);
const results = trie.suggestWithCost(word, { numSuggestions });
expect(results).toEqual(expected);
},
timeout
);
Expand All @@ -67,4 +54,12 @@ describe('Validate Dutch Suggestions', () => {
},
timeout
);

function ac<T>(...args: T[]): T[] {
return expect.arrayContaining(args);
}

function sr(word: string, cost: number) {
return { word, cost };
}
});
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/suggest.legacy.test.ts
Expand Up @@ -75,7 +75,7 @@ const defaultMaxNumberSuggestions = 10;
const baseCost = 100;
const swapCost = 75;
const postSwapCost = swapCost - baseCost;
const maxNumChanges = 5;
const MAX_NUM_CHANGES = 5;

function legacySuggest(
root: TrieNode,
Expand All @@ -89,7 +89,7 @@ function legacySuggest(
const x = ' ' + word;
const mx = x.length - 1;

let costLimit = Math.min((bc * word.length) / 2, bc * maxNumChanges);
let costLimit = Math.min((bc * word.length) / 2, bc * MAX_NUM_CHANGES);

function comp(a: SuggestionResult, b: SuggestionResult): number {
return a.cost - b.cost || a.word.length - b.word.length || (a.word < b.word ? -1 : 1);
Expand Down
70 changes: 59 additions & 11 deletions packages/cspell-trie-lib/src/lib/suggest.test.ts
Expand Up @@ -196,25 +196,63 @@ describe('Validate Suggest', () => {
expect(suggestions).toEqual(['walked', 'walker', 'talked']);
});

// cspell:ignore walkingtree talkingtree
test('that forbidden words are not included (collector)', () => {
function sr(word: string, cost: number) {
return { word, cost };
}

// cspell:ignore walking* *tree talking* *stick running* *pod *trick
test.each`
word | ignoreCase | numSuggestions | changeLimit | expected
${'Runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 1)]}
${'runningtree'} | ${undefined} | ${2} | ${undefined} | ${[sr('runningtree', 0), sr('Runningtree', 1)]}
${'Runningpod'} | ${undefined} | ${5} | ${1} | ${[sr('Runningpod', 0), sr('runningpod', 1), sr('RunningPod', 1), sr('runningPod', 2)]}
${'runningpod'} | ${undefined} | ${2} | ${undefined} | ${[sr('runningpod', 0), sr('runningPod', 1)]}
${'walkingstick'} | ${undefined} | ${2} | ${undefined} | ${[sr('walkingstick', 0), sr('talkingstick', 99)]}
${'walkingtree'} | ${undefined} | ${2} | ${undefined} | ${[sr('talkingtree', 99), sr('walkingstick', 359)]}
${'running'} | ${undefined} | ${2} | ${undefined} | ${[sr('running', 0), sr('Running', 1)]}
`('test suggestion results $word', ({ word, ignoreCase, numSuggestions, changeLimit, expected }) => {
const trie = parseDictionary(`
walk
Running*
walking*
*stick
talking*
*tree
+Pod
!walkingtree
`);
expect(trie.suggest('walkingstick', numSugs(1))).toEqual(['walkingstick']);
expect(trie.suggest('walkingtree', numSugs(1))).toEqual([]);
expect(trie.suggest('walking*', numSugs(1))).toEqual(['walking']);
const collector = suggestionCollector('walkingtree', sugOptsMaxNum(2));
const collector = suggestionCollector(word, sugOpts({ numSuggestions, changeLimit, ignoreCase }));
trie.genSuggestions(collector);
expect(collector.suggestions).toEqual([
{ word: 'talkingtree', cost: 99 },
{ word: 'walkingstick', cost: 359 },
]);
const r = collector.suggestions;
expect(r).toEqual(expected);
});

test.each`
word | ignoreCase | numSuggestions | changeLimit | expected
${'runningtree'} | ${undefined} | ${2} | ${3} | ${[sr('runningtree', 0), sr('Runningtree', 1)]}
${'Runningpod'} | ${undefined} | ${4} | ${1} | ${[sr('Runningpod', 0), sr('runningpod', 1), sr('RunningPod', 1), sr('runningPod', 2)]}
${'Runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 1)]}
${'runningpod'} | ${undefined} | ${4} | ${1} | ${[sr('runningpod', 0), sr('runningPod', 1), sr('Runningpod', 1), sr('RunningPod', 2)]}
${'runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 2)]}
${'walkingstick'} | ${undefined} | ${2} | ${3} | ${[sr('walkingstick', 0), sr('talkingstick', 99)]}
${'walkingtree'} | ${undefined} | ${2} | ${4} | ${[sr('talkingtree', 99), sr('walkingstick', 359)]}
${'talkingtrick'} | ${undefined} | ${2} | ${4} | ${[sr('talkingstick', 183), sr('talkingtree', 268)]}
${'running'} | ${undefined} | ${2} | ${3} | ${[sr('running', 0), sr('Running', 1)]}
${'free'} | ${undefined} | ${2} | ${2} | ${[sr('tree', 99)]}
${'stock'} | ${undefined} | ${2} | ${2} | ${[sr('stick', 97)]}
`('test suggestWithCost results $word', ({ word, ignoreCase, numSuggestions, changeLimit, expected }) => {
const trie = parseDictionary(`
walk
Running*
walking*
*stick
talking*
*tree
+Pod
!walkingtree
`);
const r = trie.suggestWithCost(word, { numSuggestions, ignoreCase, changeLimit });
expect(r).toEqual(expected);
});
});

Expand All @@ -225,7 +263,7 @@ function numSugs(numSuggestions: number): SuggestionOptions {
function sugOpts(opts: Partial<SuggestionCollectorOptions>): SuggestionCollectorOptions {
return {
...defaultOptions,
...opts,
...clean(opts),
};
}

Expand Down Expand Up @@ -282,3 +320,13 @@ const sampleWords = [
'joyrode',
'joystick',
];

function clean<T>(t: Partial<T>): Partial<T> {
const r: Partial<T> = {};
for (const k of Object.keys(t) as (keyof T)[]) {
if (t[k] !== undefined) {
r[k] = t[k];
}
}
return r;
}

0 comments on commit 0ba056d

Please sign in to comment.