Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix suggestions when working with case aware dictionaries. #1674

Merged
merged 7 commits into from Sep 11, 2021
Merged
15 changes: 11 additions & 4 deletions packages/cspell-trie-lib/.vscode/launch.json
Expand Up @@ -4,12 +4,16 @@
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
{
"type": "node",
"request": "launch",
"name": "trie-lib: Jest current-file",
"program": "${workspaceFolder}/../../node_modules/.bin/jest",
"args": ["--runInBand", "${file}"],
"cwd": "${workspaceFolder}",
"args": [
"--runInBand",
"${fileBasename}"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"disableOptimisticBPs": true,
Expand All @@ -22,7 +26,10 @@
"request": "launch",
"name": "trie-lib: Jest All",
"program": "${workspaceFolder}/../../node_modules/.bin/jest",
"args": ["--runInBand"],
"cwd": "${workspaceFolder}",
"args": [
"--runInBand"
],
"console": "integratedTerminal",
"internalConsoleOptions": "neverOpen",
"disableOptimisticBPs": true,
Expand All @@ -31,4 +38,4 @@
}
}
]
}
}
35 changes: 30 additions & 5 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.test.ts
Expand Up @@ -75,9 +75,34 @@ describe('Validate SimpleDictionaryParser', () => {
expect(result).toEqual(words);
});

function toL(a: string): string {
return a.toLowerCase();
}
// cspell:ignore begin* *middle* *end
const bme1 = [
'beginmiddleend',
'beginmiddleEnd',
'beginMiddleend',
'Beginmiddleend',
'beginMiddleEnd',
'BeginmiddleEnd',
'BeginMiddleend',
'BeginMiddleEnd',
'beginend',
'beginEnd',
'Beginend',
];

const bme2 = [
'BeginmiddleEnd',
'beginmiddleEnd',
'Beginmiddleend',
'BeginMiddleEnd',
'beginmiddleend',
'beginMiddleEnd',
'BeginMiddleend',
'beginMiddleend',
'Beginend',
'beginend',
'BeginEnd',
];

// cspell:ignore Midle beginmidleend gescháft cafë resumé
test.each`
Expand All @@ -86,8 +111,8 @@ describe('Validate SimpleDictionaryParser', () => {
${'begin'} | ${false} | ${false} | ${['Begin']}
${'BeginEn'} | ${false} | ${false} | ${['BeginEnd', 'Begin']}
${'BeginMidleEnd'} | ${false} | ${false} | ${['BeginMiddleEnd', 'BeginEnd']}
${'beginmidleend'} | ${true} | ${false} | ${[toL('BeginMiddleEnd'), 'BeginMiddleEnd', toL('BeginEnd'), 'BeginEnd']}
${'BeginmidleEnd'} | ${true} | ${false} | ${['BeginMiddleEnd', toL('BeginMiddleEnd'), toL('BeginEnd'), 'BeginEnd']}
${'beginmidleend'} | ${true} | ${false} | ${bme1}
${'BeginmidleEnd'} | ${true} | ${false} | ${bme2}
${'cafe'} | ${false} | ${false} | ${['Café']}
${'cafe'} | ${true} | ${true} | ${['cafe', 'café', 'Café']}
${'cafë'} | ${true} | ${false} | ${['cafe', 'café', 'Café']}
Expand Down
8 changes: 5 additions & 3 deletions packages/cspell-trie-lib/src/lib/SimpleDictionaryParser.ts
Expand Up @@ -18,17 +18,19 @@ export interface ParseDictionaryOptions {
caseInsensitivePrefix: string;
/**
* Start of a single-line comment.
* @default "#"
*/
commentCharacter: string;

/**
* if word starts with prefix, do not strip case or accents.
* Prefix is not stored.
* If word starts with prefix, do not strip case or accents.
* @default false;
*/
keepExactPrefix: string;

/**
* Tell the parser to NOT automatically strip case and accents.
* Tell the parser to automatically strip case and accents.
* @default true
*/
stripCaseAndAccents: boolean;
}
Expand Down
38 changes: 35 additions & 3 deletions packages/cspell-trie-lib/src/lib/genSuggestionsOptions.ts
Expand Up @@ -6,15 +6,17 @@ export interface GenSuggestionOptionsStrict {
* @default CompoundWordsMethod.NONE
*/
compoundMethod?: CompoundWordsMethod;

/**
* ignore case when searching.
*/
ignoreCase: boolean;

/**
* Maximum number of "edits" allowed.
* 3 is a good number. Above 5 can be very slow.
*/
maxNumChanges: number;
changeLimit: number;
}

export type GenSuggestionOptions = Partial<GenSuggestionOptionsStrict>;
Expand All @@ -24,23 +26,31 @@ export interface SuggestionOptionsStrict extends GenSuggestionOptionsStrict {
* Maximum number of suggestions to make.
*/
numSuggestions: number;

/**
* Allow ties when making suggestions.
* if `true` it is possible to have more than `numSuggestions`.
*/
allowTies: boolean;

/**
* Time alloted in milliseconds to generate suggestions.
*/
timeout: number;

/**
* Optional filter function.
* return true to keep the candidate.
*/
filter?: (word: string, cost: number) => boolean;
}

export type SuggestionOptions = Partial<SuggestionOptionsStrict>;

export const defaultGenSuggestionOptions: GenSuggestionOptionsStrict = {
compoundMethod: CompoundWordsMethod.NONE,
ignoreCase: true,
maxNumChanges: 5,
changeLimit: 5,
};

export const defaultSuggestionOptions: SuggestionOptionsStrict = {
Expand All @@ -50,14 +60,36 @@ export const defaultSuggestionOptions: SuggestionOptionsStrict = {
timeout: 5000,
};

type KeyMapOfGenSuggestionOptionsStrict = {
[K in keyof GenSuggestionOptionsStrict]: K;
};

type KeyMapOfSuggestionOptionsStrict = {
[K in keyof SuggestionOptionsStrict]: K;
};

const keyMapOfGenSuggestionOptionsStrict: KeyMapOfGenSuggestionOptionsStrict = {
changeLimit: 'changeLimit',
compoundMethod: 'compoundMethod',
ignoreCase: 'ignoreCase',
} as const;

const keyMapOfSuggestionOptionsStrict: KeyMapOfSuggestionOptionsStrict = {
...keyMapOfGenSuggestionOptionsStrict,
allowTies: 'allowTies',
filter: 'filter',
numSuggestions: 'numSuggestions',
timeout: 'timeout',
};

/**
* Create suggestion options using composition.
* @param opts - partial options.
* @returns Options - with defaults.
*/
export function createSuggestionOptions(...opts: SuggestionOptions[]): SuggestionOptionsStrict {
const options = { ...defaultSuggestionOptions };
const keys = Object.keys(defaultSuggestionOptions) as (keyof SuggestionOptions)[];
const keys = Object.keys(keyMapOfSuggestionOptionsStrict) as (keyof SuggestionOptions)[];
for (const opt of opts) {
for (const key of keys) {
assign(options, opt, key);
Expand Down
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/suggest-en-a-star.test.ts
Expand Up @@ -219,11 +219,11 @@ describe('Validate English Suggestions', () => {
});

function sugGenOptsFromCollector(collector: SuggestionCollector, compoundMethod?: CompoundWordsMethod) {
const { ignoreCase, maxNumChanges } = collector;
const { ignoreCase, changeLimit } = collector;
const ops: GenSuggestionOptionsStrict = {
compoundMethod,
ignoreCase,
maxNumChanges,
changeLimit,
};
return ops;
}
Expand Down
49 changes: 22 additions & 27 deletions packages/cspell-trie-lib/src/lib/suggest-nl.test.ts
Expand Up @@ -22,35 +22,22 @@ describe('Validate Dutch Suggestions', () => {
);

// cspell:ignore burtbewoners burgbewoners
test(
'Tests suggestions "burtbewoners"',
async () => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('burtbewoners', { numSuggestions: 5 });
const suggestions = results.map((s) => s.word);
expect(suggestions).toEqual(expect.arrayContaining(['buurtbewoners', 'burgbewoners']));
},
timeout
);

// cspell:ignore buurtbwoners
test(
'Tests suggestions "buurtbwoners"',
async () => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('buurtbwoners', { numSuggestions: 1 });
const suggestions = results.map((s) => s.word);
expect(suggestions).toEqual(expect.arrayContaining(['buurtbewoners']));
},
timeout
);
// cspell:ignore buurtbwoners buurtbewoner

test(
'Tests suggestions "buurtbewoners" with cost',
async () => {
test.each`
word | numSuggestions | expected
${'Mexico-Stad'} | ${2} | ${[sr('Mexico-Stad', 0), sr('mexico-stad', 2)]}
${'mexico-stad'} | ${2} | ${[sr('mexico-stad', 0), sr('Mexico-Stad', 2)]}
${'buurtbewoners'} | ${3} | ${[sr('buurtbewoners', 0), sr('buurtbewoners-', 86), sr('buurtbewoner', 88)]}
${'burtbewoners'} | ${2} | ${ac(sr('burgbewoners', 96), sr('buurtbewoners', 97))}
${'buurtbwoners'} | ${1} | ${[sr('buurtbewoners', 93)]}
${'buurtbewoners'} | ${1} | ${[sr('buurtbewoners', 0)]}
`(
'Tests suggestions $word',
async ({ word, numSuggestions, expected }) => {
const trie = await pTrieNL;
const results = trie.suggestWithCost('buurtbewoners', { numSuggestions: 1 });
expect(results).toEqual([{ word: 'buurtbewoners', cost: 0 }]);
const results = trie.suggestWithCost(word, { numSuggestions });
expect(results).toEqual(expected);
},
timeout
);
Expand All @@ -67,4 +54,12 @@ describe('Validate Dutch Suggestions', () => {
},
timeout
);

function ac<T>(...args: T[]): T[] {
return expect.arrayContaining(args);
}

function sr(word: string, cost: number) {
return { word, cost };
}
});
4 changes: 2 additions & 2 deletions packages/cspell-trie-lib/src/lib/suggest.legacy.test.ts
Expand Up @@ -75,7 +75,7 @@ const defaultMaxNumberSuggestions = 10;
const baseCost = 100;
const swapCost = 75;
const postSwapCost = swapCost - baseCost;
const maxNumChanges = 5;
const MAX_NUM_CHANGES = 5;

function legacySuggest(
root: TrieNode,
Expand All @@ -89,7 +89,7 @@ function legacySuggest(
const x = ' ' + word;
const mx = x.length - 1;

let costLimit = Math.min((bc * word.length) / 2, bc * maxNumChanges);
let costLimit = Math.min((bc * word.length) / 2, bc * MAX_NUM_CHANGES);

function comp(a: SuggestionResult, b: SuggestionResult): number {
return a.cost - b.cost || a.word.length - b.word.length || (a.word < b.word ? -1 : 1);
Expand Down
70 changes: 59 additions & 11 deletions packages/cspell-trie-lib/src/lib/suggest.test.ts
Expand Up @@ -196,25 +196,63 @@ describe('Validate Suggest', () => {
expect(suggestions).toEqual(['walked', 'walker', 'talked']);
});

// cspell:ignore walkingtree talkingtree
test('that forbidden words are not included (collector)', () => {
function sr(word: string, cost: number) {
return { word, cost };
}

// cspell:ignore walking* *tree talking* *stick running* *pod *trick
test.each`
word | ignoreCase | numSuggestions | changeLimit | expected
${'Runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 1)]}
${'runningtree'} | ${undefined} | ${2} | ${undefined} | ${[sr('runningtree', 0), sr('Runningtree', 1)]}
${'Runningpod'} | ${undefined} | ${5} | ${1} | ${[sr('Runningpod', 0), sr('runningpod', 1), sr('RunningPod', 1), sr('runningPod', 2)]}
${'runningpod'} | ${undefined} | ${2} | ${undefined} | ${[sr('runningpod', 0), sr('runningPod', 1)]}
${'walkingstick'} | ${undefined} | ${2} | ${undefined} | ${[sr('walkingstick', 0), sr('talkingstick', 99)]}
${'walkingtree'} | ${undefined} | ${2} | ${undefined} | ${[sr('talkingtree', 99), sr('walkingstick', 359)]}
${'running'} | ${undefined} | ${2} | ${undefined} | ${[sr('running', 0), sr('Running', 1)]}
`('test suggestion results $word', ({ word, ignoreCase, numSuggestions, changeLimit, expected }) => {
const trie = parseDictionary(`
walk
Running*
walking*
*stick
talking*
*tree
+Pod
!walkingtree
`);
expect(trie.suggest('walkingstick', numSugs(1))).toEqual(['walkingstick']);
expect(trie.suggest('walkingtree', numSugs(1))).toEqual([]);
expect(trie.suggest('walking*', numSugs(1))).toEqual(['walking']);
const collector = suggestionCollector('walkingtree', sugOptsMaxNum(2));
const collector = suggestionCollector(word, sugOpts({ numSuggestions, changeLimit, ignoreCase }));
trie.genSuggestions(collector);
expect(collector.suggestions).toEqual([
{ word: 'talkingtree', cost: 99 },
{ word: 'walkingstick', cost: 359 },
]);
const r = collector.suggestions;
expect(r).toEqual(expected);
});

test.each`
word | ignoreCase | numSuggestions | changeLimit | expected
${'runningtree'} | ${undefined} | ${2} | ${3} | ${[sr('runningtree', 0), sr('Runningtree', 1)]}
${'Runningpod'} | ${undefined} | ${4} | ${1} | ${[sr('Runningpod', 0), sr('runningpod', 1), sr('RunningPod', 1), sr('runningPod', 2)]}
${'Runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 1)]}
${'runningpod'} | ${undefined} | ${4} | ${1} | ${[sr('runningpod', 0), sr('runningPod', 1), sr('Runningpod', 1), sr('RunningPod', 2)]}
${'runningpod'} | ${false} | ${4} | ${1} | ${[sr('RunningPod', 2)]}
${'walkingstick'} | ${undefined} | ${2} | ${3} | ${[sr('walkingstick', 0), sr('talkingstick', 99)]}
${'walkingtree'} | ${undefined} | ${2} | ${4} | ${[sr('talkingtree', 99), sr('walkingstick', 359)]}
${'talkingtrick'} | ${undefined} | ${2} | ${4} | ${[sr('talkingstick', 183), sr('talkingtree', 268)]}
${'running'} | ${undefined} | ${2} | ${3} | ${[sr('running', 0), sr('Running', 1)]}
${'free'} | ${undefined} | ${2} | ${2} | ${[sr('tree', 99)]}
${'stock'} | ${undefined} | ${2} | ${2} | ${[sr('stick', 97)]}
`('test suggestWithCost results $word', ({ word, ignoreCase, numSuggestions, changeLimit, expected }) => {
const trie = parseDictionary(`
walk
Running*
walking*
*stick
talking*
*tree
+Pod
!walkingtree
`);
const r = trie.suggestWithCost(word, { numSuggestions, ignoreCase, changeLimit });
expect(r).toEqual(expected);
});
});

Expand All @@ -225,7 +263,7 @@ function numSugs(numSuggestions: number): SuggestionOptions {
function sugOpts(opts: Partial<SuggestionCollectorOptions>): SuggestionCollectorOptions {
return {
...defaultOptions,
...opts,
...clean(opts),
};
}

Expand Down Expand Up @@ -282,3 +320,13 @@ const sampleWords = [
'joyrode',
'joystick',
];

function clean<T>(t: Partial<T>): Partial<T> {
const r: Partial<T> = {};
for (const k of Object.keys(t) as (keyof T)[]) {
if (t[k] !== undefined) {
r[k] = t[k];
}
}
return r;
}