Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: cspell-lib Api change - remove GenSequence #2491

Merged
merged 2 commits into from Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 9 additions & 10 deletions packages/cspell-lib/api/api.d.ts
Expand Up @@ -3,7 +3,6 @@ import { Glob, CSpellSettingsWithSourceTrace, ReplaceMap, DictionaryInformation,
export * from '@cspell/cspell-types';
import { CompoundWordsMethod, SuggestionResult, SuggestionCollector, WeightMap } from 'cspell-trie-lib';
export { CompoundWordsMethod, SuggestionCollector, SuggestionResult } from 'cspell-trie-lib';
import { Sequence } from 'gensequence';
export * from 'cspell-io';
import { URI } from 'vscode-uri';

Expand Down Expand Up @@ -251,26 +250,26 @@ declare function splitCamelCaseWord(word: string): string[];
/**
* This function lets you iterate over regular expression matches.
*/
declare function match(reg: RegExp, text: string): Sequence<RegExpExecArray>;
declare function matchStringToTextOffset(reg: RegExp, text: string): Sequence<TextOffset>;
declare function matchToTextOffset(reg: RegExp, text: TextOffset): Sequence<TextOffset>;
declare function extractLinesOfText(text: string): Sequence<TextOffset>;
declare function match(reg: RegExp, text: string): Iterable<RegExpExecArray>;
declare function matchStringToTextOffset(reg: RegExp, text: string): Iterable<TextOffset>;
declare function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable<TextOffset>;
declare function extractLinesOfText(text: string): Iterable<TextOffset>;
/**
* Extract out whole words from a string of text.
*/
declare function extractWordsFromText(text: string): Sequence<TextOffset>;
declare function extractWordsFromText(text: string): Iterable<TextOffset>;
/**
* Extract out whole words from a string of text.
*/
declare function extractWordsFromTextOffset(text: TextOffset): Sequence<TextOffset>;
declare function extractWordsFromTextOffset(text: TextOffset): Iterable<TextOffset>;
declare function cleanText(text: string): string;
declare function cleanTextOffset(text: TextOffset): TextOffset;
/**
* Extract out whole words and words containing numbers from a string of text.
*/
declare function extractPossibleWordsFromTextOffset(text: TextOffset): Sequence<TextOffset>;
declare function extractWordsFromCode(text: string): Sequence<TextOffset>;
declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Sequence<TextOffset>;
declare function extractPossibleWordsFromTextOffset(text: TextOffset): Iterable<TextOffset>;
declare function extractWordsFromCode(text: string): Iterable<TextOffset>;
declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Iterable<TextOffset>;
declare function isUpperCase(word: string): boolean;
declare function isLowerCase(word: string): boolean;
declare function isFirstCharacterUpper(word: string): boolean;
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-lib/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/cspell-lib/package.json
Expand Up @@ -49,6 +49,7 @@
"homepage": "https://github.com/streetsidesoftware/cspell#readme",
"dependencies": {
"@cspell/cspell-bundled-dicts": "^5.18.5",
"@cspell/cspell-pipe": "^5.18.5",
"@cspell/cspell-types": "^5.18.5",
"clear-module": "^4.1.2",
"comment-json": "^4.2.2",
Expand Down
17 changes: 9 additions & 8 deletions packages/cspell-lib/src/textValidator.test.ts
@@ -1,17 +1,18 @@
import { opConcatMap, pipeSync } from '@cspell/cspell-pipe';
import { genSequence } from 'gensequence';
import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef';
import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary';
import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary';
import {
validateText,
hasWordCheck,
calcTextInclusionRanges,
_testMethods,
hasWordCheck,
HasWordOptions,
validateText,
ValidationOptions,
_testMethods,
} from './textValidator';
import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef';
import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary';
import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary';
import { FreqCounter } from './util/FreqCounter';
import * as Text from './util/text';
import { genSequence } from 'gensequence';
import { settingsToValidateOptions as sToV } from './validator';

// cspell:ignore whiteberry redmango lightbrown redberry
Expand Down Expand Up @@ -198,7 +199,7 @@ describe('Validate textValidator functions', () => {
const text = '_Test the _line_breaks___from __begin to end__ _eol_';
const inclusionRanges = calcTextInclusionRanges(text, { ignoreRegExpList: [/_/g] });
const mapper = _testMethods.mapWordsAgainstRanges(inclusionRanges);
const results = Text.matchStringToTextOffset(/\w+/g, text).concatMap(mapper).toArray();
const results = [...pipeSync(Text.matchStringToTextOffset(/\w+/g, text), opConcatMap(mapper))];
const words = results.map((r) => r.text);
expect(words.join(' ')).toBe('Test the line breaks from begin to end eol');
});
Expand Down
88 changes: 51 additions & 37 deletions packages/cspell-lib/src/textValidator.ts
@@ -1,5 +1,6 @@
import { opConcatMap, opFilter, opMap, pipeSync as pipe, toArray } from '@cspell/cspell-pipe';
import type { TextOffset } from '@cspell/cspell-types';
import { Sequence } from 'gensequence';
import { genSequence, Sequence } from 'gensequence';
import * as RxPat from './Settings/RegExpPatterns';
import { HasOptions, SpellingDictionary } from './SpellingDictionary/SpellingDictionary';
import * as Text from './util/text';
Expand Down Expand Up @@ -57,18 +58,21 @@ export function validateText(

const validator = lineValidator(dict, options);

return Text.extractLinesOfText(text)
.concatMap(mapTextOffsetsAgainstRanges(includeRanges))
.concatMap(validator)
.filter((wo) => {
const word = wo.text;
// Keep track of the number of times we have seen the same problem
const n = (mapOfProblems.get(word) || 0) + 1;
mapOfProblems.set(word, n);
// Filter out if there are too many
return n <= maxDuplicateProblems;
})
.take(maxNumberOfProblems);
return genSequence(
pipe(
Text.extractLinesOfText(text),
opConcatMap(mapTextOffsetsAgainstRanges(includeRanges)),
opConcatMap(validator),
opFilter((wo) => {
const word = wo.text;
// Keep track of the number of times we have seen the same problem
const n = (mapOfProblems.get(word) || 0) + 1;
mapOfProblems.set(word, n);
// Filter out if there are too many
return n <= maxDuplicateProblems;
})
)
).take(maxNumberOfProblems);
}

export function calcTextInclusionRanges(text: string, options: IncludeExcludeOptions): TextRange.MatchRange[] {
Expand Down Expand Up @@ -155,20 +159,23 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return [vr];
}

const codeWordResults = Text.extractWordsFromCodeTextOffset(vr)
.filter(filterAlreadyChecked)
.map((t) => ({ ...t, line: vr.line }))
.map(checkFlagWords)
.filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged))
.map((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions)))
.filter(rememberFilter((wo) => wo.isFlagged || !wo.isFound))
.filter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))) // Filter out any repeated characters like xxxxxxxxxx
// get back the original text.
.map((wo) => ({
...wo,
text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length),
}))
.toArray();
const codeWordResults = toArray(
pipe(
Text.extractWordsFromCodeTextOffset(vr),
opFilter(filterAlreadyChecked),
opMap((t) => ({ ...t, line: vr.line })),
opMap(checkFlagWords),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opMap((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions))),
opFilter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)),
opFilter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))), // Filter out any repeated characters like xxxxxxxxxx
// get back the original text.
opMap((wo) => ({
...wo,
text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length),
}))
)
);

if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
rememberFilter((_) => false)(vr);
Expand All @@ -188,13 +195,16 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return [vr];
}

const mismatches: ValidationResult[] = Text.extractWordsFromTextOffset(possibleWord)
.filter(filterAlreadyChecked)
.map((wo) => ({ ...wo, line: lineSegment }))
.map(checkFlagWords)
.filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged))
.concatMap(checkFullWord)
.toArray();
const mismatches: ValidationResult[] = toArray(
pipe(
Text.extractWordsFromTextOffset(possibleWord),
opFilter(filterAlreadyChecked),
opMap((wo) => ({ ...wo, line: lineSegment })),
opMap(checkFlagWords),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opConcatMap(checkFullWord)
)
);
if (mismatches.length) {
// Try the more expensive word splitter
const splitResult = split(lineSegment, possibleWord.offset, splitterIsValid);
Expand All @@ -206,9 +216,13 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return mismatches;
}

const checkedPossibleWords: Sequence<ValidationResult> = Text.extractPossibleWordsFromTextOffset(lineSegment)
.filter(filterAlreadyChecked)
.concatMap(checkPossibleWords);
const checkedPossibleWords: Sequence<ValidationResult> = genSequence(
pipe(
Text.extractPossibleWordsFromTextOffset(lineSegment),
opFilter(filterAlreadyChecked),
opConcatMap(checkPossibleWords)
)
);
return checkedPossibleWords;
};

Expand Down