Skip to content

Commit

Permalink
feat: Use pipe instead of GenSequence (#2491)
Browse files Browse the repository at this point in the history
* feat: Use pipe instead of GenSequence
   This is a **minor API breakage**. The workaround is to wrap any call that used to return a `Sequence` with `genSequence`.
* Update text.ts
  • Loading branch information
Jason3S committed Feb 18, 2022
1 parent 7a2e3bc commit 65cb6f0
Show file tree
Hide file tree
Showing 15 changed files with 227 additions and 133 deletions.
19 changes: 9 additions & 10 deletions packages/cspell-lib/api/api.d.ts
Expand Up @@ -3,7 +3,6 @@ import { Glob, CSpellSettingsWithSourceTrace, ReplaceMap, DictionaryInformation,
export * from '@cspell/cspell-types';
import { CompoundWordsMethod, SuggestionResult, SuggestionCollector, WeightMap } from 'cspell-trie-lib';
export { CompoundWordsMethod, SuggestionCollector, SuggestionResult } from 'cspell-trie-lib';
import { Sequence } from 'gensequence';
export * from 'cspell-io';
import { URI } from 'vscode-uri';

Expand Down Expand Up @@ -251,26 +250,26 @@ declare function splitCamelCaseWord(word: string): string[];
/**
* This function lets you iterate over regular expression matches.
*/
declare function match(reg: RegExp, text: string): Sequence<RegExpExecArray>;
declare function matchStringToTextOffset(reg: RegExp, text: string): Sequence<TextOffset>;
declare function matchToTextOffset(reg: RegExp, text: TextOffset): Sequence<TextOffset>;
declare function extractLinesOfText(text: string): Sequence<TextOffset>;
declare function match(reg: RegExp, text: string): Iterable<RegExpExecArray>;
declare function matchStringToTextOffset(reg: RegExp, text: string): Iterable<TextOffset>;
declare function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable<TextOffset>;
declare function extractLinesOfText(text: string): Iterable<TextOffset>;
/**
* Extract out whole words from a string of text.
*/
declare function extractWordsFromText(text: string): Sequence<TextOffset>;
declare function extractWordsFromText(text: string): Iterable<TextOffset>;
/**
* Extract out whole words from a string of text.
*/
declare function extractWordsFromTextOffset(text: TextOffset): Sequence<TextOffset>;
declare function extractWordsFromTextOffset(text: TextOffset): Iterable<TextOffset>;
declare function cleanText(text: string): string;
declare function cleanTextOffset(text: TextOffset): TextOffset;
/**
* Extract out whole words and words containing numbers from a string of text.
*/
declare function extractPossibleWordsFromTextOffset(text: TextOffset): Sequence<TextOffset>;
declare function extractWordsFromCode(text: string): Sequence<TextOffset>;
declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Sequence<TextOffset>;
declare function extractPossibleWordsFromTextOffset(text: TextOffset): Iterable<TextOffset>;
declare function extractWordsFromCode(text: string): Iterable<TextOffset>;
declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Iterable<TextOffset>;
declare function isUpperCase(word: string): boolean;
declare function isLowerCase(word: string): boolean;
declare function isFirstCharacterUpper(word: string): boolean;
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-lib/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions packages/cspell-lib/package.json
Expand Up @@ -49,6 +49,7 @@
"homepage": "https://github.com/streetsidesoftware/cspell#readme",
"dependencies": {
"@cspell/cspell-bundled-dicts": "^5.18.5",
"@cspell/cspell-pipe": "^5.18.5",
"@cspell/cspell-types": "^5.18.5",
"clear-module": "^4.1.2",
"comment-json": "^4.2.2",
Expand Down
17 changes: 9 additions & 8 deletions packages/cspell-lib/src/textValidator.test.ts
@@ -1,17 +1,18 @@
import { opConcatMap, pipeSync } from '@cspell/cspell-pipe';
import { genSequence } from 'gensequence';
import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef';
import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary';
import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary';
import {
validateText,
hasWordCheck,
calcTextInclusionRanges,
_testMethods,
hasWordCheck,
HasWordOptions,
validateText,
ValidationOptions,
_testMethods,
} from './textValidator';
import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef';
import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary';
import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary';
import { FreqCounter } from './util/FreqCounter';
import * as Text from './util/text';
import { genSequence } from 'gensequence';
import { settingsToValidateOptions as sToV } from './validator';

// cspell:ignore whiteberry redmango lightbrown redberry
Expand Down Expand Up @@ -198,7 +199,7 @@ describe('Validate textValidator functions', () => {
const text = '_Test the _line_breaks___from __begin to end__ _eol_';
const inclusionRanges = calcTextInclusionRanges(text, { ignoreRegExpList: [/_/g] });
const mapper = _testMethods.mapWordsAgainstRanges(inclusionRanges);
const results = Text.matchStringToTextOffset(/\w+/g, text).concatMap(mapper).toArray();
const results = [...pipeSync(Text.matchStringToTextOffset(/\w+/g, text), opConcatMap(mapper))];
const words = results.map((r) => r.text);
expect(words.join(' ')).toBe('Test the line breaks from begin to end eol');
});
Expand Down
88 changes: 51 additions & 37 deletions packages/cspell-lib/src/textValidator.ts
@@ -1,5 +1,6 @@
import { opConcatMap, opFilter, opMap, pipeSync as pipe, toArray } from '@cspell/cspell-pipe';
import type { TextOffset } from '@cspell/cspell-types';
import { Sequence } from 'gensequence';
import { genSequence, Sequence } from 'gensequence';
import * as RxPat from './Settings/RegExpPatterns';
import { HasOptions, SpellingDictionary } from './SpellingDictionary/SpellingDictionary';
import * as Text from './util/text';
Expand Down Expand Up @@ -57,18 +58,21 @@ export function validateText(

const validator = lineValidator(dict, options);

return Text.extractLinesOfText(text)
.concatMap(mapTextOffsetsAgainstRanges(includeRanges))
.concatMap(validator)
.filter((wo) => {
const word = wo.text;
// Keep track of the number of times we have seen the same problem
const n = (mapOfProblems.get(word) || 0) + 1;
mapOfProblems.set(word, n);
// Filter out if there is too many
return n <= maxDuplicateProblems;
})
.take(maxNumberOfProblems);
return genSequence(
pipe(
Text.extractLinesOfText(text),
opConcatMap(mapTextOffsetsAgainstRanges(includeRanges)),
opConcatMap(validator),
opFilter((wo) => {
const word = wo.text;
// Keep track of the number of times we have seen the same problem
const n = (mapOfProblems.get(word) || 0) + 1;
mapOfProblems.set(word, n);
// Filter out if there is too many
return n <= maxDuplicateProblems;
})
)
).take(maxNumberOfProblems);
}

export function calcTextInclusionRanges(text: string, options: IncludeExcludeOptions): TextRange.MatchRange[] {
Expand Down Expand Up @@ -155,20 +159,23 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return [vr];
}

const codeWordResults = Text.extractWordsFromCodeTextOffset(vr)
.filter(filterAlreadyChecked)
.map((t) => ({ ...t, line: vr.line }))
.map(checkFlagWords)
.filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged))
.map((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions)))
.filter(rememberFilter((wo) => wo.isFlagged || !wo.isFound))
.filter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))) // Filter out any repeated characters like xxxxxxxxxx
// get back the original text.
.map((wo) => ({
...wo,
text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length),
}))
.toArray();
const codeWordResults = toArray(
pipe(
Text.extractWordsFromCodeTextOffset(vr),
opFilter(filterAlreadyChecked),
opMap((t) => ({ ...t, line: vr.line })),
opMap(checkFlagWords),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opMap((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions))),
opFilter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)),
opFilter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))), // Filter out any repeated characters like xxxxxxxxxx
// get back the original text.
opMap((wo) => ({
...wo,
text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length),
}))
)
);

if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) {
rememberFilter((_) => false)(vr);
Expand All @@ -188,13 +195,16 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return [vr];
}

const mismatches: ValidationResult[] = Text.extractWordsFromTextOffset(possibleWord)
.filter(filterAlreadyChecked)
.map((wo) => ({ ...wo, line: lineSegment }))
.map(checkFlagWords)
.filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged))
.concatMap(checkFullWord)
.toArray();
const mismatches: ValidationResult[] = toArray(
pipe(
Text.extractWordsFromTextOffset(possibleWord),
opFilter(filterAlreadyChecked),
opMap((wo) => ({ ...wo, line: lineSegment })),
opMap(checkFlagWords),
opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)),
opConcatMap(checkFullWord)
)
);
if (mismatches.length) {
// Try the more expensive word splitter
const splitResult = split(lineSegment, possibleWord.offset, splitterIsValid);
Expand All @@ -206,9 +216,13 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li
return mismatches;
}

const checkedPossibleWords: Sequence<ValidationResult> = Text.extractPossibleWordsFromTextOffset(lineSegment)
.filter(filterAlreadyChecked)
.concatMap(checkPossibleWords);
const checkedPossibleWords: Sequence<ValidationResult> = genSequence(
pipe(
Text.extractPossibleWordsFromTextOffset(lineSegment),
opFilter(filterAlreadyChecked),
opConcatMap(checkPossibleWords)
)
);
return checkedPossibleWords;
};

Expand Down

0 comments on commit 65cb6f0

Please sign in to comment.