diff --git a/packages/cspell-lib/api/api.d.ts b/packages/cspell-lib/api/api.d.ts index 578bc6f5e22..f99e857a6c6 100644 --- a/packages/cspell-lib/api/api.d.ts +++ b/packages/cspell-lib/api/api.d.ts @@ -3,7 +3,6 @@ import { Glob, CSpellSettingsWithSourceTrace, ReplaceMap, DictionaryInformation, export * from '@cspell/cspell-types'; import { CompoundWordsMethod, SuggestionResult, SuggestionCollector, WeightMap } from 'cspell-trie-lib'; export { CompoundWordsMethod, SuggestionCollector, SuggestionResult } from 'cspell-trie-lib'; -import { Sequence } from 'gensequence'; export * from 'cspell-io'; import { URI } from 'vscode-uri'; @@ -251,26 +250,26 @@ declare function splitCamelCaseWord(word: string): string[]; /** * This function lets you iterate over regular expression matches. */ -declare function match(reg: RegExp, text: string): Sequence; -declare function matchStringToTextOffset(reg: RegExp, text: string): Sequence; -declare function matchToTextOffset(reg: RegExp, text: TextOffset): Sequence; -declare function extractLinesOfText(text: string): Sequence; +declare function match(reg: RegExp, text: string): Iterable; +declare function matchStringToTextOffset(reg: RegExp, text: string): Iterable; +declare function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable; +declare function extractLinesOfText(text: string): Iterable; /** * Extract out whole words from a string of text. */ -declare function extractWordsFromText(text: string): Sequence; +declare function extractWordsFromText(text: string): Iterable; /** * Extract out whole words from a string of text. */ -declare function extractWordsFromTextOffset(text: TextOffset): Sequence; +declare function extractWordsFromTextOffset(text: TextOffset): Iterable; declare function cleanText(text: string): string; declare function cleanTextOffset(text: TextOffset): TextOffset; /** * Extract out whole words and words containing numbers from a string of text. */ -declare function extractPossibleWordsFromTextOffset(text: TextOffset): Sequence; -declare function extractWordsFromCode(text: string): Sequence; -declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Sequence; +declare function extractPossibleWordsFromTextOffset(text: TextOffset): Iterable; +declare function extractWordsFromCode(text: string): Iterable; +declare function extractWordsFromCodeTextOffset(textOffset: TextOffset): Iterable; declare function isUpperCase(word: string): boolean; declare function isLowerCase(word: string): boolean; declare function isFirstCharacterUpper(word: string): boolean; diff --git a/packages/cspell-lib/package-lock.json b/packages/cspell-lib/package-lock.json index 0b3a185921e..4fd538da1c7 100644 --- a/packages/cspell-lib/package-lock.json +++ b/packages/cspell-lib/package-lock.json @@ -10,6 +10,7 @@ "license": "MIT", "dependencies": { "@cspell/cspell-bundled-dicts": "^5.18.5", + "@cspell/cspell-pipe": "^5.18.5", "@cspell/cspell-types": "^5.18.5", "clear-module": "^4.1.2", "comment-json": "^4.2.2", diff --git a/packages/cspell-lib/package.json b/packages/cspell-lib/package.json index 18a5b45a606..7510bfc3d19 100644 --- a/packages/cspell-lib/package.json +++ b/packages/cspell-lib/package.json @@ -49,6 +49,7 @@ "homepage": "https://github.com/streetsidesoftware/cspell#readme", "dependencies": { "@cspell/cspell-bundled-dicts": "^5.18.5", + "@cspell/cspell-pipe": "^5.18.5", "@cspell/cspell-types": "^5.18.5", "clear-module": "^4.1.2", "comment-json": "^4.2.2", diff --git a/packages/cspell-lib/src/textValidator.test.ts b/packages/cspell-lib/src/textValidator.test.ts index e4eba1b90d5..e5b986965df 100644 --- a/packages/cspell-lib/src/textValidator.test.ts +++ b/packages/cspell-lib/src/textValidator.test.ts @@ -1,17 +1,18 @@ +import { opConcatMap, pipeSync } from '@cspell/cspell-pipe'; +import { genSequence } from 'gensequence'; +import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef'; +import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary'; +import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary'; import { - validateText, - hasWordCheck, calcTextInclusionRanges, - _testMethods, + hasWordCheck, HasWordOptions, + validateText, ValidationOptions, + _testMethods, } from './textValidator'; -import { createCSpellSettingsInternal as csi } from './Models/CSpellSettingsInternalDef'; -import { createCollection, getDictionaryInternal, SpellingDictionaryOptions } from './SpellingDictionary'; -import { createSpellingDictionary } from './SpellingDictionary/createSpellingDictionary'; import { FreqCounter } from './util/FreqCounter'; import * as Text from './util/text'; -import { genSequence } from 'gensequence'; import { settingsToValidateOptions as sToV } from './validator'; // cspell:ignore whiteberry redmango lightbrown redberry @@ -198,7 +199,7 @@ describe('Validate textValidator functions', () => { const text = '_Test the _line_breaks___from __begin to end__ _eol_'; const inclusionRanges = calcTextInclusionRanges(text, { ignoreRegExpList: [/_/g] }); const mapper = _testMethods.mapWordsAgainstRanges(inclusionRanges); - const results = Text.matchStringToTextOffset(/\w+/g, text).concatMap(mapper).toArray(); + const results = [...pipeSync(Text.matchStringToTextOffset(/\w+/g, text), opConcatMap(mapper))]; const words = results.map((r) => r.text); expect(words.join(' ')).toBe('Test the line breaks from begin to end eol'); }); diff --git a/packages/cspell-lib/src/textValidator.ts b/packages/cspell-lib/src/textValidator.ts index f79f3d0cf77..97864cd1200 100644 --- a/packages/cspell-lib/src/textValidator.ts +++ b/packages/cspell-lib/src/textValidator.ts @@ -1,5 +1,6 @@ +import { opConcatMap, opFilter, opMap, pipeSync as pipe, toArray } from '@cspell/cspell-pipe'; import type { TextOffset } from '@cspell/cspell-types'; -import { Sequence } from 'gensequence'; +import { genSequence, Sequence } from 'gensequence'; import * as RxPat from './Settings/RegExpPatterns'; import { HasOptions, SpellingDictionary } from './SpellingDictionary/SpellingDictionary'; import * as Text from './util/text'; @@ -57,18 +58,21 @@ export function validateText( const validator = lineValidator(dict, options); - return Text.extractLinesOfText(text) - .concatMap(mapTextOffsetsAgainstRanges(includeRanges)) - .concatMap(validator) - .filter((wo) => { - const word = wo.text; - // Keep track of the number of times we have seen the same problem - const n = (mapOfProblems.get(word) || 0) + 1; - mapOfProblems.set(word, n); - // Filter out if there is too many - return n <= maxDuplicateProblems; - }) - .take(maxNumberOfProblems); + return genSequence( + pipe( + Text.extractLinesOfText(text), + opConcatMap(mapTextOffsetsAgainstRanges(includeRanges)), + opConcatMap(validator), + opFilter((wo) => { + const word = wo.text; + // Keep track of the number of times we have seen the same problem + const n = (mapOfProblems.get(word) || 0) + 1; + mapOfProblems.set(word, n); + // Filter out if there is too many + return n <= maxDuplicateProblems; + }) + ) + ).take(maxNumberOfProblems); } export function calcTextInclusionRanges(text: string, options: IncludeExcludeOptions): TextRange.MatchRange[] { @@ -155,20 +159,23 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li return [vr]; } - const codeWordResults = Text.extractWordsFromCodeTextOffset(vr) - .filter(filterAlreadyChecked) - .map((t) => ({ ...t, line: vr.line })) - .map(checkFlagWords) - .filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)) - .map((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions))) - .filter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)) - .filter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))) // Filter out any repeated characters like xxxxxxxxxx - // get back the original text. - .map((wo) => ({ - ...wo, - text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length), - })) - .toArray(); + const codeWordResults = toArray( + pipe( + Text.extractWordsFromCodeTextOffset(vr), + opFilter(filterAlreadyChecked), + opMap((t) => ({ ...t, line: vr.line })), + opMap(checkFlagWords), + opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)), + opMap((wo) => (wo.isFlagged ? wo : checkWord(wo, hasWordOptions))), + opFilter(rememberFilter((wo) => wo.isFlagged || !wo.isFound)), + opFilter(rememberFilter((wo) => !RxPat.regExRepeatedChar.test(wo.text))), // Filter out any repeated characters like xxxxxxxxxx + // get back the original text. + opMap((wo) => ({ + ...wo, + text: Text.extractText(lineSegment, wo.offset, wo.offset + wo.text.length), + })) + ) + ); if (!codeWordResults.length || isWordIgnored(vr.text) || checkWord(vr, hasWordOptions).isFound) { rememberFilter((_) => false)(vr); @@ -188,13 +195,16 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li return [vr]; } - const mismatches: ValidationResult[] = Text.extractWordsFromTextOffset(possibleWord) - .filter(filterAlreadyChecked) - .map((wo) => ({ ...wo, line: lineSegment })) - .map(checkFlagWords) - .filter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)) - .concatMap(checkFullWord) - .toArray(); + const mismatches: ValidationResult[] = toArray( + pipe( + Text.extractWordsFromTextOffset(possibleWord), + opFilter(filterAlreadyChecked), + opMap((wo) => ({ ...wo, line: lineSegment })), + opMap(checkFlagWords), + opFilter(rememberFilter((wo) => wo.text.length >= minWordLength || !!wo.isFlagged)), + opConcatMap(checkFullWord) + ) + ); if (mismatches.length) { // Try the more expensive word splitter const splitResult = split(lineSegment, possibleWord.offset, splitterIsValid); @@ -206,9 +216,13 @@ function lineValidator(dict: SpellingDictionary, options: ValidationOptions): Li return mismatches; } - const checkedPossibleWords: Sequence = Text.extractPossibleWordsFromTextOffset(lineSegment) - .filter(filterAlreadyChecked) - .concatMap(checkPossibleWords); + const checkedPossibleWords: Sequence = genSequence( + pipe( + Text.extractPossibleWordsFromTextOffset(lineSegment), + opFilter(filterAlreadyChecked), + opConcatMap(checkPossibleWords) + ) + ); return checkedPossibleWords; }; diff --git a/packages/cspell-lib/src/util/text.test.ts b/packages/cspell-lib/src/util/text.test.ts index b31e9ef5f79..5b4ad30c7f2 100644 --- a/packages/cspell-lib/src/util/text.test.ts +++ b/packages/cspell-lib/src/util/text.test.ts @@ -1,3 +1,4 @@ +import { opConcatMap, pipeSync as pipe, toArray } from '@cspell/cspell-pipe'; import type { TextOffset } from '@cspell/cspell-types'; import * as Text from './text'; import { splitCamelCaseWord } from './text'; @@ -32,11 +33,13 @@ describe('Util Text', () => { test('extract word from text', () => { expect( - Text.extractWordsFromText( - ` + toArray( + Text.extractWordsFromText( + ` // could've, would've, couldn't've, wasn't, y'all, 'twas, shouldn’t ` - ).toArray() + ) + ) ).toEqual([ { text: "could've", offset: 16 }, { text: "would've", offset: 26 }, @@ -50,11 +53,13 @@ describe('Util Text', () => { test('extract words', () => { expect( - Text.extractWordsFromText( - ` + toArray( + Text.extractWordsFromText( + ` expect(splitCamelCaseWord('hello')).to.deep.equal(['hello']); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'splitCamelCaseWord', offset: 20 }, @@ -65,11 +70,13 @@ describe('Util Text', () => { { text: 'hello', offset: 65 }, ]); expect( - Text.extractWordsFromText( - ` + toArray( + Text.extractWordsFromText( + ` expect(splitCamelCaseWord('hello')).to.deep.equal(['hello']); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'splitCamelCaseWord', offset: 20 }, @@ -80,11 +87,13 @@ describe('Util Text', () => { { text: 'hello', offset: 65 }, ]); expect( - Text.extractWordsFromText( - ` + toArray( + Text.extractWordsFromText( + ` expect(splitCamelCaseWord('hello')); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'splitCamelCaseWord', offset: 20 }, @@ -94,11 +103,13 @@ describe('Util Text', () => { test('extract words from code', () => { expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` expect(splitCamelCaseWord('hello')).to.deep.equal(['hello']); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'split', offset: 20 }, @@ -112,11 +123,13 @@ describe('Util Text', () => { { text: 'hello', offset: 65 }, ]); expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` expect(regExp.match(first_line)); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'reg', offset: 20 }, @@ -126,11 +139,13 @@ describe('Util Text', () => { { text: 'line', offset: 39 }, ]); expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` expect(aHELLO); ` - ).toArray() + ) + ) ).toEqual([ { text: 'expect', offset: 13 }, { text: 'a', offset: 20 }, @@ -139,7 +154,7 @@ describe('Util Text', () => { // cspell:ignore shouldn const t = "\n const x = 'shouldn\\'t';\n "; - expect(Text.extractWordsFromCode(t).toArray()).toEqual([ + expect(toArray(Text.extractWordsFromCode(t))).toEqual([ { text: 'const', offset: 13 }, { text: 'x', offset: 19 }, { text: "shouldn\\'t", offset: 24 }, @@ -147,9 +162,7 @@ describe('Util Text', () => { }); test('splits words like HTMLInput', () => { - const words = Text.extractWordsFromCode('var value = HTMLInput.value;') - .map(({ text }) => text) - .toArray(); + const words = toArray(Text.extractWordsFromCode('var value = HTMLInput.value;')).map(({ text }) => text); expect(words).toEqual(['var', 'value', 'HTML', 'Input', 'value']); }); @@ -164,39 +177,39 @@ describe('Util Text', () => { test('tests skipping Chinese characters', () => { expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` 携程旅行网 ` - ) - .map((wo) => wo.text) - .toArray() + ) + ).map((wo) => wo.text) ).toEqual(['a', 'href', 'http', 'www', 'ctrip', 'com', 'title', 'a']); }); test('tests skipping Japanese characters', () => { expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` Example text: gitのpackageのみ際インストール gitのpackageのみ際インストール title="携程旅行网" ` - ) - .map((wo) => wo.text) - .toArray() + ) + ).map((wo) => wo.text) ).toEqual(['Example', 'text', 'git', 'package', 'git', 'package', 'title']); }); test('tests Greek characters', () => { expect( - Text.extractWordsFromCode( - ` + toArray( + Text.extractWordsFromCode( + ` Γ γ gamma, γάμμα ` - ) - .map((wo) => wo.text) - .toArray() + ) + ).map((wo) => wo.text) ).toEqual(['Γ', 'γ', 'gamma', 'γάμμα']); }); @@ -209,9 +222,7 @@ describe('Util Text', () => { ${nfc('caféÁ')} | ${[nfc('café'), nfc('Á')]} ${nfd('caféÁ')} | ${[nfd('café'), nfd('Á')]} `('extractWordsFromCode "$text"', ({ text, expected }) => { - const r = Text.extractWordsFromCode(text) - .map((wo) => wo.text) - .toArray(); + const r = toArray(Text.extractWordsFromCode(text)).map((wo) => wo.text); expect(r).toEqual(expected); }); @@ -223,9 +234,7 @@ describe('Util Text', () => { test('tests breaking up text into lines', () => { const parts = ['', '/*', ' * this is a comment.\r', ' */', '']; const sampleText = parts.join('\n'); - const r = Text.extractLinesOfText(sampleText) - .map((a) => a.text) - .toArray(); + const r = toArray(Text.extractLinesOfText(sampleText)).map((a) => a.text); expect(r.join('')).toBe(parts.join('\n')); const lines = [...Text.extractLinesOfText(sampleCode)].map((m) => m.text); expect(lines.length).toBe(sampleCode.split('\n').length); @@ -234,9 +243,7 @@ describe('Util Text', () => { test('tests breaking up text into lines (single line)', () => { const parts = ['There is only one line.']; const sampleText = parts.join('\n'); - const r = Text.extractLinesOfText(sampleText) - .map((a) => a.text) - .toArray(); + const r = toArray(Text.extractLinesOfText(sampleText)).map((a) => a.text); const rText = r.join(''); expect(rText).toBe(parts.join('\n')); expect(rText).toBe(sampleText); @@ -244,17 +251,15 @@ describe('Util Text', () => { test('tests extractLinesOfText', () => { const linesA = [...Text.extractLinesOfText(sampleCode)].map((m) => m.text); - const linesB = Text.extractLinesOfText(sampleCode) - .map((m) => m.text) - .toArray(); + const linesB = toArray(Text.extractLinesOfText(sampleCode)).map((m) => m.text); expect(linesB).toEqual(linesA); }); test('extractText', () => { const line = Text.textOffset('This is a line of text to be processed.'); - const words = Text.extractWordsFromTextOffset(line); - const results = words.map((wo) => Text.extractText(line, wo.offset, wo.offset + wo.text.length)).toArray(); - const expected = words.map((wo) => wo.text).toArray(); + const words = toArray(Text.extractWordsFromTextOffset(line)); + const results = words.map((wo) => Text.extractText(line, wo.offset, wo.offset + wo.text.length)); + const expected = words.map((wo) => wo.text); expect(results).toEqual(expected); }); }); @@ -377,9 +382,12 @@ function nfd(s: string): string { } function match(regexp: RegExp, text: string): (string | number)[] { - const x = Text.matchStringToTextOffset(regexp, text) - .concatMap((t) => [t.text, t.offset]) - .toArray(); + const x = toArray( + pipe( + Text.matchStringToTextOffset(regexp, text), + opConcatMap((t) => [t.text, t.offset]) + ) + ); return x; } diff --git a/packages/cspell-lib/src/util/text.ts b/packages/cspell-lib/src/util/text.ts index b0019fe189b..1ab618e5dca 100644 --- a/packages/cspell-lib/src/util/text.ts +++ b/packages/cspell-lib/src/util/text.ts @@ -1,5 +1,6 @@ +import { opConcatMap, opMap, pipeSync as pipe } from '@cspell/cspell-pipe'; import type { TextDocumentOffset, TextOffset } from '@cspell/cspell-types'; -import { Sequence, sequenceFromRegExpMatch } from 'gensequence'; +import { sequenceFromRegExpMatch } from 'gensequence'; import { binarySearch } from './search'; import { regExAccents, @@ -32,7 +33,7 @@ export function splitCamelCaseWordWithOffset(wo: TextOffset): Array * Split camelCase words into an array of strings. */ export function splitCamelCaseWord(word: string): string[] { - const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.substr(1).toLowerCase()); + const wPrime = word.replace(regExUpperSOrIng, (s) => s[0] + s.slice(1).toLowerCase()); const separator = '_<^*_*^>_'; const pass1 = wPrime.replace(regExSplitWords, '$1' + separator + '$2'); const pass2 = pass1.replace(regExSplitWords2, '$1' + separator + '$2'); @@ -42,35 +43,39 @@ export function splitCamelCaseWord(word: string): string[] { /** * This function lets you iterate over regular expression matches. */ -export function match(reg: RegExp, text: string): Sequence { +export function match(reg: RegExp, text: string): Iterable { return sequenceFromRegExpMatch(reg, text); } -export function matchStringToTextOffset(reg: RegExp, text: string): Sequence { +export function matchStringToTextOffset(reg: RegExp, text: string): Iterable { return matchToTextOffset(reg, { text, offset: 0 }); } -export function matchToTextOffset(reg: RegExp, text: TextOffset): Sequence { +export function matchToTextOffset(reg: RegExp, text: TextOffset): Iterable { const textOffset = text; const fnOffsetMap = offsetMap(textOffset.offset); - return match(reg, textOffset.text).map((m) => fnOffsetMap({ text: m[0], offset: m.index })); + textOffset.text.matchAll(reg); + return pipe( + match(reg, textOffset.text), + opMap((m) => fnOffsetMap({ text: m[0], offset: m.index || 0 })) + ); } -export function extractLinesOfText(text: string): Sequence { +export function extractLinesOfText(text: string): Iterable { return matchStringToTextOffset(regExLines, text); } /** * Extract out whole words from a string of text. */ -export function extractWordsFromText(text: string): Sequence { +export function extractWordsFromText(text: string): Iterable { return extractWordsFromTextOffset(textOffset(text)); } /** * Extract out whole words from a string of text. */ -export function extractWordsFromTextOffset(text: TextOffset): Sequence { +export function extractWordsFromTextOffset(text: TextOffset): Iterable { const reg = new RegExp(regExWords); return matchToTextOffset(reg, cleanTextOffset(text)); } @@ -90,17 +95,17 @@ export function cleanTextOffset(text: TextOffset): TextOffset { /** * Extract out whole words and words containing numbers from a string of text. */ -export function extractPossibleWordsFromTextOffset(text: TextOffset): Sequence { +export function extractPossibleWordsFromTextOffset(text: TextOffset): Iterable { const reg = new RegExp(regExWordsAndDigits); return matchToTextOffset(reg, text); } -export function extractWordsFromCode(text: string): Sequence { +export function extractWordsFromCode(text: string): Iterable { return extractWordsFromCodeTextOffset(textOffset(text)); } -export function extractWordsFromCodeTextOffset(textOffset: TextOffset): Sequence { - return extractWordsFromTextOffset(textOffset).concatMap(splitCamelCaseWordWithOffset); +export function extractWordsFromCodeTextOffset(textOffset: TextOffset): Iterable { + return pipe(extractWordsFromTextOffset(textOffset), opConcatMap(splitCamelCaseWordWithOffset)); } export function isUpperCase(word: string): boolean { @@ -198,7 +203,14 @@ export function calculateTextDocumentOffsets( doc: string, wordOffsets: T[] ): (TextDocumentOffset & T)[] { - const lines = [-1, ...match(/\n/g, doc).map((a) => a.index), doc.length]; + const lines = [ + -1, + ...pipe( + match(/\n/g, doc), + opMap((a) => a.index) + ), + doc.length, + ]; let lastRow = -1; let lastOffset = doc.length + 1; diff --git a/packages/cspell-lib/src/wordListHelper.ts b/packages/cspell-lib/src/wordListHelper.ts index d5a76b22e47..d5ddd9457b0 100644 --- a/packages/cspell-lib/src/wordListHelper.ts +++ b/packages/cspell-lib/src/wordListHelper.ts @@ -1,8 +1,8 @@ // cSpell:enableCompoundWords -import * as Text from './util/text'; import { readLines } from './util/fileReader'; -import { toIterableIterator, concatIterables } from './util/iterableIteratorLib'; +import { concatIterables, toIterableIterator } from './util/iterableIteratorLib'; import { logError } from './util/logger'; +import * as Text from './util/text'; const regExpWordsWithSpaces = /^\s*\p{L}+(?:\s+\p{L}+){0,3}$/u; @@ -21,9 +21,7 @@ export function loadWordsNoError(filename: string): Promise text) - .toArray(); + return [...Text.extractWordsFromText(line)].map(({ text }) => text); } export function splitCodeWords(words: string[]): string[] { diff --git a/packages/cspell-pipe/src/__snapshots__/index.test.ts.snap b/packages/cspell-pipe/src/__snapshots__/index.test.ts.snap index f2303611cfd..1434445e361 100644 --- a/packages/cspell-pipe/src/__snapshots__/index.test.ts.snap +++ b/packages/cspell-pipe/src/__snapshots__/index.test.ts.snap @@ -5,6 +5,7 @@ Array [ "helpers", "isAsyncIterable", "opAwaitAsync", + "opConcatMap", "opFilter", "opFlatten", "opJoinStrings", diff --git a/packages/cspell-pipe/src/index.ts b/packages/cspell-pipe/src/index.ts index 6d846a60b5d..78f3556b57e 100644 --- a/packages/cspell-pipe/src/index.ts +++ b/packages/cspell-pipe/src/index.ts @@ -2,7 +2,7 @@ import * as _helpers from './helpers'; import * as _operators from './operators'; export { isAsyncIterable, toArray, toAsyncIterable } from './helpers'; -export { opAwaitAsync, opFilter, opFlatten, opJoinStrings, opMap, opTap, opUnique } from './operators'; +export { opAwaitAsync, opConcatMap, opFilter, opFlatten, opJoinStrings, opMap, opTap, opUnique } from './operators'; export { pipeAsync, pipeSync } from './pipe'; export const operators = _operators; diff --git a/packages/cspell-pipe/src/operators/__snapshots__/index.test.ts.snap b/packages/cspell-pipe/src/operators/__snapshots__/index.test.ts.snap index 29ff4cc3201..39c4f416bc5 100644 --- a/packages/cspell-pipe/src/operators/__snapshots__/index.test.ts.snap +++ b/packages/cspell-pipe/src/operators/__snapshots__/index.test.ts.snap @@ -3,6 +3,9 @@ exports[`Operators operators 1`] = ` Array [ "opAwaitAsync", + "opConcatMap", + "opConcatMapAsync", + "opConcatMapSync", "opFilter", "opFilterAsync", "opFilterSync", diff --git a/packages/cspell-pipe/src/operators/concatMap.test.ts b/packages/cspell-pipe/src/operators/concatMap.test.ts new file mode 100644 index 00000000000..31c0991dc8f --- /dev/null +++ b/packages/cspell-pipe/src/operators/concatMap.test.ts @@ -0,0 +1,28 @@ +import { opConcatMap } from '.'; +import { toArray } from '../helpers'; +import { pipeAsync, pipeSync } from '../pipe'; + +describe('Validate map', () => { + test('map', async () => { + const values = ['one', 'two', 'three']; + + const mapFn = (v: string) => v.split('').concat(['|']); + const mapFn2 = (v: string) => [v, v.toUpperCase()]; + + const expected = 'one|two|three|' + .split('') + .map(mapFn2) + .reduce((a, b) => a.concat(b), []); + + const mapToLen = opConcatMap(mapFn); + + const s = pipeSync(values, mapToLen, opConcatMap(mapFn2)); + const a = pipeAsync(values, mapToLen, opConcatMap(mapFn2)); + + const sync = toArray(s); + const async = await toArray(a); + + expect(sync).toEqual(expected); + expect(async).toEqual(expected); + }); +}); diff --git a/packages/cspell-pipe/src/operators/concatMap.ts b/packages/cspell-pipe/src/operators/concatMap.ts new file mode 100644 index 00000000000..5e685b39a43 --- /dev/null +++ b/packages/cspell-pipe/src/operators/concatMap.ts @@ -0,0 +1,24 @@ +import { toPipeFn } from '../helpers/util'; + +export function opConcatMapAsync( + mapFn: (v: T) => AsyncIterable | Iterable +): (iter: AsyncIterable) => AsyncIterable { + async function* fn(iter: Iterable | AsyncIterable) { + for await (const v of iter) { + yield* mapFn(v); + } + } + + return fn; +} + +export function opConcatMapSync(mapFn: (v: T) => Iterable): (iter: Iterable) => Iterable { + function* fn(iter: Iterable) { + for (const v of iter) { + yield* mapFn(v); + } + } + return fn; +} + +export const opConcatMap = (fn: (v: T) => Iterable) => toPipeFn(opConcatMapSync(fn), opConcatMapAsync(fn)); diff --git a/packages/cspell-pipe/src/operators/index.test.ts b/packages/cspell-pipe/src/operators/index.test.ts index ebddaac16ed..e855ebcffd7 100644 --- a/packages/cspell-pipe/src/operators/index.test.ts +++ b/packages/cspell-pipe/src/operators/index.test.ts @@ -5,6 +5,9 @@ describe('Operators', () => { expect(Object.keys(operators).sort()).toMatchSnapshot(); expect(operators.opAwaitAsync).toBeInstanceOf(Function); + expect(operators.opConcatMap).toBeInstanceOf(Function); + expect(operators.opConcatMapAsync).toBeInstanceOf(Function); + expect(operators.opConcatMapSync).toBeInstanceOf(Function); expect(operators.opFilter).toBeInstanceOf(Function); expect(operators.opFilterAsync).toBeInstanceOf(Function); expect(operators.opFilterSync).toBeInstanceOf(Function); diff --git a/packages/cspell-pipe/src/operators/index.ts b/packages/cspell-pipe/src/operators/index.ts index 1abf6396c52..d3f7c0d9a3c 100644 --- a/packages/cspell-pipe/src/operators/index.ts +++ b/packages/cspell-pipe/src/operators/index.ts @@ -1,4 +1,5 @@ export { opAwaitAsync } from './await'; +export { opConcatMap, opConcatMapAsync, opConcatMapSync } from './concatMap'; export { opFilter as opFilter, opFilterAsync, opFilterSync } from './filter'; export { opFlatten, opFlattenAsync, opFlattenSync } from './flatten'; export { opJoinStrings, opJoinStringsAsync, opJoinStringsSync } from './joinStrings';