From 75265094458d5f00482324ee188c0274f17c7475 Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Sat, 3 Apr 2021 10:01:19 +0300 Subject: [PATCH 1/3] feat!: Turn the lexer into a generator BREAKING CHANGE: The `push` argument of `new Lexer()` is dropped, and instead its `lex()` method is now a Generator that allows for the lexemes to be iterated. --- docs/07_parsing_yaml.md | 11 +-- src/parse/lexer.ts | 210 +++++++++++++++++++--------------------- src/parse/parser.ts | 9 +- 3 files changed, 109 insertions(+), 121 deletions(-) diff --git a/docs/07_parsing_yaml.md b/docs/07_parsing_yaml.md index 2176eb56..3ede6889 100644 --- a/docs/07_parsing_yaml.md +++ b/docs/07_parsing_yaml.md @@ -28,10 +28,8 @@ Both the Lexer and Parser accept incomplete input, allowing for them and the Com ```js import { Lexer } from 'yaml' -const tokens = [] -const lexer = new Lexer(tok => tokens.push(tok)) -lexer.lex('foo: bar\nfee:\n [24,"42"]\n', false) -console.dir(tokens) +const tokens = new Lexer().lex('foo: bar\nfee:\n [24,"42"]\n') +console.dir(Array.from(tokens)) > [ '\x02', '\x1F', 'foo', ':', ' ', '\x1F', 'bar', '\n', @@ -41,12 +39,11 @@ console.dir(tokens) ] ``` -#### `new Lexer(push: (token: string) => void)` +#### `new Lexer()` -#### `lexer.lex(src: string, incomplete: boolean): void` +#### `lexer.lex(src: string, incomplete?: boolean): Generator` The API for the lexer is rather minimal, and offers no configuration. -The constructor accepts a single callback as argument, defining a function that will be called once for each lexical token. If the input stream is chunked, the `lex()` method may be called separately for each chunk if the `incomplete` argument is `true`. At the end of input, `lex()` should be called a final time with `incomplete: false` to ensure that the remaining tokens are emitted. 
diff --git a/src/parse/lexer.ts b/src/parse/lexer.ts index 22b72ccc..0a43d631 100644 --- a/src/parse/lexer.ts +++ b/src/parse/lexer.ts @@ -112,8 +112,6 @@ const isNotIdentifierChar = (ch: string) => * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents */ export class Lexer { - private push: (token: string) => void - /** * Flag indicating whether the end of the current buffer marks the end of * all input @@ -159,30 +157,20 @@ export class Lexer { private next: State | null = null /** A pointer to `buffer`; the current position of the lexer. */ - private pos = 0 + private pos = 0; /** - * Define/initialise a YAML lexer. `push` will be called separately with each - * token when `lex()` is passed an input string. + * Generate YAML tokens from the `source` string. If `incomplete`, + * a part of the last line may be left as a buffer for the next call. * - * @public + * @returns A generator of lexical tokens */ - constructor(push: (token: string) => void) { - this.push = push - } - - /** - * Read YAML tokens from the `source` string, calling the callback - * defined in the constructor for each one. If `incomplete`, a part - * of the last line may be left as a buffer for the next call. - * - * @public - */ - lex(source: string, incomplete: boolean) { + *lex(source: string, incomplete = false) { if (source) this.buffer = this.buffer ? 
this.buffer + source : source this.atEnd = !incomplete let next: State | null = this.next || 'stream' - while (next && (incomplete || this.hasChars(1))) next = this.parseNext(next) + while (next && (incomplete || this.hasChars(1))) + next = yield* this.parseNext(next) } private atLineEnd() { @@ -241,32 +229,32 @@ export class Lexer { return this.buffer.substr(this.pos, n) } - private parseNext(next: State) { + private *parseNext(next: State) { switch (next) { case 'stream': - return this.parseStream() + return yield* this.parseStream() case 'line-start': - return this.parseLineStart() + return yield* this.parseLineStart() case 'block-start': - return this.parseBlockStart() + return yield* this.parseBlockStart() case 'doc': - return this.parseDocument() + return yield* this.parseDocument() case 'flow': - return this.parseFlowCollection() + return yield* this.parseFlowCollection() case 'quoted-scalar': - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case 'block-scalar': - return this.parseBlockScalar() + return yield* this.parseBlockScalar() case 'plain-scalar': - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseStream() { + private *parseStream() { let line = this.getLine() if (line === null) return this.setNext('stream') if (line[0] === BOM) { - this.pushCount(1) + yield* this.pushCount(1) line = line.substring(1) } if (line[0] === '%') { @@ -281,102 +269,102 @@ export class Lexer { if (ch === ' ' || ch === '\t') dirEnd -= 1 else break } - const n = this.pushCount(dirEnd) + this.pushSpaces(true) - this.pushCount(line.length - n) // possible comment + const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true)) + yield* this.pushCount(line.length - n) // possible comment this.pushNewline() return 'stream' } if (this.atLineEnd()) { - const sp = this.pushSpaces(true) - this.pushCount(line.length - sp) - this.pushNewline() + const sp = yield* this.pushSpaces(true) + yield* 
this.pushCount(line.length - sp) + yield* this.pushNewline() return 'stream' } - this.push(DOCUMENT) - return this.parseLineStart() + yield DOCUMENT + return yield* this.parseLineStart() } - private parseLineStart() { + private *parseLineStart() { const ch = this.charAt(0) if (!ch && !this.atEnd) return this.setNext('line-start') if (ch === '-' || ch === '.') { if (!this.atEnd && !this.hasChars(4)) return this.setNext('line-start') const s = this.peek(3) if (s === '---' && isEmpty(this.charAt(3))) { - this.pushCount(3) + yield* this.pushCount(3) this.indentValue = 0 this.indentNext = 0 return 'doc' } else if (s === '...' && isEmpty(this.charAt(3))) { - this.pushCount(3) + yield* this.pushCount(3) return 'stream' } } - this.indentValue = this.pushSpaces(false) + this.indentValue = yield* this.pushSpaces(false) if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1))) this.indentNext = this.indentValue - return this.parseBlockStart() + return yield* this.parseBlockStart() } - private parseBlockStart(): 'doc' | null { + private *parseBlockStart(): Generator { const [ch0, ch1] = this.peek(2) if (!ch1 && !this.atEnd) return this.setNext('block-start') if ((ch0 === '-' || ch0 === '?' 
|| ch0 === ':') && isEmpty(ch1)) { - const n = this.pushCount(1) + this.pushSpaces(true) + const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true)) this.indentNext = this.indentValue + 1 this.indentValue += n - return this.parseBlockStart() + return yield* this.parseBlockStart() } return 'doc' } - private parseDocument() { - this.pushSpaces(true) + private *parseDocument() { + yield* this.pushSpaces(true) const line = this.getLine() if (line === null) return this.setNext('doc') - let n = this.pushIndicators() + let n = yield* this.pushIndicators() switch (line[n]) { case '#': - this.pushCount(line.length - n) + yield* this.pushCount(line.length - n) // fallthrough case undefined: - this.pushNewline() - return this.parseLineStart() + yield* this.pushNewline() + return yield* this.parseLineStart() case '{': case '[': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = false this.flowLevel = 1 return 'flow' case '}': case ']': // this is an error - this.pushCount(1) + yield* this.pushCount(1) return 'doc' case '*': - this.pushUntil(isNotIdentifierChar) + yield* this.pushUntil(isNotIdentifierChar) return 'doc' case '"': case "'": - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case '|': case '>': - n += this.parseBlockScalarHeader() - n += this.pushSpaces(true) - this.pushCount(line.length - n) - this.pushNewline() - return this.parseBlockScalar() + n += yield* this.parseBlockScalarHeader() + n += yield* this.pushSpaces(true) + yield* this.pushCount(line.length - n) + yield* this.pushNewline() + return yield* this.parseBlockScalar() default: - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseFlowCollection() { + private *parseFlowCollection() { let nl: number, sp: number let indent = -1 do { - nl = this.pushNewline() - sp = this.pushSpaces(true) + nl = yield* this.pushNewline() + sp = yield* this.pushSpaces(true) if (nl > 0) this.indentValue = indent = sp } while (nl + sp > 0) 
const line = this.getLine() @@ -397,54 +385,55 @@ export class Lexer { if (!atFlowEndMarker) { // this is an error this.flowLevel = 0 - this.push(FLOW_END) - return this.parseLineStart() + yield FLOW_END + return yield* this.parseLineStart() } } let n = 0 - while (line[n] === ',') n += this.pushCount(1) + this.pushSpaces(true) - n += this.pushIndicators() + while (line[n] === ',') + n += (yield* this.pushCount(1)) + (yield* this.pushSpaces(true)) + n += yield* this.pushIndicators() switch (line[n]) { case undefined: return 'flow' case '#': - this.pushCount(line.length - n) + yield* this.pushCount(line.length - n) return 'flow' case '{': case '[': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = false this.flowLevel += 1 return 'flow' case '}': case ']': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = true this.flowLevel -= 1 return this.flowLevel ? 'flow' : 'doc' case '*': - this.pushUntil(isNotIdentifierChar) + yield* this.pushUntil(isNotIdentifierChar) return 'flow' case '"': case "'": this.flowKey = true - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case ':': { const next = this.charAt(1) if (this.flowKey || isEmpty(next) || next === ',') { - this.pushCount(1) - this.pushSpaces(true) + yield* this.pushCount(1) + yield* this.pushSpaces(true) return 'flow' } } // fallthrough default: this.flowKey = false - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseQuotedScalar() { + private *parseQuotedScalar() { const quote = this.charAt(0) let end = this.buffer.indexOf(quote, this.pos + 1) if (quote === "'") { @@ -475,11 +464,11 @@ export class Lexer { if (!this.atEnd) return this.setNext('quoted-scalar') end = this.buffer.length } - this.pushToIndex(end + 1, false) + yield* this.pushToIndex(end + 1, false) return this.flowLevel ? 
'flow' : 'doc' } - private parseBlockScalarHeader() { + private *parseBlockScalarHeader() { this.blockScalarIndent = -1 this.blockScalarKeep = false let i = this.pos @@ -489,10 +478,10 @@ export class Lexer { else if (ch > '0' && ch <= '9') this.blockScalarIndent = Number(ch) - 1 else if (ch !== '-') break } - return this.pushUntil(ch => isEmpty(ch) || ch === '#') + return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#') } - private parseBlockScalar() { + private *parseBlockScalar() { let nl = this.pos - 1 // may be -1 if this.pos === 0 let indent = 0 let ch: string @@ -538,12 +527,12 @@ export class Lexer { else break } while (true) } - this.push(SCALAR) - this.pushToIndex(nl + 1, true) - return this.parseLineStart() + yield SCALAR + yield* this.pushToIndex(nl + 1, true) + return yield* this.parseLineStart() } - private parsePlainScalar() { + private *parsePlainScalar() { const inFlow = this.flowLevel > 0 let end = this.pos - 1 let i = this.pos - 1 @@ -574,45 +563,45 @@ export class Lexer { } } if (!ch && !this.atEnd) return this.setNext('plain-scalar') - this.push(SCALAR) - this.pushToIndex(end + 1, true) + yield SCALAR + yield* this.pushToIndex(end + 1, true) return inFlow ? 
'flow' : 'doc' } - private pushCount(n: number) { + private *pushCount(n: number) { if (n > 0) { - this.push(this.buffer.substr(this.pos, n)) + yield this.buffer.substr(this.pos, n) this.pos += n return n } return 0 } - private pushToIndex(i: number, allowEmpty: boolean) { + private *pushToIndex(i: number, allowEmpty: boolean) { const s = this.buffer.slice(this.pos, i) if (s) { - this.push(s) + yield s this.pos += s.length return s.length - } else if (allowEmpty) this.push('') + } else if (allowEmpty) yield '' return 0 } - private pushIndicators(): number { + private *pushIndicators(): Generator { switch (this.charAt(0)) { case '!': if (this.charAt(1) === '<') return ( - this.pushVerbatimTag() + - this.pushSpaces(true) + - this.pushIndicators() + (yield* this.pushVerbatimTag()) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) // fallthrough case '&': return ( - this.pushUntil(isNotIdentifierChar) + - this.pushSpaces(true) + - this.pushIndicators() + (yield* this.pushUntil(isNotIdentifierChar)) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) case ':': case '?': // this is an error outside flow collections @@ -620,28 +609,31 @@ export class Lexer { if (isEmpty(this.charAt(1))) { if (this.flowLevel === 0) this.indentNext = this.indentValue + 1 return ( - this.pushCount(1) + this.pushSpaces(true) + this.pushIndicators() + (yield* this.pushCount(1)) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) } } return 0 } - private pushVerbatimTag() { + private *pushVerbatimTag() { let i = this.pos + 2 let ch = this.buffer[i] while (!isEmpty(ch) && ch !== '>') ch = this.buffer[++i] - return this.pushToIndex(ch === '>' ? i + 1 : i, false) + return yield* this.pushToIndex(ch === '>' ? 
i + 1 : i, false) } - private pushNewline() { + private *pushNewline() { const ch = this.buffer[this.pos] - if (ch === '\n') return this.pushCount(1) - else if (ch === '\r' && this.charAt(1) === '\n') return this.pushCount(2) + if (ch === '\n') return yield* this.pushCount(1) + else if (ch === '\r' && this.charAt(1) === '\n') + return yield* this.pushCount(2) else return 0 } - private pushSpaces(allowTabs: boolean) { + private *pushSpaces(allowTabs: boolean) { let i = this.pos - 1 let ch: string do { @@ -649,16 +641,16 @@ export class Lexer { } while (ch === ' ' || (allowTabs && ch === '\t')) const n = i - this.pos if (n > 0) { - this.push(this.buffer.substr(this.pos, n)) + yield this.buffer.substr(this.pos, n) this.pos = i } return n } - private pushUntil(test: (ch: string) => boolean) { + private *pushUntil(test: (ch: string) => boolean) { let i = this.pos let ch = this.buffer[i] while (!test(ch)) ch = this.buffer[++i] - return this.pushToIndex(i, false) + return yield* this.pushToIndex(i, false) } } diff --git a/src/parse/parser.ts b/src/parse/parser.ts index b74220e3..d1228d61 100644 --- a/src/parse/parser.ts +++ b/src/parse/parser.ts @@ -148,9 +148,8 @@ function fixFlowSeqItems(fc: FlowCollection) { * const parser = new Parser(tok => cst.push(tok)) * const src: string = ... 
* - * // The following would be equivalent to `parser.parse(src, false)` - * const lexer = new Lexer(parser.next) - * lexer.lex(src, false) + * // The following would be equivalent to `parser.parse(src)` + * for (const lexeme of new Lexer().lex(src)) parser.next(lexeme) * parser.end() * ``` */ @@ -206,7 +205,7 @@ export class Parser { */ parse(source: string, incomplete = false) { if (this.onNewLine && this.offset === 0) this.onNewLine(0) - this.lexer.lex(source, incomplete) + for (const lexeme of this.lexer.lex(source, incomplete)) this.next(lexeme) if (!incomplete) this.end() } @@ -261,7 +260,7 @@ export class Parser { } // Must be defined after `next()` - private lexer = new Lexer(this.next) + private lexer = new Lexer() /** Call at end of input to push out any remaining constructions */ end() { From 7169dcecc88edb0ea32e1079d16010731fa1265b Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Sat, 3 Apr 2021 10:01:19 +0300 Subject: [PATCH 2/3] feat!: Turn the parser into a generator BREAKING CHANGE: The `push` argument of `new Parser()` is dropped, and instead its `parse()`, `next()` and `end()` methods are now generators that allow for the parsed tokens to be iterated. 
--- README.md | 4 +- docs/01_intro.md | 4 +- docs/07_parsing_yaml.md | 27 +++--- src/compose/composer.ts | 3 +- src/parse/parser.ts | 185 ++++++++++++++++++++-------------------- src/public-api.ts | 8 +- tests/stream.ts | 7 +- tests/tsconfig.json | 3 +- 8 files changed, 120 insertions(+), 121 deletions(-) diff --git a/README.md b/README.md index 49e71d1d..ec0b21e5 100644 --- a/README.md +++ b/README.md @@ -74,8 +74,8 @@ const YAML = require('yaml') ### Parsing YAML -- [`new Lexer(push)`](https://eemeli.org/yaml/#lexer) -- [`new Parser(push, onNewLine?)`](https://eemeli.org/yaml/#parser) +- [`new Lexer().lex(src)`](https://eemeli.org/yaml/#lexer) +- [`new Parser(onNewLine?).parse(src)`](https://eemeli.org/yaml/#parser) - [`new Composer(push, options?)`](https://eemeli.org/yaml/#composer) ## YAML.parse diff --git a/docs/01_intro.md b/docs/01_intro.md index 713b284f..dfca3804 100644 --- a/docs/01_intro.md +++ b/docs/01_intro.md @@ -96,6 +96,6 @@ import { import { Composer, Lexer, Parser } from 'yaml' ``` -- [`new Lexer(push)`](#lexer) -- [`new Parser(push, onNewLine?)`](#parser) +- [`new Lexer().lex(src)`](#lexer) +- [`new Parser(onNewLine?).parse(src)`](#parser) - [`new Composer(push, options?)`](#composer) diff --git a/docs/07_parsing_yaml.md b/docs/07_parsing_yaml.md index 3ede6889..0d30d8da 100644 --- a/docs/07_parsing_yaml.md +++ b/docs/07_parsing_yaml.md @@ -94,8 +94,8 @@ All remaining tokens are identifiable by their first character: ```js import { Parser } from 'yaml' -const parser = new Parser(tok => console.dir(tok, { depth: null })) -parser.parse('foo: [24,"42"]\n', false) +for (const token of new Parser().parse('foo: [24,"42"]\n')) + console.dir(token, { depth: null }) > { type: 'document', @@ -150,24 +150,21 @@ It should never throw errors, but may (rarely) include error tokens in its outpu To validate a CST, you will need to compose it into a `Document`. 
If the document contains errors, they will be included in the document's `errors` array, and each error will will contain an `offset` within the source string, which you may then use to find the corresponding node in the CST. -#### `new Parser(push: (token: Token) => void, onNewLine?: (offset: number) => void)` +#### `new Parser(onNewLine?: (offset: number) => void)` Create a new parser. -`push` is called separately with each parsed token. If defined, `onNewLine` is called separately with the start position of each new line (in `parse()`, including the start of input). -#### `parser.parse(source: string, incomplete = false)` +#### `parser.parse(source: string, incomplete = false): Generator` -Parse `source` as a YAML stream, calling `push` with each directive, document and other structure as it is completely parsed. +Parse `source` as a YAML stream, generating tokens for each directive, document and other structure as it is completely parsed. If `incomplete`, a part of the last line may be left as a buffer for the next call. -Errors are not thrown, but pushed out as `{ type: 'error', offset, message }` tokens. +Errors are not thrown, but are yielded as `{ type: 'error', offset, message }` tokens. -#### `parser.next(lexToken: string)` +#### `parser.next(lexToken: string): Generator` Advance the parser by one lexical token. -Bound to the Parser instance, so may be used directly as a callback function. - Used internally by `parser.parse()`; exposed to allow for use with an external lexer. For debug purposes, if the `LOG_TOKENS` env var is true-ish, all lexical tokens will be pretty-printed using `console.log()` as they are being processed. 
@@ -202,8 +199,9 @@ Collection items contain some subset of the following properties: import { LineCounter, Parser } from 'yaml' const lineCounter = new LineCounter() -const parser = new Parser(() => {}, lineCounter.addNewLine)) -parser.parse('foo:\n- 24\n- "42"\n') +const parser = new Parser(lineCounter.addNewLine) +const tokens = parser.parse('foo:\n- 24\n- "42"\n') +Array.from(tokens) // forces iteration lineCounter.lineStarts > [ 0, 5, 10, 17 ] @@ -233,10 +231,11 @@ If `line === 0`, `addNewLine` has never been called or `offset` is before the fi ```js import { Composer, Parser } from 'yaml' + +const src = 'foo: bar\nfee: [24, "42"]' const docs = [] const composer = new Composer(doc => docs.push(doc)) -const parser = new Parser(composer.next) -parser.parse('foo: bar\nfee: [24, "42"]') +for (const token of new Parser().parse(src)) composer.next(token) composer.end() docs.map(doc => doc.toJS()) diff --git a/src/compose/composer.ts b/src/compose/composer.ts index 2a1013d5..56a033c9 100644 --- a/src/compose/composer.ts +++ b/src/compose/composer.ts @@ -53,8 +53,7 @@ function parsePrelude(prelude: string[]) { * const options = { ... } * const docs: Document.Parsed[] = [] * const composer = new Composer(doc => docs.push(doc), options) - * const parser = new Parser(composer.next) - * parser.parse(source) + * for (const doc of new Parser().parse(source)) composer.next(doc) * composer.end() * ``` */ diff --git a/src/parse/parser.ts b/src/parse/parser.ts index d1228d61..5a74b6e0 100644 --- a/src/parse/parser.ts +++ b/src/parse/parser.ts @@ -140,21 +140,31 @@ function fixFlowSeqItems(fc: FlowCollection) { /** * A YAML concrete syntax tree (CST) parser * - * While the `parse()` method provides an API for parsing a source string - * directly, the parser may also be used with a user-provided lexer: - * * ```ts - * const cst: Token[] = [] - * const parser = new Parser(tok => cst.push(tok)) * const src: string = ... 
+ * for (const token of new Parser().parse(src)) { + * // token: Token + * } + * ``` * - * // The following would be equivalent to `parser.parse(src)` - * for (const lexeme of new Lexer().lex(src)) parser.next(lexeme) - * parser.end() + * To use the parser with a user-provided lexer: + * + * ```ts + * function* parse(source: string, lexer: Lexer) { + * const parser = new Parser() + * for (const lexeme of lexer.lex(source)) + * yield* parser.next(lexeme) + * yield* parser.end() + * } + * + * const src: string = ... + * const lexer = new Lexer() + * for (const token of parse(src, lexer)) { + * // token: Token + * } * ``` */ export class Parser { - private push: (token: Token) => void private onNewLine?: (offset: number) => void /** If true, space and sequence indicators count as indentation */ @@ -182,44 +192,38 @@ export class Parser { private type = '' as TokenType /** - * @param push - Called separately with each parsed token * @param onNewLine - If defined, called separately with the start position of * each new line (in `parse()`, including the start of input). - * @public */ - constructor( - push: (token: Token) => void, - onNewLine?: (offset: number) => void - ) { - this.push = push + constructor(onNewLine?: (offset: number) => void) { this.onNewLine = onNewLine } /** - * Parse `source` as a YAML stream, calling `push` with each directive, - * document and other structure as it is completely parsed. If `incomplete`, - * a part of the last line may be left as a buffer for the next call. + * Parse `source` as a YAML stream. + * If `incomplete`, a part of the last line may be left as a buffer for the next call. * - * Errors are not thrown, but pushed out as `{ type: 'error', message }` tokens. - * @public + * Errors are not thrown, but yielded as `{ type: 'error', message }` tokens. + * + * @returns A generator of tokens representing each directive, document, and other structure. 
*/ - parse(source: string, incomplete = false) { + *parse(source: string, incomplete = false) { if (this.onNewLine && this.offset === 0) this.onNewLine(0) - for (const lexeme of this.lexer.lex(source, incomplete)) this.next(lexeme) - if (!incomplete) this.end() + for (const lexeme of this.lexer.lex(source, incomplete)) + yield* this.next(lexeme) + if (!incomplete) yield* this.end() } /** - * Advance the parser by the `source` of one lexical token. Bound to the - * Parser instance, so may be used directly as a callback function. + * Advance the parser by the `source` of one lexical token. */ - next = (source: string) => { + *next(source: string) { this.source = source if (process.env.LOG_TOKENS) console.log('|', prettyToken(source)) if (this.atScalar) { this.atScalar = false - this.step() + yield* this.step() this.offset += source.length return } @@ -227,7 +231,7 @@ export class Parser { const type = tokenType(source) if (!type) { const message = `Not a YAML token: ${source}` - this.pop({ type: 'error', offset: this.offset, message, source }) + yield* this.pop({ type: 'error', offset: this.offset, message, source }) this.offset += source.length } else if (type === 'scalar') { this.atNewLine = false @@ -235,7 +239,7 @@ export class Parser { this.type = 'scalar' } else { this.type = type - this.step() + yield* this.step() switch (type) { case 'newline': this.atNewLine = true @@ -260,11 +264,11 @@ export class Parser { } // Must be defined after `next()` - private lexer = new Lexer() + private lexer = new Lexer(); /** Call at end of input to push out any remaining constructions */ - end() { - while (this.stack.length > 0) this.pop() + *end() { + while (this.stack.length > 0) yield* this.pop() } private get sourceToken() { @@ -277,10 +281,10 @@ export class Parser { return st } - private step() { + private *step(): Generator { const top = this.peek(1) if (this.type === 'doc-end' && (!top || top.type !== 'doc-end')) { - while (this.stack.length > 0) this.pop() + while 
(this.stack.length > 0) yield* this.pop() this.stack.push({ type: 'doc-end', offset: this.offset, @@ -288,42 +292,42 @@ export class Parser { }) return } - if (!top) return this.stream() + if (!top) return yield* this.stream() switch (top.type) { case 'document': - return this.document(top) + return yield* this.document(top) case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': - return this.scalar(top) + return yield* this.scalar(top) case 'block-scalar': - return this.blockScalar(top) + return yield* this.blockScalar(top) case 'block-map': - return this.blockMap(top) + return yield* this.blockMap(top) case 'block-seq': - return this.blockSequence(top) + return yield* this.blockSequence(top) case 'flow-collection': - return this.flowCollection(top) + return yield* this.flowCollection(top) case 'doc-end': - return this.documentEnd(top) + return yield* this.documentEnd(top) } /* istanbul ignore next should not happen */ - this.pop() + yield* this.pop() } private peek(n: number) { return this.stack[this.stack.length - n] } - private pop(error?: Token) { + private *pop(error?: Token): Generator { const token = error || this.stack.pop() /* istanbul ignore if should not happen */ if (!token) { const message = 'Tried to pop an empty stack' - this.push({ type: 'error', offset: this.offset, source: '', message }) + yield { type: 'error', offset: this.offset, source: '', message } } else if (this.stack.length === 0) { - this.push(token) + yield token } else { const top = this.peek(1) // For these, parent indent is needed instead of own @@ -368,8 +372,8 @@ export class Parser { } /* istanbul ignore next should not happen */ default: - this.pop() - this.pop(token) + yield* this.pop() + yield* this.pop(token) } if ( @@ -398,20 +402,16 @@ export class Parser { } } - private stream() { + private *stream(): Generator { switch (this.type) { case 'directive-line': - this.push({ - type: 'directive', - offset: this.offset, - source: this.source - }) + 
yield { type: 'directive', offset: this.offset, source: this.source } return case 'byte-order-mark': case 'space': case 'comment': case 'newline': - this.push(this.sourceToken) + yield this.sourceToken return case 'doc-mode': case 'doc-start': { @@ -425,21 +425,21 @@ export class Parser { return } } - this.push({ + yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML stream`, source: this.source - }) + } } - private document(doc: Document) { - if (doc.value) return this.lineEnd(doc) + private *document(doc: Document): Generator { + if (doc.value) return yield* this.lineEnd(doc) switch (this.type) { case 'doc-start': { if (includesNonEmpty(doc.start)) { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } else doc.start.push(this.sourceToken) return } @@ -454,16 +454,16 @@ export class Parser { const bv = this.startBlockValue(doc) if (bv) this.stack.push(bv) else { - this.push({ + yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML document`, source: this.source - }) + } } } - private scalar(scalar: FlowScalar) { + private *scalar(scalar: FlowScalar) { if (this.type === 'map-value-ind') { const prev = getPrevProps(this.peek(2)) const start = getFirstKeyStartProps(prev) @@ -483,10 +483,10 @@ export class Parser { } this.onKeyLine = true this.stack[this.stack.length - 1] = map - } else this.lineEnd(scalar) + } else yield* this.lineEnd(scalar) } - private blockScalar(scalar: BlockScalar) { + private *blockScalar(scalar: BlockScalar) { switch (this.type) { case 'space': case 'comment': @@ -505,16 +505,16 @@ export class Parser { nl = this.source.indexOf('\n', nl) + 1 } } - this.pop() + yield* this.pop() break /* istanbul ignore next should not happen */ default: - this.pop() - this.step() + yield* this.pop() + yield* this.step() } } - private blockMap(map: BlockMap) { + private *blockMap(map: BlockMap) { const it = map.items[map.items.length - 1] // it.sep is true-ish if pair 
already has key or : separator switch (this.type) { @@ -634,11 +634,11 @@ export class Parser { } } } - this.pop() - this.step() + yield* this.pop() + yield* this.step() } - private blockSequence(seq: BlockSequence) { + private *blockSequence(seq: BlockSequence) { const it = seq.items[seq.items.length - 1] switch (this.type) { case 'newline': @@ -671,18 +671,21 @@ export class Parser { } if (this.indent > seq.indent) { const bv = this.startBlockValue(seq) - if (bv) return this.stack.push(bv) + if (bv) { + this.stack.push(bv) + return + } } - this.pop() - this.step() + yield* this.pop() + yield* this.step() } - private flowCollection(fc: FlowCollection) { + private *flowCollection(fc: FlowCollection) { const it = fc.items[fc.items.length - 1] if (this.type === 'flow-error-end') { let top: Token | undefined do { - this.pop() + yield* this.pop() top = this.peek(1) } while (top && top.type === 'flow-collection') } else if (fc.end.length === 0) { @@ -728,10 +731,10 @@ export class Parser { } const bv = this.startBlockValue(fc) /* istanbul ignore else should not happen */ - if (bv) return this.stack.push(bv) + if (bv) this.stack.push(bv) else { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } } else { const parent = this.peek(2) @@ -741,8 +744,8 @@ export class Parser { (this.type === 'newline' && !parent.items[parent.items.length - 1].sep)) ) { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } else if ( this.type === 'map-value-ind' && parent.type !== 'flow-collection' @@ -761,7 +764,7 @@ export class Parser { this.onKeyLine = true this.stack[this.stack.length - 1] = map } else { - this.lineEnd(fc) + yield* this.lineEnd(fc) } } } @@ -842,15 +845,15 @@ export class Parser { return null } - private documentEnd(docEnd: DocumentEnd) { + private *documentEnd(docEnd: DocumentEnd) { if (this.type !== 'doc-mode') { if (docEnd.end) docEnd.end.push(this.sourceToken) else docEnd.end = [this.sourceToken] - if (this.type === 'newline') 
this.pop() + if (this.type === 'newline') yield* this.pop() } } - private lineEnd(token: Document | FlowCollection | FlowScalar) { + private *lineEnd(token: Document | FlowCollection | FlowScalar) { switch (this.type) { case 'comma': case 'doc-start': @@ -858,8 +861,8 @@ export class Parser { case 'flow-seq-end': case 'flow-map-end': case 'map-value-ind': - this.pop() - this.step() + yield* this.pop() + yield* this.step() break case 'newline': this.onKeyLine = false @@ -870,7 +873,7 @@ export class Parser { // all other values are errors if (token.end) token.end.push(this.sourceToken) else token.end = [this.sourceToken] - if (this.type === 'newline') this.pop() + if (this.type === 'newline') yield* this.pop() } } } diff --git a/src/public-api.ts b/src/public-api.ts index df2122d0..7d437c71 100644 --- a/src/public-api.ts +++ b/src/public-api.ts @@ -50,8 +50,8 @@ export function parseAllDocuments( doc => docs.push(doc as Document.Parsed), options ) - const parser = new Parser(composer.next, lineCounter?.addNewLine) - parser.parse(source) + const parser = new Parser(lineCounter?.addNewLine) + for (const token of parser.parse(source)) composer.next(token) composer.end() if (prettyErrors && lineCounter) @@ -89,8 +89,8 @@ export function parseDocument( ) } }, options) - const parser = new Parser(composer.next, lineCounter?.addNewLine) - parser.parse(source) + const parser = new Parser(lineCounter?.addNewLine) + for (const token of parser.parse(source)) composer.next(token) composer.end(true, source.length) if (prettyErrors && lineCounter) { diff --git a/tests/stream.ts b/tests/stream.ts index c434e466..8baf7ed6 100644 --- a/tests/stream.ts +++ b/tests/stream.ts @@ -56,12 +56,11 @@ describe('Input in parts', () => { const composer = new Composer(doc => res.push(doc), { logLevel: 'error' }) - const parser = new Parser(composer.next) - + const parser = new Parser() const start = src.substring(0, i) const end = src.substring(i) - parser.parse(start, true) - parser.parse(end, 
false) + for (const token of parser.parse(start, true)) composer.next(token) + for (const token of parser.parse(end, false)) composer.next(token) composer.end() try { diff --git a/tests/tsconfig.json b/tests/tsconfig.json index c419157b..23d4b4ff 100644 --- a/tests/tsconfig.json +++ b/tests/tsconfig.json @@ -6,8 +6,7 @@ "paths": { "yaml": ["../src/index.ts"] }, - "rootDir": "..", - "target": "ES3" + "rootDir": ".." }, "include": ["**/*.ts"] } From 1755d2fc7fcfe7aae5730d6f2d285ff5b8e7ec72 Mon Sep 17 00:00:00 2001 From: Eemeli Aro Date: Sat, 3 Apr 2021 10:01:19 +0300 Subject: [PATCH 3/3] feat!: Turn the composer into a generator Adds a convenience method `composer.compose(tokens)`. BREAKING CHANGE: The `push` argument of `new Composer()` is dropped, and instead its `next()` and `end()` methods are now generators that allow for the parsed documents to be iterated. --- README.md | 2 +- docs/01_intro.md | 2 +- docs/07_parsing_yaml.md | 22 ++++++++------- src/compose/composer.ts | 59 +++++++++++++++++++---------------------- src/public-api.ts | 30 ++++++++++----------- tests/stream.ts | 13 ++++----- 6 files changed, 62 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index ec0b21e5..370a0000 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ const YAML = require('yaml') - [`new Lexer().lex(src)`](https://eemeli.org/yaml/#lexer) - [`new Parser(onNewLine?).parse(src)`](https://eemeli.org/yaml/#parser) -- [`new Composer(push, options?)`](https://eemeli.org/yaml/#composer) +- [`new Composer(options?).compose(tokens)`](https://eemeli.org/yaml/#composer) ## YAML.parse diff --git a/docs/01_intro.md b/docs/01_intro.md index dfca3804..8da1b94c 100644 --- a/docs/01_intro.md +++ b/docs/01_intro.md @@ -98,4 +98,4 @@ import { Composer, Lexer, Parser } from 'yaml' - [`new Lexer().lex(src)`](#lexer) - [`new Parser(onNewLine?).parse(src)`](#parser) -- [`new Composer(push, options?)`](#composer) +- [`new Composer(options?).compose(tokens)`](#composer) diff --git 
a/docs/07_parsing_yaml.md b/docs/07_parsing_yaml.md index 0d30d8da..967027a3 100644 --- a/docs/07_parsing_yaml.md +++ b/docs/07_parsing_yaml.md @@ -233,27 +233,29 @@ If `line === 0`, `addNewLine` has never been called or `offset` is before the fi import { Composer, Parser } from 'yaml' const src = 'foo: bar\nfee: [24, "42"]' -const docs = [] -const composer = new Composer(doc => docs.push(doc)) -for (const token of new Parser().parse(src)) composer.next(token) -composer.end() +const tokens = new Parser().parse(src) +const docs = new Composer().compose(tokens) -docs.map(doc => doc.toJS()) +Array.from(docs, doc => doc.toJS()) > [{ foo: 'bar', fee: [24, '42'] }] ``` -#### `new Composer(push: (doc: Document.Parsed) => void, options?: Options)` +#### `new Composer(options?: ParseOptions & DocumentOptions & SchemaOptions)` Create a new Document composer. Does not include an internal Parser instance, so an external one will be needed. -`options` will be used during composition, and passed to the `new Document` constructor; may include any of ParseOptions, DocumentOptions, and SchemaOptions. +`options` will be used during composition, and passed to the `new Document` constructor. -#### `composer.next(token: Token)` +#### `composer.compose(tokens: Iterable, forceDoc?: boolean, endOffset?: number): Generator` + +Compose tokens into documents. +Convenience wrapper combining calls to `composer.next()` and `composer.end()`. + +#### `composer.next(token: Token): Generator` Advance the composed by one CST token. -Bound to the Composer instance, so may be used directly as a callback function. -#### `composer.end(forceDoc?: boolean, offset?: number)` +#### `composer.end(forceDoc?: boolean, endOffset?: number): Generator` Always call at end of input to push out any remaining document. If `forceDoc` is true and the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document.
diff --git a/src/compose/composer.ts b/src/compose/composer.ts index 56a033c9..af591754 100644 --- a/src/compose/composer.ts +++ b/src/compose/composer.ts @@ -50,31 +50,26 @@ function parsePrelude(prelude: string[]) { * Compose a stream of CST nodes into a stream of YAML Documents. * * ```ts - * const options = { ... } - * const docs: Document.Parsed[] = [] - * const composer = new Composer(doc => docs.push(doc), options) - * for (const doc of new Parser().parse(source)) composer.next(doc) - * composer.end() + * import { Composer, Parser } from 'yaml' + * + * const src: string = ... + * const tokens = new Parser().parse(src) + * const docs = new Composer().compose(tokens) * ``` */ export class Composer { private directives: Directives private doc: Document.Parsed | null = null - private onDocument: (doc: Document.Parsed) => void private options: ParseOptions & DocumentOptions & SchemaOptions private atDirectives = false private prelude: string[] = [] private errors: YAMLParseError[] = [] private warnings: YAMLWarning[] = [] - constructor( - onDocument: Composer['onDocument'], - options: ParseOptions & DocumentOptions & SchemaOptions = {} - ) { + constructor(options: ParseOptions & DocumentOptions & SchemaOptions = {}) { this.directives = new Directives({ - version: options?.version || defaultOptions.version + version: options.version || defaultOptions.version }) - this.onDocument = onDocument this.options = options } @@ -136,10 +131,18 @@ export class Composer { } /** - * Advance the composed by one CST token. Bound to the Composer - * instance, so may be used directly as a callback function. + * Compose tokens into documents. + * + * @param forceDoc - If the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document. + * @param endOffset - Should be set if `forceDoc` is also set, to set the document range end and to indicate errors correctly. 
*/ - next = (token: Token) => { + *compose(tokens: Iterable, forceDoc = false, endOffset = -1) { + for (const token of tokens) yield* this.next(token) + yield* this.end(forceDoc, endOffset) + } + + /** Advance the composer by one CST token. */ + *next(token: Token) { if (process.env.LOG_STREAM) console.dir(token, { depth: null }) switch (token.type) { case 'directive': @@ -157,7 +160,7 @@ export class Composer { this.onError ) this.decorate(doc, false) - if (this.doc) this.onDocument(this.doc) + if (this.doc) yield this.doc this.doc = doc this.atDirectives = false break @@ -211,37 +214,29 @@ export class Composer { } } - /** Call at end of input to push out any remaining document. */ - end(): void - /** - * Call at end of input to push out any remaining document. + * Call at end of input to yield any remaining document. * - * @param forceDoc - If the stream contains no document, still emit a final - * document including any comments and directives that would be applied - * to a subsequent document. - * @param offset - Should be set if `forceDoc` is also set, to set the - * document range end and to indicate errors correctly. + * @param forceDoc - If the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document. + * @param endOffset - Should be set if `forceDoc` is also set, to set the document range end and to indicate errors correctly. 
*/ - end(forceDoc: true, offset: number): void - - end(forceDoc = false, offset = -1) { + *end(forceDoc = false, endOffset = -1) { if (this.doc) { this.decorate(this.doc, true) - this.onDocument(this.doc) + yield this.doc this.doc = null } else if (forceDoc) { const opts = Object.assign({ directives: this.directives }, this.options) const doc = new Document(undefined, opts) as Document.Parsed if (this.atDirectives) this.onError( - offset, + endOffset, 'MISSING_CHAR', 'Missing directives-end indicator line' ) - doc.range = [0, offset] + doc.range = [0, endOffset] this.decorate(doc, false) - this.onDocument(doc) + yield doc } } } diff --git a/src/public-api.ts b/src/public-api.ts index 7d437c71..dc5e6991 100644 --- a/src/public-api.ts +++ b/src/public-api.ts @@ -41,18 +41,12 @@ function parseOptions(options: ParseOptions | undefined) { */ export function parseAllDocuments( source: string, - options?: ParseOptions & DocumentOptions & SchemaOptions + options: ParseOptions & DocumentOptions & SchemaOptions = {} ): Document.Parsed[] | EmptyStream { const { lineCounter, prettyErrors } = parseOptions(options) - - const docs: Document.Parsed[] = [] - const composer = new Composer( - doc => docs.push(doc as Document.Parsed), - options - ) const parser = new Parser(lineCounter?.addNewLine) - for (const token of parser.parse(source)) composer.next(token) - composer.end() + const composer = new Composer(options) + const docs = Array.from(composer.compose(parser.parse(source))) if (prettyErrors && lineCounter) for (const doc of docs) { @@ -60,7 +54,7 @@ export function parseAllDocuments( doc.warnings.forEach(prettifyError(source, lineCounter)) } - if (docs.length > 0) return docs + if (docs.length > 0) return docs as Document.Parsed[] return Object.assign< Document.Parsed[], { empty: true }, @@ -71,13 +65,19 @@ export function parseAllDocuments( /** Parse an input string into a single YAML.Document */ export function parseDocument( source: string, - options?: ParseOptions & 
DocumentOptions & SchemaOptions + options: ParseOptions & DocumentOptions & SchemaOptions = {} ) { const { lineCounter, prettyErrors } = parseOptions(options) + const parser = new Parser(lineCounter?.addNewLine) + const composer = new Composer(options) // `doc` is always set by compose.end(true) at the very latest let doc: Document.Parsed = null as any - const composer = new Composer(_doc => { + for (const _doc of composer.compose( + parser.parse(source), + true, + source.length + )) { if (!doc) doc = _doc as Document.Parsed else if (doc.options.logLevel !== 'silent') { doc.errors.push( @@ -87,11 +87,9 @@ export function parseDocument( 'Source contains multiple documents; please use YAML.parseAllDocuments()' ) ) + break } - }, options) - const parser = new Parser(lineCounter?.addNewLine) - for (const token of parser.parse(source)) composer.next(token) - composer.end(true, source.length) + } if (prettyErrors && lineCounter) { doc.errors.forEach(prettifyError(source, lineCounter)) diff --git a/tests/stream.ts b/tests/stream.ts index 8baf7ed6..493f2326 100644 --- a/tests/stream.ts +++ b/tests/stream.ts @@ -53,15 +53,16 @@ describe('Input in parts', () => { for (let i = 1; i < src.length - 1; ++i) { const res: Document.Parsed[] = [] - const composer = new Composer(doc => res.push(doc), { - logLevel: 'error' - }) + const composer = new Composer({ logLevel: 'error' }) const parser = new Parser() const start = src.substring(0, i) const end = src.substring(i) - for (const token of parser.parse(start, true)) composer.next(token) - for (const token of parser.parse(end, false)) composer.next(token) - composer.end() + for (const token of [ + ...parser.parse(start, true), + ...parser.parse(end, false) + ]) + for (const doc of composer.next(token)) res.push(doc) + for (const doc of composer.end()) res.push(doc) try { expect(res.map(doc => doc.toJS())).toMatchObject(exp)