diff --git a/README.md b/README.md index 49e71d1d..370a0000 100644 --- a/README.md +++ b/README.md @@ -74,9 +74,9 @@ const YAML = require('yaml') ### Parsing YAML -- [`new Lexer(push)`](https://eemeli.org/yaml/#lexer) -- [`new Parser(push, onNewLine?)`](https://eemeli.org/yaml/#parser) -- [`new Composer(push, options?)`](https://eemeli.org/yaml/#composer) +- [`new Lexer().lex(src)`](https://eemeli.org/yaml/#lexer) +- [`new Parser(onNewLine?).parse(src)`](https://eemeli.org/yaml/#parser) +- [`new Composer(options?).compose(tokens)`](https://eemeli.org/yaml/#composer) ## YAML.parse diff --git a/docs/01_intro.md b/docs/01_intro.md index 713b284f..8da1b94c 100644 --- a/docs/01_intro.md +++ b/docs/01_intro.md @@ -96,6 +96,6 @@ import { import { Composer, Lexer, Parser } from 'yaml' ``` -- [`new Lexer(push)`](#lexer) -- [`new Parser(push, onNewLine?)`](#parser) -- [`new Composer(push, options?)`](#composer) +- [`new Lexer().lex(src)`](#lexer) +- [`new Parser(onNewLine?).parse(src)`](#parser) +- [`new Composer(options?).compose(tokens)`](#composer) diff --git a/docs/07_parsing_yaml.md b/docs/07_parsing_yaml.md index 2176eb56..c359e092 100644 --- a/docs/07_parsing_yaml.md +++ b/docs/07_parsing_yaml.md @@ -3,11 +3,11 @@ ```js import { - Lexer, - Parser, Composer, + CST, + Lexer, LineCounter, - tokens + Parser, } from 'yaml' ``` @@ -28,10 +28,8 @@ Both the Lexer and Parser accept incomplete input, allowing for them and the Com ```js import { Lexer } from 'yaml' -const tokens = [] -const lexer = new Lexer(tok => tokens.push(tok)) -lexer.lex('foo: bar\nfee:\n [24,"42"]\n', false) -console.dir(tokens) +const tokens = new Lexer().lex('foo: bar\nfee:\n [24,"42"]\n') +console.dir(Array.from(tokens)) > [ '\x02', '\x1F', 'foo', ':', ' ', '\x1F', 'bar', '\n', @@ -41,12 +39,11 @@ console.dir(tokens) ] ``` -#### `new Lexer(push: (token: string) => void)` +#### `new Lexer()` -#### `lexer.lex(src: string, incomplete: boolean): void` +#### `lexer.lex(src: string, incomplete?: boolean): 
Generator` The API for the lexer is rather minimal, and offers no configuration. -The constructor accepts a single callback as argument, defining a function that will be called once for each lexical token. If the input stream is chunked, the `lex()` method may be called separately for each chunk if the `incomplete` argument is `true`. At the end of input, `lex()` should be called a final time with `incomplete: false` to ensure that the remaining tokens are emitted. @@ -97,8 +94,8 @@ All remaining tokens are identifiable by their first character: ```js import { Parser } from 'yaml' -const parser = new Parser(tok => console.dir(tok, { depth: null })) -parser.parse('foo: [24,"42"]\n', false) +for (const token of new Parser().parse('foo: [24,"42"]\n')) + console.dir(token, { depth: null }) > { type: 'document', @@ -153,31 +150,28 @@ It should never throw errors, but may (rarely) include error tokens in its outpu To validate a CST, you will need to compose it into a `Document`. If the document contains errors, they will be included in the document's `errors` array, and each error will will contain an `offset` within the source string, which you may then use to find the corresponding node in the CST. -#### `new Parser(push: (token: Token) => void, onNewLine?: (offset: number) => void)` +#### `new Parser(onNewLine?: (offset: number) => void)` Create a new parser. -`push` is called separately with each parsed token. If defined, `onNewLine` is called separately with the start position of each new line (in `parse()`, including the start of input). -#### `parser.parse(source: string, incomplete = false)` +#### `parser.parse(source: string, incomplete = false): Generator` -Parse `source` as a YAML stream, calling `push` with each directive, document and other structure as it is completely parsed. +Parse `source` as a YAML stream, generating tokens for each directive, document and other structure as it is completely parsed. 
If `incomplete`, a part of the last line may be left as a buffer for the next call. -Errors are not thrown, but pushed out as `{ type: 'error', offset, message }` tokens. +Errors are not thrown, but are yielded as `{ type: 'error', offset, message }` tokens. -#### `parser.next(lexToken: string)` +#### `parser.next(lexToken: string): Generator` Advance the parser by one lexical token. -Bound to the Parser instance, so may be used directly as a callback function. - Used internally by `parser.parse()`; exposed to allow for use with an external lexer. For debug purposes, if the `LOG_TOKENS` env var is true-ish, all lexical tokens will be pretty-printed using `console.log()` as they are being processed. ### CST Nodes -For a complete description of CST node interfaces, please consult the [tokens.ts source](https://github.com/eemeli/yaml/blob/master/src/parse/tokens.ts). +For a complete description of CST node interfaces, please consult the [cst.ts source](https://github.com/eemeli/yaml/blob/master/src/parse/cst.ts). 
Some of the most common node properties include: @@ -205,8 +199,9 @@ Collection items contain some subset of the following properties: import { LineCounter, Parser } from 'yaml' const lineCounter = new LineCounter() -const parser = new Parser(() => {}, lineCounter.addNewLine)) -parser.parse('foo:\n- 24\n- "42"\n') +const parser = new Parser(lineCounter.addNewLine)) +const tokens = parser.parse('foo:\n- 24\n- "42"\n') +Array.from(tokens) // forces iteration lineCounter.lineStarts > [ 0, 5, 10, 17 ] @@ -236,28 +231,31 @@ If `line === 0`, `addNewLine` has never been called or `offset` is before the fi ```js import { Composer, Parser } from 'yaml' -const docs = [] -const composer = new Composer(doc => docs.push(doc)) -const parser = new Parser(composer.next) -parser.parse('foo: bar\nfee: [24, "42"]') -composer.end() -docs.map(doc => doc.toJS()) +const src = 'foo: bar\nfee: [24, "42"]' +const tokens = new Parser().parse(src) +const docs = new Composer().compose(tokens) + +Array.from(docs, doc => doc.toJS()) > [{ foo: 'bar', fee: [24, '42'] }] ``` -#### `new Composer(push: (doc: Document.Parsed) => void, options?: Options)` +#### `new Composer(options?: ParseOptions & DocumentOptions & SchemaOptions)` Create a new Document composer. Does not include an internal Parser instance, so an external one will be needed. -`options` will be used during composition, and passed to the `new Document` constructor; may include any of ParseOptions, DocumentOptions, and SchemaOptions. +`options` will be used during composition, and passed to the `new Document` constructor. + +#### `composer.compose(tokens: Iterable, forceDoc?: boolean, endOffset?: number): Generator` -#### `composer.next(token: Token)` +Compose tokens into documents. +Convenience wrapper combining calls to `composer.next()` and `composer.end()`. + +#### `composer.next(token: Token): Generator` Advance the composed by one CST token. -Bound to the Composer instance, so may be used directly as a callback function. 
-#### `composer.end(forceDoc?: boolean, offset?: number)` +#### `composer.end(forceDoc?: boolean, offset?: number): Generator` Always call at end of input to push out any remaining document. If `forceDoc` is true and the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document. @@ -267,3 +265,154 @@ If `forceDoc` is true and the stream contains no document, still emit a final do Current stream status information. Mostly useful at the end of input for an empty stream. + +## Working with CST Tokens + +```ts +import { CST } from 'yaml' +``` + +For most use cases, the Document or pure JS interfaces provided by the library are the right tool. +Sometimes, though, it's important to keep the original YAML source in as pristine a condition as possible. +For those cases, the concrete syntax tree (CST) representation is provided, as it retains every character of the input, including whitespace. + +#### `CST.createScalarToken(value: string, context): BlockScalar | FlowScalar` + +Create a new scalar token with the value `value`. +Values that represent an actual string but may be parsed as a different type should use a `type` other than `'PLAIN'`, +as this function does not support any schema operations and won't check for such conflicts. + +| Argument | Type | Default | Description | +| ------------------- | --------------- | ------- | ----------------------------------------------------------------------------------------------------------------------------- | +| value | `string` | | The string representation of the value, which will have its content properly indented. **Required.** | +| context.end | `SourceToken[]` | | Comments and whitespace after the end of the value, or after the block scalar header. If undefined, a newline will be added. | +| context.implicitKey | `boolean` | `false` | Being within an implicit key may affect the resolved type of the token's value. 
| +| context.indent | `number` | | The indent level of the token. **Required.** | +| context.inFlow | `boolean` | `false` | Is this scalar within a flow collection? This may affect the resolved type of the token's value. | +| context.offset | `number` | `-1` | The offset position of the token. | +| context.type | `Scalar.Type` | | The preferred type of the scalar token. If undefined, the previous type of the `token` will be used, defaulting to `'PLAIN'`. | + + +```js +const [doc] = new Parser().parse('foo: "bar" #comment') +const item = doc.value.items[0].value +> { + type: 'double-quoted-scalar', + offset: 5, + indent: 0, + source: '"bar"', + end: [ + { type: 'space', offset: 10, indent: 0, source: ' ' }, + { type: 'comment', offset: 11, indent: 0, source: '#comment' } + ] + } + +YAML.resolveAsScalar(item) +> { value: 'bar', type: 'QUOTE_DOUBLE', comment: 'comment', length: 14 } +``` + +#### `CST.isCollection(token?: Token): boolean` + +#### `CST.isScalar(token?: Token): boolean` + +Custom type guards for detecting CST collections and scalars, in both their block and flow forms. + +#### `CST.resolveAsScalar(token?: Token, strict = true, onError?: ComposeErrorHandler)` + +If `token` is a CST flow or block scalar, determine its string value and a few other attributes. +Otherwise, return `null`. + +#### `CST.setScalarValue(token: Token, value: string, context?)` + +Set the value of `token` to the given string `value`, overwriting any previous contents and type that it may have. + +Best efforts are made to retain any comments previously associated with the `token`, though all contents within a collection's `items` will be overwritten. + +Values that represent an actual string but may be parsed as a different type should use a `type` other than `'PLAIN'`, as this function does not support any schema operations and won't check for such conflicts. 
+ +| Argument | Type | Default | Description | +| ------------------- | ------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------- | +| token | `Token` | | Any token. If it does not include an `indent` value, the value will be stringified as if it were an implicit key. **Required.** | +| value | `string` | | The string representation of the value, which will have its content properly indented. **Required.** | +| context.afterKey | `boolean` | `false` | In most cases, values after a key should have an additional level of indentation. | +| context.implicitKey | `boolean` | `false` | Being within an implicit key may affect the resolved type of the token's value. | +| context.inFlow | `boolean` | `false` | Being within a flow collection may affect the resolved type of the token's value. | +| context.type | `Scalar.Type` | | The preferred type of the scalar token. If undefined, the previous type of the `token` will be used, defaulting to `'PLAIN'`. | + +```ts +function findScalarAtOffset( + cst: CST.Document, + offset: number +): CST.FlowScalar | CST.BlockScalar | undefined { + let res: CST.FlowScalar | CST.BlockScalar | undefined = undefined + CST.visit(cst, ({ key, value }) => { + for (const token of [key, value]) + if (CST.isScalar(token)) { + if (token.offset > offset) return CST.visit.BREAK + if ( + token.offset == offset || + (token.offset < offset && token.offset + token.source.length > offset) + ) { + res = token + return CST.visit.BREAK + } + } + }) + return res +} +``` + +#### `CST.stringify(cst: Token | CollectionItem): string` + +Stringify a CST document, token, or collection item. +Fair warning: This applies no validation whatsoever, and simply concatenates the sources in their logical order. + +#### `CST.visit(cst: CST.Document | CST.CollectionItem, visitor: CSTVisitor)` + +Apply a visitor to a CST document or item. 
+Effectively, the general-purpose workhorse of navigating the CST. + +Walks through the tree (depth-first) starting from `cst` as the root, calling a `visitor` function with two arguments when entering each item: + +- `item`: The current item, which includes the following members: + - `start: SourceToken[]` – Source tokens before the key or value, possibly including its anchor or tag. + - `key?: Token | null` – Set for pair values. May then be `null`, if the key before the `:` separator is empty. + - `sep?: SourceToken[]` – Source tokens between the key and the value, which should include the `:` map value indicator if `value` is set. + - `value?: Token` – The value of a sequence item, or of a map pair. +- `path`: The steps from the root to the current node, as an array of `['key' | 'value', number]` tuples. + +The return value of the visitor may be used to control the traversal: + +- `undefined` (default): Do nothing and continue +- `CST.visit.SKIP`: Do not visit the children of this token, continue with next sibling +- `CST.visit.BREAK`: Terminate traversal completely +- `CST.visit.REMOVE`: Remove the current item, then continue with the next one +- `number`: Set the index of the next step. This is useful especially if the index of the current token has changed. +- `function`: Define the next visitor for this item. After the original visitor is called on item entry, next visitors are called after handling a non-empty `key` and when exiting the item. 
+ + +```js +const [doc] = new Parser().parse('[ foo, bar, baz ]') +CST.visit(doc, (item, path) => { + if (!CST.isScalar(item.value)) return + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'bar') { + const parent = CST.visit.parentCollection(doc, path) + const idx = path[path.length - 1][1] + const { indent } = item.value + parent.items.splice(idx, 0, { + start: item.start.slice(), + value: CST.createScalarToken('bing', { end: [], indent }) + }) + return idx + 2 + } +}) + +CST.stringify(doc) +> '[ foo, bing, bar, baz ]' +``` + +A couple of utility functions are provided for working with the `path`: + +- `CST.visit.itemAtPath(cst, path): CST.CollectionItem | undefined` – Find the item at `path` from `cst` as the root. +- `CST.visit.parentCollection(cst, path): CST.BlockMap | CST.BlockSequence | CST.FlowCollection` – Get the immediate parent collection of the item at `path` from `cst` as the root. Throws an error if the collection is not found, which should never happen if the item itself exists. 
diff --git a/src/compose/compose-collection.ts b/src/compose/compose-collection.ts index 11bd0fbc..53d70682 100644 --- a/src/compose/compose-collection.ts +++ b/src/compose/compose-collection.ts @@ -2,11 +2,7 @@ import { isMap, isNode, ParsedNode } from '../nodes/Node.js' import { Scalar } from '../nodes/Scalar.js' import type { YAMLMap } from '../nodes/YAMLMap.js' import type { YAMLSeq } from '../nodes/YAMLSeq.js' -import type { - BlockMap, - BlockSequence, - FlowCollection -} from '../parse/tokens.js' +import type { BlockMap, BlockSequence, FlowCollection } from '../parse/cst.js' import { CollectionTag } from '../schema/types.js' import type { ComposeContext, ComposeNode } from './compose-node.js' import type { ComposeErrorHandler } from './composer.js' diff --git a/src/compose/compose-doc.ts b/src/compose/compose-doc.ts index 4bd5bb6c..026e7bd7 100644 --- a/src/compose/compose-doc.ts +++ b/src/compose/compose-doc.ts @@ -5,7 +5,7 @@ import type { ParseOptions, SchemaOptions } from '../options.js' -import type * as Tokens from '../parse/tokens.js' +import type * as CST from '../parse/cst.js' import { ComposeContext, composeEmptyNode, @@ -18,7 +18,7 @@ import { resolveProps } from './resolve-props.js' export function composeDoc( options: ParseOptions & DocumentOptions & SchemaOptions, directives: Directives, - { offset, start, value, end }: Tokens.Document, + { offset, start, value, end }: CST.Document, onError: ComposeErrorHandler ) { const opts = Object.assign({ directives }, options) diff --git a/src/compose/compose-node.ts b/src/compose/compose-node.ts index 355b8c2b..24651d87 100644 --- a/src/compose/compose-node.ts +++ b/src/compose/compose-node.ts @@ -2,7 +2,7 @@ import type { Directives } from '../doc/directives.js' import { Alias } from '../nodes/Alias.js' import type { ParsedNode } from '../nodes/Node.js' import type { ParseOptions } from '../options.js' -import type { FlowScalar, Token } from '../parse/tokens.js' +import type { FlowScalar, Token } from 
'../parse/cst.js' import type { Schema } from '../schema/Schema.js' import { composeCollection } from './compose-collection.js' import { composeScalar } from './compose-scalar.js' diff --git a/src/compose/compose-scalar.ts b/src/compose/compose-scalar.ts index 63e67325..dca620ac 100644 --- a/src/compose/compose-scalar.ts +++ b/src/compose/compose-scalar.ts @@ -1,6 +1,6 @@ import { isScalar, SCALAR } from '../nodes/Node.js' import { Scalar } from '../nodes/Scalar.js' -import type { BlockScalar, FlowScalar } from '../parse/tokens.js' +import type { BlockScalar, FlowScalar } from '../parse/cst.js' import type { Schema } from '../schema/Schema.js' import type { ScalarTag } from '../schema/types.js' import type { ComposeContext } from './compose-node.js' diff --git a/src/compose/composer.ts b/src/compose/composer.ts index 2a1013d5..95488f7e 100644 --- a/src/compose/composer.ts +++ b/src/compose/composer.ts @@ -8,7 +8,7 @@ import { ParseOptions, SchemaOptions } from '../options.js' -import type { Token } from '../parse/tokens.js' +import type { Token } from '../parse/cst.js' import { composeDoc } from './compose-doc.js' import { resolveEnd } from './resolve-end.js' @@ -50,32 +50,26 @@ function parsePrelude(prelude: string[]) { * Compose a stream of CST nodes into a stream of YAML Documents. * * ```ts - * const options = { ... } - * const docs: Document.Parsed[] = [] - * const composer = new Composer(doc => docs.push(doc), options) - * const parser = new Parser(composer.next) - * parser.parse(source) - * composer.end() + * import { Composer, Parser } from 'yaml' + * + * const src: string = ... 
+ * const tokens = new Parser().parse(src) + * const docs = new Composer().compose(tokens) * ``` */ export class Composer { private directives: Directives private doc: Document.Parsed | null = null - private onDocument: (doc: Document.Parsed) => void private options: ParseOptions & DocumentOptions & SchemaOptions private atDirectives = false private prelude: string[] = [] private errors: YAMLParseError[] = [] private warnings: YAMLWarning[] = [] - constructor( - onDocument: Composer['onDocument'], - options: ParseOptions & DocumentOptions & SchemaOptions = {} - ) { + constructor(options: ParseOptions & DocumentOptions & SchemaOptions = {}) { this.directives = new Directives({ - version: options?.version || defaultOptions.version + version: options.version || defaultOptions.version }) - this.onDocument = onDocument this.options = options } @@ -137,10 +131,18 @@ export class Composer { } /** - * Advance the composed by one CST token. Bound to the Composer - * instance, so may be used directly as a callback function. + * Compose tokens into documents. + * + * @param forceDoc - If the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document. + * @param endOffset - Should be set if `forceDoc` is also set, to set the document range end and to indicate errors correctly. */ - next = (token: Token) => { + *compose(tokens: Iterable, forceDoc = false, endOffset = -1) { + for (const token of tokens) yield* this.next(token) + yield* this.end(forceDoc, endOffset) + } + + /** Advance the composer by one CST token. 
*/ + *next(token: Token) { if (process.env.LOG_STREAM) console.dir(token, { depth: null }) switch (token.type) { case 'directive': @@ -158,7 +160,7 @@ export class Composer { this.onError ) this.decorate(doc, false) - if (this.doc) this.onDocument(this.doc) + if (this.doc) yield this.doc this.doc = doc this.atDirectives = false break @@ -212,37 +214,29 @@ export class Composer { } } - /** Call at end of input to push out any remaining document. */ - end(): void - /** - * Call at end of input to push out any remaining document. + * Call at end of input to yield any remaining document. * - * @param forceDoc - If the stream contains no document, still emit a final - * document including any comments and directives that would be applied - * to a subsequent document. - * @param offset - Should be set if `forceDoc` is also set, to set the - * document range end and to indicate errors correctly. + * @param forceDoc - If the stream contains no document, still emit a final document including any comments and directives that would be applied to a subsequent document. + * @param endOffset - Should be set if `forceDoc` is also set, to set the document range end and to indicate errors correctly. 
*/ - end(forceDoc: true, offset: number): void - - end(forceDoc = false, offset = -1) { + *end(forceDoc = false, endOffset = -1) { if (this.doc) { this.decorate(this.doc, true) - this.onDocument(this.doc) + yield this.doc this.doc = null } else if (forceDoc) { const opts = Object.assign({ directives: this.directives }, this.options) const doc = new Document(undefined, opts) as Document.Parsed if (this.atDirectives) this.onError( - offset, + endOffset, 'MISSING_CHAR', 'Missing directives-end indicator line' ) - doc.range = [0, offset] + doc.range = [0, endOffset] this.decorate(doc, false) - this.onDocument(doc) + yield doc } } } diff --git a/src/compose/resolve-block-map.ts b/src/compose/resolve-block-map.ts index af18380e..83fe741c 100644 --- a/src/compose/resolve-block-map.ts +++ b/src/compose/resolve-block-map.ts @@ -1,6 +1,6 @@ import { Pair } from '../nodes/Pair.js' import { YAMLMap } from '../nodes/YAMLMap.js' -import type { BlockMap } from '../parse/tokens.js' +import type { BlockMap } from '../parse/cst.js' import type { ComposeContext, ComposeNode } from './compose-node.js' import type { ComposeErrorHandler } from './composer.js' import { resolveProps } from './resolve-props.js' diff --git a/src/compose/resolve-block-scalar.ts b/src/compose/resolve-block-scalar.ts index bcb78308..b84fc3ef 100644 --- a/src/compose/resolve-block-scalar.ts +++ b/src/compose/resolve-block-scalar.ts @@ -1,5 +1,5 @@ import { Scalar } from '../nodes/Scalar.js' -import type { BlockScalar } from '../parse/tokens.js' +import type { BlockScalar } from '../parse/cst.js' import type { ComposeErrorHandler } from './composer.js' export function resolveBlockScalar( diff --git a/src/compose/resolve-block-seq.ts b/src/compose/resolve-block-seq.ts index 57053c41..a6211df9 100644 --- a/src/compose/resolve-block-seq.ts +++ b/src/compose/resolve-block-seq.ts @@ -1,5 +1,5 @@ import { YAMLSeq } from '../nodes/YAMLSeq.js' -import type { BlockSequence } from '../parse/tokens.js' +import type { 
BlockSequence } from '../parse/cst.js' import type { ComposeContext, ComposeNode } from './compose-node.js' import type { ComposeErrorHandler } from './composer.js' import { resolveProps } from './resolve-props.js' diff --git a/src/compose/resolve-end.ts b/src/compose/resolve-end.ts index 065cfa87..e7fb5fcc 100644 --- a/src/compose/resolve-end.ts +++ b/src/compose/resolve-end.ts @@ -1,4 +1,4 @@ -import type { SourceToken } from '../parse/tokens.js' +import type { SourceToken } from '../parse/cst.js' import type { ComposeErrorHandler } from './composer.js' export function resolveEnd( diff --git a/src/compose/resolve-flow-collection.ts b/src/compose/resolve-flow-collection.ts index aadaa299..5e236072 100644 --- a/src/compose/resolve-flow-collection.ts +++ b/src/compose/resolve-flow-collection.ts @@ -2,7 +2,7 @@ import { isPair } from '../nodes/Node.js' import { Pair } from '../nodes/Pair.js' import { YAMLMap } from '../nodes/YAMLMap.js' import { YAMLSeq } from '../nodes/YAMLSeq.js' -import type { FlowCollection } from '../parse/tokens.js' +import type { FlowCollection } from '../parse/cst.js' import type { ComposeContext, ComposeNode } from './compose-node.js' import type { ComposeErrorHandler } from './composer.js' import { resolveEnd } from './resolve-end.js' diff --git a/src/compose/resolve-flow-scalar.ts b/src/compose/resolve-flow-scalar.ts index 9486f482..d31e6c6a 100644 --- a/src/compose/resolve-flow-scalar.ts +++ b/src/compose/resolve-flow-scalar.ts @@ -1,5 +1,5 @@ import { Scalar } from '../nodes/Scalar.js' -import type { FlowScalar } from '../parse/tokens.js' +import type { FlowScalar } from '../parse/cst.js' import type { ComposeErrorHandler } from './composer.js' import { resolveEnd } from './resolve-end.js' diff --git a/src/compose/resolve-props.ts b/src/compose/resolve-props.ts index 419c27b4..b289b0a2 100644 --- a/src/compose/resolve-props.ts +++ b/src/compose/resolve-props.ts @@ -1,4 +1,4 @@ -import type { SourceToken } from '../parse/tokens.js' 
+import type { SourceToken } from '../parse/cst.js' import type { ComposeContext } from './compose-node.js' import type { ComposeErrorHandler } from './composer.js' diff --git a/src/compose/util-contains-newline.ts b/src/compose/util-contains-newline.ts index 32cd8d3d..a34e3066 100644 --- a/src/compose/util-contains-newline.ts +++ b/src/compose/util-contains-newline.ts @@ -1,4 +1,4 @@ -import type { Token } from '../parse/tokens.js' +import type { Token } from '../parse/cst.js' export function containsNewline(key: Token | null | undefined) { if (!key) return null diff --git a/src/compose/util-empty-scalar-position.ts b/src/compose/util-empty-scalar-position.ts index 6b1e46f9..51180717 100644 --- a/src/compose/util-empty-scalar-position.ts +++ b/src/compose/util-empty-scalar-position.ts @@ -1,4 +1,4 @@ -import type { Token } from '../parse/tokens.js' +import type { Token } from '../parse/cst.js' export function emptyScalarPosition( offset: number, diff --git a/src/index.ts b/src/index.ts index dbb9c043..50bd1b14 100644 --- a/src/index.ts +++ b/src/index.ts @@ -33,10 +33,10 @@ export { ToStringOptions } from './options.js' +export * as CST from './parse/cst.js' export { Lexer } from './parse/lexer.js' export { LineCounter } from './parse/line-counter.js' export { Parser } from './parse/parser.js' -export * as tokens from './parse/tokens.js' export { EmptyStream, diff --git a/src/parse/cst-scalar.ts b/src/parse/cst-scalar.ts new file mode 100644 index 00000000..4fcdd4c1 --- /dev/null +++ b/src/parse/cst-scalar.ts @@ -0,0 +1,264 @@ +import type { ComposeErrorHandler } from '../compose/composer.js' +import { resolveBlockScalar } from '../compose/resolve-block-scalar.js' +import { resolveFlowScalar } from '../compose/resolve-flow-scalar.js' +import { YAMLParseError } from '../errors.js' +import type { Scalar } from '../nodes/Scalar.js' +import type { StringifyContext } from '../stringify/stringify.js' +import { stringifyString } from '../stringify/stringifyString.js' 
+import type { BlockScalar, FlowScalar, SourceToken, Token } from './cst.js' + +/** + * If `token` is a CST flow or block scalar, determine its string value and a few other attributes. + * Otherwise, return `null`. + */ +export function resolveAsScalar( + token: Token | null | undefined, + strict = true, + onError?: ComposeErrorHandler +): { + value: string + type: Scalar.Type | null + comment: string + length: number +} | null { + if (token) { + if (!onError) + onError = (offset, code, message) => { + throw new YAMLParseError(offset, code, message) + } + switch (token.type) { + case 'scalar': + case 'single-quoted-scalar': + case 'double-quoted-scalar': + return resolveFlowScalar(token, strict, onError) + case 'block-scalar': + return resolveBlockScalar(token, strict, onError) + } + } + return null +} + +/** + * Create a new scalar token with `value` + * + * Values that represent an actual string but may be parsed as a different type should use a `type` other than `'PLAIN'`, + * as this function does not support any schema operations and won't check for such conflicts. + * + * @param value The string representation of the value, which will have its content properly indented. + * @param context.end Comments and whitespace after the end of the value, or after the block scalar header. If undefined, a newline will be added. + * @param context.implicitKey Being within an implicit key may affect the resolved type of the token's value. + * @param context.indent The indent level of the token. + * @param context.inFlow Is this scalar within a flow collection? This may affect the resolved type of the token's value. + * @param context.offset The offset position of the token. + * @param context.type The preferred type of the scalar token. If undefined, the previous type of the `token` will be used, defaulting to `'PLAIN'`. 
+ */ +export function createScalarToken( + value: string, + context: { + end?: SourceToken[] + implicitKey?: boolean + indent: number + inFlow?: boolean + offset?: number + type?: Scalar.Type + } +): BlockScalar | FlowScalar { + const { + implicitKey = false, + indent, + inFlow = false, + offset = -1, + type = 'PLAIN' + } = context + const source = stringifyString( + { type, value } as Scalar, + { + implicitKey, + indent: indent > 0 ? ' '.repeat(indent) : '', + inFlow, + options: { lineWidth: -1 } + } as StringifyContext + ) + const end = context.end ?? [{ type: 'newline', offset: -1, indent, source: '\n' }] + switch (source[0]) { + case '|': + case '>': { + const he = source.indexOf('\n') + const head = source.substring(0, he) + const body = source.substring(he + 1) + '\n' + const props: Token[] = [ + { type: 'block-scalar-header', offset, indent, source: head } + ] + if (!addEndtoBlockProps(props, end)) + props.push({ type: 'newline', offset: -1, indent, source: '\n' }) + return { type: 'block-scalar', offset, indent, props, source: body } + } + case '"': + return { type: 'double-quoted-scalar', offset, indent, source, end } + case "'": + return { type: 'single-quoted-scalar', offset, indent, source, end } + default: + return { type: 'scalar', offset, indent, source, end } + } +} + +/** + * Set the value of `token` to the given string `value`, overwriting any previous contents and type that it may have. + * + * Best efforts are made to retain any comments previously associated with the `token`, + * though all contents within a collection's `items` will be overwritten. + * + * Values that represent an actual string but may be parsed as a different type should use a `type` other than `'PLAIN'`, + * as this function does not support any schema operations and won't check for such conflicts. + * + * @param token Any token. If it does not include an `indent` value, the value will be stringified as if it were an implicit key. 
+ * @param value The string representation of the value, which will have its content properly indented. + * @param context.afterKey In most cases, values after a key should have an additional level of indentation. + * @param context.implicitKey Being within an implicit key may affect the resolved type of the token's value. + * @param context.inFlow Being within a flow collection may affect the resolved type of the token's value. + * @param context.type The preferred type of the scalar token. If undefined, the previous type of the `token` will be used, defaulting to `'PLAIN'`. + */ +export function setScalarValue( + token: Token, + value: string, + context: { + afterKey?: boolean + implicitKey?: boolean + inFlow?: boolean + type?: Scalar.Type + } = {} +) { + let { afterKey = false, implicitKey = false, inFlow = false, type } = context + let indent = 'indent' in token ? token.indent : null + if (afterKey && typeof indent === 'number') indent += 2 + if (!type) + switch (token.type) { + case 'single-quoted-scalar': + type = 'QUOTE_SINGLE' + break + case 'double-quoted-scalar': + type = 'QUOTE_DOUBLE' + break + case 'block-scalar': { + const header = token.props[0] + if (header.type !== 'block-scalar-header') + throw new Error('Invalid block scalar header') + type = header.source[0] === '>' ? 'BLOCK_FOLDED' : 'BLOCK_LITERAL' + break + } + default: + type = 'PLAIN' + } + const source = stringifyString( + { type, value } as Scalar, + { + implicitKey: implicitKey || indent === null, + indent: indent !== null && indent > 0 ? 
' '.repeat(indent) : '', + inFlow, + options: { lineWidth: -1 } + } as StringifyContext + ) + switch (source[0]) { + case '|': + case '>': + setBlockScalarValue(token, source) + break + case '"': + setFlowScalarValue(token, source, 'double-quoted-scalar') + break + case "'": + setFlowScalarValue(token, source, 'single-quoted-scalar') + break + default: + setFlowScalarValue(token, source, 'scalar') + } +} + +function setBlockScalarValue(token: Token, source: string) { + const he = source.indexOf('\n') + const head = source.substring(0, he) + const body = source.substring(he + 1) + '\n' + if (token.type === 'block-scalar') { + const header = token.props[0] + if (header.type !== 'block-scalar-header') + throw new Error('Invalid block scalar header') + header.source = head + token.source = body + } else { + let offset = token.offset + const indent = 'indent' in token ? token.indent : -1 + const props: Token[] = [ + { type: 'block-scalar-header', offset, indent, source: head } + ] + if (!addEndtoBlockProps(props, 'end' in token ? 
token.end : undefined)) + props.push({ type: 'newline', offset: -1, indent, source: '\n' }) + + for (const key of Object.keys(token)) + if (key !== 'type' && key !== 'offset') delete (token as any)[key] + Object.assign(token, { type: 'block-scalar', indent, props, source: body }) + } +} + +/** @returns `true` if last token is a newline */ +function addEndtoBlockProps(props: Token[], end?: SourceToken[]) { + if (end) + for (const st of end) + switch (st.type) { + case 'space': + case 'comment': + props.push(st) + break + case 'newline': + props.push(st) + return true + } + return false +} + +function setFlowScalarValue( + token: Token, + source: string, + type: 'scalar' | 'double-quoted-scalar' | 'single-quoted-scalar' +) { + switch (token.type) { + case 'scalar': + case 'double-quoted-scalar': + case 'single-quoted-scalar': + token.type = type + token.source = source + break + case 'block-scalar': { + const end = token.props.slice(1) + let oa = source.length + if (token.props[0].type === 'block-scalar-header') + oa -= token.props[0].source.length + for (const tok of end) tok.offset += oa + delete (token as any).props + Object.assign(token, { type, source, end }) + break + } + case 'block-map': + case 'block-seq': { + const offset = token.offset + source.length + const nl = { type: 'newline', offset, indent: token.indent, source: '\n' } + delete (token as any).items + Object.assign(token, { type, source, end: [nl] }) + break + } + default: { + const indent = 'indent' in token ? token.indent : -1 + const end = + 'end' in token && Array.isArray(token.end) + ? 
token.end.filter( + st => + st.type === 'space' || + st.type === 'comment' || + st.type === 'newline' + ) + : [] + for (const key of Object.keys(token)) + if (key !== 'type' && key !== 'offset') delete (token as any)[key] + Object.assign(token, { type, indent, source, end }) + } + } +} diff --git a/src/parse/cst-stringify.ts b/src/parse/cst-stringify.ts new file mode 100644 index 00000000..511c082c --- /dev/null +++ b/src/parse/cst-stringify.ts @@ -0,0 +1,52 @@ +import type { CollectionItem, Token } from './cst.js' + +/** + * Stringify a CST document, token, or collection item + * + * Fair warning: This applies no validation whatsoever, and + * simply concatenates the sources in their logical order. + */ +export const stringify = (cst: Token | CollectionItem) => + 'type' in cst ? stringifyToken(cst) : stringifyItem(cst) + +function stringifyToken(token: Token) { + switch (token.type) { + case 'block-scalar': { + let res = '' + for (const tok of token.props) res += stringifyToken(tok) + return res + token.source + } + case 'block-map': + case 'block-seq': { + let res = '' + for (const item of token.items) res += stringifyItem(item) + return res + } + case 'flow-collection': { + let res = token.start.source + for (const item of token.items) res += stringifyItem(item) + for (const st of token.end) res += st.source + return res + } + case 'document': { + let res = stringifyItem(token) + if (token.end) for (const st of token.end) res += st.source + return res + } + default: { + let res = token.source + if ('end' in token && token.end) + for (const st of token.end) res += st.source + return res + } + } +} + +function stringifyItem({ start, key, sep, value }: CollectionItem) { + let res = '' + for (const st of start) res += st.source + if (key) res += stringifyToken(key) + if (sep) for (const st of sep) res += st.source + if (value) res += stringifyToken(value) + return res +} diff --git a/src/parse/cst-visit.ts b/src/parse/cst-visit.ts new file mode 100644 index 
00000000..b178bd19 --- /dev/null +++ b/src/parse/cst-visit.ts @@ -0,0 +1,113 @@ +import type { CollectionItem, Document } from './cst.js' + +const BREAK = Symbol('break visit') +const SKIP = Symbol('skip children') +const REMOVE = Symbol('remove item') + +export type VisitPath = readonly ['key' | 'value', number][] + +export type Visitor = ( + item: CollectionItem, + path: VisitPath +) => number | symbol | Visitor | void + +/** + * Apply a visitor to a CST document or item. + * + * Walks through the tree (depth-first) starting from the root, calling a + * `visitor` function with two arguments when entering each item: + * - `item`: The current item, which includes the following members: + * - `start: SourceToken[]` – Source tokens before the key or value, + * possibly including its anchor or tag. + * - `key?: Token | null` – Set for pair values. May then be `null`, if + * the key before the `:` separator is empty. + * - `sep?: SourceToken[]` – Source tokens between the key and the value, + * which should include the `:` map value indicator if `value` is set. + * - `value?: Token` – The value of a sequence item, or of a map pair. + * - `path`: The steps from the root to the current node, as an array of + * `['key' | 'value', number]` tuples. + * + * The return value of the visitor may be used to control the traversal: + * - `undefined` (default): Do nothing and continue + * - `visit.SKIP`: Do not visit the children of this token, continue with + * next sibling + * - `visit.BREAK`: Terminate traversal completely + * - `visit.REMOVE`: Remove the current item, then continue with the next one + * - `number`: Set the index of the next step. This is useful especially if + * the index of the current token has changed. + * - `function`: Define the next visitor for this item. After the original + * visitor is called on item entry, next visitors are called after handling + * a non-empty `key` and when exiting the item.
+ */ +export function visit(cst: Document | CollectionItem, visitor: Visitor) { + if ('type' in cst && cst.type === 'document') + cst = { start: cst.start, value: cst.value } + _visit(Object.freeze([]), cst, visitor) +} + +// Without the `as symbol` casts, TS declares these in the `visit` +// namespace using `var`, but then complains about that because +// `unique symbol` must be `const`. + +/** Terminate visit traversal completely */ +visit.BREAK = BREAK as symbol + +/** Do not visit the children of the current item */ +visit.SKIP = SKIP as symbol + +/** Remove the current item */ +visit.REMOVE = REMOVE as symbol + +/** Find the item at `path` from `cst` as the root */ +visit.itemAtPath = (cst: Document | CollectionItem, path: VisitPath) => { + let item: CollectionItem = cst + for (const [field, index] of path) { + const tok = item && item[field] + if (tok && 'items' in tok) { + item = tok.items[index] + } else return undefined + } + return item +} + +/** + * Get the immediate parent collection of the item at `path` from `cst` as the root. + * + * Throws an error if the collection is not found, which should never happen if the item itself exists. 
+ */ +visit.parentCollection = (cst: Document | CollectionItem, path: VisitPath) => { + const parent = visit.itemAtPath(cst, path.slice(0, -1)) + const field = path[path.length - 1][0] + const coll = parent && parent[field] + if (coll && 'items' in coll) return coll + throw new Error('Parent collection not found') +} + +function _visit( + path: VisitPath, + item: CollectionItem, + visitor: Visitor +): number | symbol | Visitor | void { + let ctrl = visitor(item, path) + if (typeof ctrl === 'symbol') return ctrl + for (const field of ['key', 'value'] as const) { + const token = item[field] + if (token && 'items' in token) { + for (let i = 0; i < token.items.length; ++i) { + const ci = _visit( + Object.freeze(path.concat([[field, i]])), + token.items[i], + visitor + ) + if (typeof ci === 'number') i = ci - 1 + else if (ci === BREAK) return BREAK + else if (ci === REMOVE) { + token.items.splice(i, 1) + i -= 1 + } + } + if (typeof ctrl === 'function' && field === 'key') ctrl = ctrl(item, path) + } + } + return typeof ctrl === 'function' ? 
ctrl(item, path) : ctrl +} diff --git a/src/parse/tokens.ts b/src/parse/cst.ts similarity index 81% rename from src/parse/tokens.ts rename to src/parse/cst.ts index 8ee19939..1d5c97e2 100644 --- a/src/parse/tokens.ts +++ b/src/parse/cst.ts @@ -1,3 +1,11 @@ +export { + createScalarToken, + resolveAsScalar, + setScalarValue +} from './cst-scalar.js' +export { stringify } from './cst-stringify.js' +export { visit, Visitor, VisitPath } from './cst-visit.js' + export interface SourceToken { type: | 'byte-order-mark' @@ -65,7 +73,7 @@ export interface BlockScalar { offset: number indent: number props: Token[] - source?: string + source: string } export interface BlockMap { @@ -95,17 +103,19 @@ export interface BlockSequence { }> } +export type CollectionItem = { + start: SourceToken[] + key?: Token | null + sep?: SourceToken[] + value?: Token +} + export interface FlowCollection { type: 'flow-collection' offset: number indent: number start: SourceToken - items: Array<{ - start: SourceToken[] - key?: Token | null - sep?: SourceToken[] - value?: Token - }> + items: CollectionItem[] end: SourceToken[] } @@ -138,6 +148,22 @@ export const FLOW_END = '\x18' // C0: Cancel /** Next token is a scalar value */ export const SCALAR = '\x1f' // C0: Unit Separator +/** @returns `true` if `token` is a flow or block collection */ +export const isCollection = ( + token: Token | null | undefined +): token is BlockMap | BlockSequence | FlowCollection => + !!token && 'items' in token + +/** @returns `true` if `token` is a flow or block scalar; not an alias */ +export const isScalar = ( + token: Token | null | undefined +): token is FlowScalar | BlockScalar => + !!token && + (token.type === 'scalar' || + token.type === 'single-quoted-scalar' || + token.type === 'double-quoted-scalar' || + token.type === 'block-scalar') + /* istanbul ignore next */ /** Get a printable representation of a lexer token */ export function prettyToken(token: string) { diff --git a/src/parse/lexer.ts 
b/src/parse/lexer.ts index 22b72ccc..61f88547 100644 --- a/src/parse/lexer.ts +++ b/src/parse/lexer.ts @@ -66,7 +66,7 @@ plain-scalar(is-flow, min) [else] -> plain-scalar(min) */ -import { BOM, DOCUMENT, FLOW_END, SCALAR } from './tokens.js' +import { BOM, DOCUMENT, FLOW_END, SCALAR } from './cst.js' type State = | 'stream' @@ -112,8 +112,6 @@ const isNotIdentifierChar = (ch: string) => * - `\u{FEFF}` (Byte order mark): Emitted separately outside documents */ export class Lexer { - private push: (token: string) => void - /** * Flag indicating whether the end of the current buffer marks the end of * all input @@ -159,30 +157,20 @@ export class Lexer { private next: State | null = null /** A pointer to `buffer`; the current position of the lexer. */ - private pos = 0 + private pos = 0; /** - * Define/initialise a YAML lexer. `push` will be called separately with each - * token when `lex()` is passed an input string. + * Generate YAML tokens from the `source` string. If `incomplete`, + * a part of the last line may be left as a buffer for the next call. * - * @public + * @returns A generator of lexical tokens */ - constructor(push: (token: string) => void) { - this.push = push - } - - /** - * Read YAML tokens from the `source` string, calling the callback - * defined in the constructor for each one. If `incomplete`, a part - * of the last line may be left as a buffer for the next call. - * - * @public - */ - lex(source: string, incomplete: boolean) { + *lex(source: string, incomplete = false) { if (source) this.buffer = this.buffer ? 
this.buffer + source : source this.atEnd = !incomplete let next: State | null = this.next || 'stream' - while (next && (incomplete || this.hasChars(1))) next = this.parseNext(next) + while (next && (incomplete || this.hasChars(1))) + next = yield* this.parseNext(next) } private atLineEnd() { @@ -241,32 +229,32 @@ export class Lexer { return this.buffer.substr(this.pos, n) } - private parseNext(next: State) { + private *parseNext(next: State) { switch (next) { case 'stream': - return this.parseStream() + return yield* this.parseStream() case 'line-start': - return this.parseLineStart() + return yield* this.parseLineStart() case 'block-start': - return this.parseBlockStart() + return yield* this.parseBlockStart() case 'doc': - return this.parseDocument() + return yield* this.parseDocument() case 'flow': - return this.parseFlowCollection() + return yield* this.parseFlowCollection() case 'quoted-scalar': - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case 'block-scalar': - return this.parseBlockScalar() + return yield* this.parseBlockScalar() case 'plain-scalar': - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseStream() { + private *parseStream() { let line = this.getLine() if (line === null) return this.setNext('stream') if (line[0] === BOM) { - this.pushCount(1) + yield* this.pushCount(1) line = line.substring(1) } if (line[0] === '%') { @@ -281,102 +269,102 @@ export class Lexer { if (ch === ' ' || ch === '\t') dirEnd -= 1 else break } - const n = this.pushCount(dirEnd) + this.pushSpaces(true) - this.pushCount(line.length - n) // possible comment + const n = (yield* this.pushCount(dirEnd)) + (yield* this.pushSpaces(true)) + yield* this.pushCount(line.length - n) // possible comment this.pushNewline() return 'stream' } if (this.atLineEnd()) { - const sp = this.pushSpaces(true) - this.pushCount(line.length - sp) - this.pushNewline() + const sp = yield* this.pushSpaces(true) + yield* 
this.pushCount(line.length - sp) + yield* this.pushNewline() return 'stream' } - this.push(DOCUMENT) - return this.parseLineStart() + yield DOCUMENT + return yield* this.parseLineStart() } - private parseLineStart() { + private *parseLineStart() { const ch = this.charAt(0) if (!ch && !this.atEnd) return this.setNext('line-start') if (ch === '-' || ch === '.') { if (!this.atEnd && !this.hasChars(4)) return this.setNext('line-start') const s = this.peek(3) if (s === '---' && isEmpty(this.charAt(3))) { - this.pushCount(3) + yield* this.pushCount(3) this.indentValue = 0 this.indentNext = 0 return 'doc' } else if (s === '...' && isEmpty(this.charAt(3))) { - this.pushCount(3) + yield* this.pushCount(3) return 'stream' } } - this.indentValue = this.pushSpaces(false) + this.indentValue = yield* this.pushSpaces(false) if (this.indentNext > this.indentValue && !isEmpty(this.charAt(1))) this.indentNext = this.indentValue - return this.parseBlockStart() + return yield* this.parseBlockStart() } - private parseBlockStart(): 'doc' | null { + private *parseBlockStart(): Generator { const [ch0, ch1] = this.peek(2) if (!ch1 && !this.atEnd) return this.setNext('block-start') if ((ch0 === '-' || ch0 === '?' 
|| ch0 === ':') && isEmpty(ch1)) { - const n = this.pushCount(1) + this.pushSpaces(true) + const n = (yield* this.pushCount(1)) + (yield* this.pushSpaces(true)) this.indentNext = this.indentValue + 1 this.indentValue += n - return this.parseBlockStart() + return yield* this.parseBlockStart() } return 'doc' } - private parseDocument() { - this.pushSpaces(true) + private *parseDocument() { + yield* this.pushSpaces(true) const line = this.getLine() if (line === null) return this.setNext('doc') - let n = this.pushIndicators() + let n = yield* this.pushIndicators() switch (line[n]) { case '#': - this.pushCount(line.length - n) + yield* this.pushCount(line.length - n) // fallthrough case undefined: - this.pushNewline() - return this.parseLineStart() + yield* this.pushNewline() + return yield* this.parseLineStart() case '{': case '[': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = false this.flowLevel = 1 return 'flow' case '}': case ']': // this is an error - this.pushCount(1) + yield* this.pushCount(1) return 'doc' case '*': - this.pushUntil(isNotIdentifierChar) + yield* this.pushUntil(isNotIdentifierChar) return 'doc' case '"': case "'": - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case '|': case '>': - n += this.parseBlockScalarHeader() - n += this.pushSpaces(true) - this.pushCount(line.length - n) - this.pushNewline() - return this.parseBlockScalar() + n += yield* this.parseBlockScalarHeader() + n += yield* this.pushSpaces(true) + yield* this.pushCount(line.length - n) + yield* this.pushNewline() + return yield* this.parseBlockScalar() default: - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseFlowCollection() { + private *parseFlowCollection() { let nl: number, sp: number let indent = -1 do { - nl = this.pushNewline() - sp = this.pushSpaces(true) + nl = yield* this.pushNewline() + sp = yield* this.pushSpaces(true) if (nl > 0) this.indentValue = indent = sp } while (nl + sp > 0) 
const line = this.getLine() @@ -397,54 +385,55 @@ export class Lexer { if (!atFlowEndMarker) { // this is an error this.flowLevel = 0 - this.push(FLOW_END) - return this.parseLineStart() + yield FLOW_END + return yield* this.parseLineStart() } } let n = 0 - while (line[n] === ',') n += this.pushCount(1) + this.pushSpaces(true) - n += this.pushIndicators() + while (line[n] === ',') + n += (yield* this.pushCount(1)) + (yield* this.pushSpaces(true)) + n += yield* this.pushIndicators() switch (line[n]) { case undefined: return 'flow' case '#': - this.pushCount(line.length - n) + yield* this.pushCount(line.length - n) return 'flow' case '{': case '[': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = false this.flowLevel += 1 return 'flow' case '}': case ']': - this.pushCount(1) + yield* this.pushCount(1) this.flowKey = true this.flowLevel -= 1 return this.flowLevel ? 'flow' : 'doc' case '*': - this.pushUntil(isNotIdentifierChar) + yield* this.pushUntil(isNotIdentifierChar) return 'flow' case '"': case "'": this.flowKey = true - return this.parseQuotedScalar() + return yield* this.parseQuotedScalar() case ':': { const next = this.charAt(1) if (this.flowKey || isEmpty(next) || next === ',') { - this.pushCount(1) - this.pushSpaces(true) + yield* this.pushCount(1) + yield* this.pushSpaces(true) return 'flow' } } // fallthrough default: this.flowKey = false - return this.parsePlainScalar() + return yield* this.parsePlainScalar() } } - private parseQuotedScalar() { + private *parseQuotedScalar() { const quote = this.charAt(0) let end = this.buffer.indexOf(quote, this.pos + 1) if (quote === "'") { @@ -475,11 +464,11 @@ export class Lexer { if (!this.atEnd) return this.setNext('quoted-scalar') end = this.buffer.length } - this.pushToIndex(end + 1, false) + yield* this.pushToIndex(end + 1, false) return this.flowLevel ? 
'flow' : 'doc' } - private parseBlockScalarHeader() { + private *parseBlockScalarHeader() { this.blockScalarIndent = -1 this.blockScalarKeep = false let i = this.pos @@ -489,10 +478,10 @@ export class Lexer { else if (ch > '0' && ch <= '9') this.blockScalarIndent = Number(ch) - 1 else if (ch !== '-') break } - return this.pushUntil(ch => isEmpty(ch) || ch === '#') + return yield* this.pushUntil(ch => isEmpty(ch) || ch === '#') } - private parseBlockScalar() { + private *parseBlockScalar() { let nl = this.pos - 1 // may be -1 if this.pos === 0 let indent = 0 let ch: string @@ -538,12 +527,12 @@ export class Lexer { else break } while (true) } - this.push(SCALAR) - this.pushToIndex(nl + 1, true) - return this.parseLineStart() + yield SCALAR + yield* this.pushToIndex(nl + 1, true) + return yield* this.parseLineStart() } - private parsePlainScalar() { + private *parsePlainScalar() { const inFlow = this.flowLevel > 0 let end = this.pos - 1 let i = this.pos - 1 @@ -574,45 +563,45 @@ export class Lexer { } } if (!ch && !this.atEnd) return this.setNext('plain-scalar') - this.push(SCALAR) - this.pushToIndex(end + 1, true) + yield SCALAR + yield* this.pushToIndex(end + 1, true) return inFlow ? 
'flow' : 'doc' } - private pushCount(n: number) { + private *pushCount(n: number) { if (n > 0) { - this.push(this.buffer.substr(this.pos, n)) + yield this.buffer.substr(this.pos, n) this.pos += n return n } return 0 } - private pushToIndex(i: number, allowEmpty: boolean) { + private *pushToIndex(i: number, allowEmpty: boolean) { const s = this.buffer.slice(this.pos, i) if (s) { - this.push(s) + yield s this.pos += s.length return s.length - } else if (allowEmpty) this.push('') + } else if (allowEmpty) yield '' return 0 } - private pushIndicators(): number { + private *pushIndicators(): Generator { switch (this.charAt(0)) { case '!': if (this.charAt(1) === '<') return ( - this.pushVerbatimTag() + - this.pushSpaces(true) + - this.pushIndicators() + (yield* this.pushVerbatimTag()) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) // fallthrough case '&': return ( - this.pushUntil(isNotIdentifierChar) + - this.pushSpaces(true) + - this.pushIndicators() + (yield* this.pushUntil(isNotIdentifierChar)) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) case ':': case '?': // this is an error outside flow collections @@ -620,28 +609,31 @@ export class Lexer { if (isEmpty(this.charAt(1))) { if (this.flowLevel === 0) this.indentNext = this.indentValue + 1 return ( - this.pushCount(1) + this.pushSpaces(true) + this.pushIndicators() + (yield* this.pushCount(1)) + + (yield* this.pushSpaces(true)) + + (yield* this.pushIndicators()) ) } } return 0 } - private pushVerbatimTag() { + private *pushVerbatimTag() { let i = this.pos + 2 let ch = this.buffer[i] while (!isEmpty(ch) && ch !== '>') ch = this.buffer[++i] - return this.pushToIndex(ch === '>' ? i + 1 : i, false) + return yield* this.pushToIndex(ch === '>' ? 
i + 1 : i, false) } - private pushNewline() { + private *pushNewline() { const ch = this.buffer[this.pos] - if (ch === '\n') return this.pushCount(1) - else if (ch === '\r' && this.charAt(1) === '\n') return this.pushCount(2) + if (ch === '\n') return yield* this.pushCount(1) + else if (ch === '\r' && this.charAt(1) === '\n') + return yield* this.pushCount(2) else return 0 } - private pushSpaces(allowTabs: boolean) { + private *pushSpaces(allowTabs: boolean) { let i = this.pos - 1 let ch: string do { @@ -649,16 +641,16 @@ export class Lexer { } while (ch === ' ' || (allowTabs && ch === '\t')) const n = i - this.pos if (n > 0) { - this.push(this.buffer.substr(this.pos, n)) + yield this.buffer.substr(this.pos, n) this.pos = i } return n } - private pushUntil(test: (ch: string) => boolean) { + private *pushUntil(test: (ch: string) => boolean) { let i = this.pos let ch = this.buffer[i] while (!test(ch)) ch = this.buffer[++i] - return this.pushToIndex(i, false) + return yield* this.pushToIndex(i, false) } } diff --git a/src/parse/parser.ts b/src/parse/parser.ts index b74220e3..1517ea2a 100644 --- a/src/parse/parser.ts +++ b/src/parse/parser.ts @@ -1,4 +1,3 @@ -import { Lexer } from './lexer.js' import { SourceToken, Token, @@ -12,7 +11,8 @@ import { prettyToken, tokenType, TokenType -} from './tokens.js' +} from './cst.js' +import { Lexer } from './lexer.js' function includesToken(list: SourceToken[], type: SourceToken['type']) { for (let i = 0; i < list.length; ++i) if (list[i].type === type) return true @@ -140,22 +140,31 @@ function fixFlowSeqItems(fc: FlowCollection) { /** * A YAML concrete syntax tree (CST) parser * - * While the `parse()` method provides an API for parsing a source string - * directly, the parser may also be used with a user-provided lexer: - * * ```ts - * const cst: Token[] = [] - * const parser = new Parser(tok => cst.push(tok)) * const src: string = ... 
+ * for (const token of new Parser().parse(src)) { + * // token: Token + * } + * ``` * - * // The following would be equivalent to `parser.parse(src, false)` - * const lexer = new Lexer(parser.next) - * lexer.lex(src, false) - * parser.end() + * To use the parser with a user-provided lexer: + * + * ```ts + * function* parse(source: string, lexer: Lexer) { + * const parser = new Parser() + * for (const lexeme of lexer.lex(source)) + * yield* parser.next(lexeme) + * yield* parser.end() + * } + * + * const src: string = ... + * const lexer = new Lexer() + * for (const token of parse(src, lexer)) { + * // token: Token + * } * ``` */ export class Parser { - private push: (token: Token) => void private onNewLine?: (offset: number) => void /** If true, space and sequence indicators count as indentation */ @@ -183,44 +192,38 @@ export class Parser { private type = '' as TokenType /** - * @param push - Called separately with each parsed token * @param onNewLine - If defined, called separately with the start position of * each new line (in `parse()`, including the start of input). - * @public */ - constructor( - push: (token: Token) => void, - onNewLine?: (offset: number) => void - ) { - this.push = push + constructor(onNewLine?: (offset: number) => void) { this.onNewLine = onNewLine } /** - * Parse `source` as a YAML stream, calling `push` with each directive, - * document and other structure as it is completely parsed. If `incomplete`, - * a part of the last line may be left as a buffer for the next call. + * Parse `source` as a YAML stream. + * If `incomplete`, a part of the last line may be left as a buffer for the next call. + * + * Errors are not thrown, but yielded as `{ type: 'error', message }` tokens. * - * Errors are not thrown, but pushed out as `{ type: 'error', message }` tokens. - * @public + * @returns A generator of tokens representing each directive, document, and other structure. 
*/ - parse(source: string, incomplete = false) { + *parse(source: string, incomplete = false) { if (this.onNewLine && this.offset === 0) this.onNewLine(0) - this.lexer.lex(source, incomplete) - if (!incomplete) this.end() + for (const lexeme of this.lexer.lex(source, incomplete)) + yield* this.next(lexeme) + if (!incomplete) yield* this.end() } /** - * Advance the parser by the `source` of one lexical token. Bound to the - * Parser instance, so may be used directly as a callback function. + * Advance the parser by the `source` of one lexical token. */ - next = (source: string) => { + *next(source: string) { this.source = source if (process.env.LOG_TOKENS) console.log('|', prettyToken(source)) if (this.atScalar) { this.atScalar = false - this.step() + yield* this.step() this.offset += source.length return } @@ -228,7 +231,7 @@ export class Parser { const type = tokenType(source) if (!type) { const message = `Not a YAML token: ${source}` - this.pop({ type: 'error', offset: this.offset, message, source }) + yield* this.pop({ type: 'error', offset: this.offset, message, source }) this.offset += source.length } else if (type === 'scalar') { this.atNewLine = false @@ -236,7 +239,7 @@ export class Parser { this.type = 'scalar' } else { this.type = type - this.step() + yield* this.step() switch (type) { case 'newline': this.atNewLine = true @@ -261,11 +264,11 @@ export class Parser { } // Must be defined after `next()` - private lexer = new Lexer(this.next) + private lexer = new Lexer(); /** Call at end of input to push out any remaining constructions */ - end() { - while (this.stack.length > 0) this.pop() + *end() { + while (this.stack.length > 0) yield* this.pop() } private get sourceToken() { @@ -278,10 +281,10 @@ export class Parser { return st } - private step() { + private *step(): Generator { const top = this.peek(1) if (this.type === 'doc-end' && (!top || top.type !== 'doc-end')) { - while (this.stack.length > 0) this.pop() + while (this.stack.length > 0) yield* 
this.pop() this.stack.push({ type: 'doc-end', offset: this.offset, @@ -289,42 +292,42 @@ export class Parser { }) return } - if (!top) return this.stream() + if (!top) return yield* this.stream() switch (top.type) { case 'document': - return this.document(top) + return yield* this.document(top) case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': - return this.scalar(top) + return yield* this.scalar(top) case 'block-scalar': - return this.blockScalar(top) + return yield* this.blockScalar(top) case 'block-map': - return this.blockMap(top) + return yield* this.blockMap(top) case 'block-seq': - return this.blockSequence(top) + return yield* this.blockSequence(top) case 'flow-collection': - return this.flowCollection(top) + return yield* this.flowCollection(top) case 'doc-end': - return this.documentEnd(top) + return yield* this.documentEnd(top) } /* istanbul ignore next should not happen */ - this.pop() + yield* this.pop() } private peek(n: number) { return this.stack[this.stack.length - n] } - private pop(error?: Token) { + private *pop(error?: Token): Generator { const token = error || this.stack.pop() /* istanbul ignore if should not happen */ if (!token) { const message = 'Tried to pop an empty stack' - this.push({ type: 'error', offset: this.offset, source: '', message }) + yield { type: 'error', offset: this.offset, source: '', message } } else if (this.stack.length === 0) { - this.push(token) + yield token } else { const top = this.peek(1) // For these, parent indent is needed instead of own @@ -369,8 +372,8 @@ export class Parser { } /* istanbul ignore next should not happen */ default: - this.pop() - this.pop(token) + yield* this.pop() + yield* this.pop(token) } if ( @@ -399,20 +402,16 @@ export class Parser { } } - private stream() { + private *stream(): Generator { switch (this.type) { case 'directive-line': - this.push({ - type: 'directive', - offset: this.offset, - source: this.source - }) + yield { type: 'directive', 
offset: this.offset, source: this.source } return case 'byte-order-mark': case 'space': case 'comment': case 'newline': - this.push(this.sourceToken) + yield this.sourceToken return case 'doc-mode': case 'doc-start': { @@ -426,21 +425,21 @@ export class Parser { return } } - this.push({ + yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML stream`, source: this.source - }) + } } - private document(doc: Document) { - if (doc.value) return this.lineEnd(doc) + private *document(doc: Document): Generator { + if (doc.value) return yield* this.lineEnd(doc) switch (this.type) { case 'doc-start': { if (includesNonEmpty(doc.start)) { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } else doc.start.push(this.sourceToken) return } @@ -455,16 +454,16 @@ export class Parser { const bv = this.startBlockValue(doc) if (bv) this.stack.push(bv) else { - this.push({ + yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML document`, source: this.source - }) + } } } - private scalar(scalar: FlowScalar) { + private *scalar(scalar: FlowScalar) { if (this.type === 'map-value-ind') { const prev = getPrevProps(this.peek(2)) const start = getFirstKeyStartProps(prev) @@ -484,10 +483,10 @@ export class Parser { } this.onKeyLine = true this.stack[this.stack.length - 1] = map - } else this.lineEnd(scalar) + } else yield* this.lineEnd(scalar) } - private blockScalar(scalar: BlockScalar) { + private *blockScalar(scalar: BlockScalar) { switch (this.type) { case 'space': case 'comment': @@ -506,16 +505,16 @@ export class Parser { nl = this.source.indexOf('\n', nl) + 1 } } - this.pop() + yield* this.pop() break /* istanbul ignore next should not happen */ default: - this.pop() - this.step() + yield* this.pop() + yield* this.step() } } - private blockMap(map: BlockMap) { + private *blockMap(map: BlockMap) { const it = map.items[map.items.length - 1] // it.sep is true-ish if pair already has key or : 
separator switch (this.type) { @@ -635,11 +634,11 @@ export class Parser { } } } - this.pop() - this.step() + yield* this.pop() + yield* this.step() } - private blockSequence(seq: BlockSequence) { + private *blockSequence(seq: BlockSequence) { const it = seq.items[seq.items.length - 1] switch (this.type) { case 'newline': @@ -672,18 +671,21 @@ export class Parser { } if (this.indent > seq.indent) { const bv = this.startBlockValue(seq) - if (bv) return this.stack.push(bv) + if (bv) { + this.stack.push(bv) + return + } } - this.pop() - this.step() + yield* this.pop() + yield* this.step() } - private flowCollection(fc: FlowCollection) { + private *flowCollection(fc: FlowCollection) { const it = fc.items[fc.items.length - 1] if (this.type === 'flow-error-end') { let top: Token | undefined do { - this.pop() + yield* this.pop() top = this.peek(1) } while (top && top.type === 'flow-collection') } else if (fc.end.length === 0) { @@ -729,10 +731,10 @@ export class Parser { } const bv = this.startBlockValue(fc) /* istanbul ignore else should not happen */ - if (bv) return this.stack.push(bv) + if (bv) this.stack.push(bv) else { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } } else { const parent = this.peek(2) @@ -742,8 +744,8 @@ export class Parser { (this.type === 'newline' && !parent.items[parent.items.length - 1].sep)) ) { - this.pop() - this.step() + yield* this.pop() + yield* this.step() } else if ( this.type === 'map-value-ind' && parent.type !== 'flow-collection' @@ -762,7 +764,7 @@ export class Parser { this.onKeyLine = true this.stack[this.stack.length - 1] = map } else { - this.lineEnd(fc) + yield* this.lineEnd(fc) } } } @@ -797,7 +799,8 @@ export class Parser { type: 'block-scalar', offset: this.offset, indent: this.indent, - props: [this.sourceToken] + props: [this.sourceToken], + source: '' } as BlockScalar case 'flow-map-start': case 'flow-seq-start': @@ -843,15 +846,15 @@ export class Parser { return null } - private documentEnd(docEnd: 
DocumentEnd) { + private *documentEnd(docEnd: DocumentEnd) { if (this.type !== 'doc-mode') { if (docEnd.end) docEnd.end.push(this.sourceToken) else docEnd.end = [this.sourceToken] - if (this.type === 'newline') this.pop() + if (this.type === 'newline') yield* this.pop() } } - private lineEnd(token: Document | FlowCollection | FlowScalar) { + private *lineEnd(token: Document | FlowCollection | FlowScalar) { switch (this.type) { case 'comma': case 'doc-start': @@ -859,8 +862,8 @@ export class Parser { case 'flow-seq-end': case 'flow-map-end': case 'map-value-ind': - this.pop() - this.step() + yield* this.pop() + yield* this.step() break case 'newline': this.onKeyLine = false @@ -871,7 +874,7 @@ export class Parser { // all other values are errors if (token.end) token.end.push(this.sourceToken) else token.end = [this.sourceToken] - if (this.type === 'newline') this.pop() + if (this.type === 'newline') yield* this.pop() } } } diff --git a/src/public-api.ts b/src/public-api.ts index df2122d0..dc5e6991 100644 --- a/src/public-api.ts +++ b/src/public-api.ts @@ -41,18 +41,12 @@ function parseOptions(options: ParseOptions | undefined) { */ export function parseAllDocuments( source: string, - options?: ParseOptions & DocumentOptions & SchemaOptions + options: ParseOptions & DocumentOptions & SchemaOptions = {} ): Document.Parsed[] | EmptyStream { const { lineCounter, prettyErrors } = parseOptions(options) - - const docs: Document.Parsed[] = [] - const composer = new Composer( - doc => docs.push(doc as Document.Parsed), - options - ) - const parser = new Parser(composer.next, lineCounter?.addNewLine) - parser.parse(source) - composer.end() + const parser = new Parser(lineCounter?.addNewLine) + const composer = new Composer(options) + const docs = Array.from(composer.compose(parser.parse(source))) if (prettyErrors && lineCounter) for (const doc of docs) { @@ -60,7 +54,7 @@ export function parseAllDocuments( doc.warnings.forEach(prettifyError(source, lineCounter)) } - if 
(docs.length > 0) return docs + if (docs.length > 0) return docs as Document.Parsed[] return Object.assign< Document.Parsed[], { empty: true }, @@ -71,13 +65,19 @@ export function parseAllDocuments( /** Parse an input string into a single YAML.Document */ export function parseDocument( source: string, - options?: ParseOptions & DocumentOptions & SchemaOptions + options: ParseOptions & DocumentOptions & SchemaOptions = {} ) { const { lineCounter, prettyErrors } = parseOptions(options) + const parser = new Parser(lineCounter?.addNewLine) + const composer = new Composer(options) // `doc` is always set by compose.end(true) at the very latest let doc: Document.Parsed = null as any - const composer = new Composer(_doc => { + for (const _doc of composer.compose( + parser.parse(source), + true, + source.length + )) { if (!doc) doc = _doc as Document.Parsed else if (doc.options.logLevel !== 'silent') { doc.errors.push( @@ -87,11 +87,9 @@ export function parseDocument( 'Source contains multiple documents; please use YAML.parseAllDocuments()' ) ) + break } - }, options) - const parser = new Parser(composer.next, lineCounter?.addNewLine) - parser.parse(source) - composer.end(true, source.length) + } if (prettyErrors && lineCounter) { doc.errors.forEach(prettifyError(source, lineCounter)) diff --git a/tests/cst.ts b/tests/cst.ts new file mode 100644 index 00000000..7100e8f6 --- /dev/null +++ b/tests/cst.ts @@ -0,0 +1,197 @@ +import { CST, Parser } from 'yaml' +import { source } from './_utils' + +function cstDoc(src: string) { + const tokens = Array.from(new Parser().parse(src)) + expect(tokens).toHaveLength(1) + expect(tokens[0].type).toBe('document') + return tokens[0] as CST.Document +} + +describe('CST.visit', () => { + test('Visit paths in order', () => { + const doc = cstDoc(source` + foo: + - baz + - [ bar: 42 ] + ? 
{ fuzz, ball } + : fii + `) + const paths: CST.VisitPath[] = [] + CST.visit(doc, (_item, path) => { + paths.push(path) + }) + expect(paths).toMatchObject([ + [], + [['value', 0]], + [ + ['value', 0], + ['value', 0] + ], + [ + ['value', 0], + ['value', 1] + ], + [ + ['value', 0], + ['value', 1], + ['value', 0] + ], + [['value', 1]], + [ + ['value', 1], + ['key', 0] + ], + [ + ['value', 1], + ['key', 1] + ] + ]) + }) + + test('break visit', () => { + const doc = cstDoc(source` + - " foo" + - "foo" + - "bar" + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'foo') return CST.visit.BREAK + }) + expect(visits).toBe(3) + }) + + test('remove item', () => { + const doc = cstDoc(source` + - " foo" + - "foo" + - "bar" + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'foo') return CST.visit.REMOVE + }) + expect(visits).toBe(4) + expect(CST.stringify(doc)).toBe(source` + - " foo" + - "bar" + `) + }) + + test('replace value with block scalar in seq', () => { + const doc = cstDoc(source` + - " foo" + - "foo" + - "bar" + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + if (item.value) { + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'foo') + CST.setScalarValue(item.value, 'foo', { type: 'BLOCK_LITERAL' }) + } + }) + expect(visits).toBe(4) + expect(CST.stringify(doc)).toBe(source` + - " foo" + - |- + foo + - "bar" + `) + }) + + test('add item', () => { + const doc = cstDoc(source` + - " foo" + - "foo" + - "bar" + `) + let visits = 0 + CST.visit(doc, (item, path) => { + visits += 1 + if (CST.isScalar(item.value)) { + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'foo') { + const parent = CST.visit.parentCollection(doc, path) + const idx = path[path.length - 1][1] + const { indent } = item.value + parent.items.splice(idx, 0, { + start: 
item.start.slice(), + value: CST.createScalarToken('hip', { indent }) + }) + return idx + 2 + } + } + }) + expect(visits).toBe(4) + expect(CST.stringify(doc)).toBe(source` + - " foo" + - hip + - "foo" + - "bar" + `) + }) + + test('replace value with flow scalar in map', () => { + const doc = cstDoc(source` + - a: A + b: B\t#comment + c: C + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + if (item.value) { + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'B') { + CST.setScalarValue(item.value, 'foo\n\nbar', { afterKey: !!item.key }) + } + } + }) + expect(visits).toBe(5) + expect(CST.stringify(doc)).toBe(source` + - a: A + b: foo + + + bar\t#comment + c: C + `) + }) + + test('skip children', () => { + const doc = cstDoc(source` + - " foo" + - [ foo, 13, 42 ] + - "bar" + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + if (item.value?.type === 'flow-collection') return CST.visit.SKIP + }) + expect(visits).toBe(4) + }) + + test('set next index', () => { + const doc = cstDoc(source` + - "foo" + - [ foo, 13, 42 ] + - "bar" + `) + let visits = 0 + CST.visit(doc, item => { + visits += 1 + const scalar = CST.resolveAsScalar(item.value) + if (scalar?.value === 'foo') return 2 + }) + expect(visits).toBe(3) + }) +}) diff --git a/tests/stream.ts b/tests/stream.ts index c434e466..493f2326 100644 --- a/tests/stream.ts +++ b/tests/stream.ts @@ -53,16 +53,16 @@ describe('Input in parts', () => { for (let i = 1; i < src.length - 1; ++i) { const res: Document.Parsed[] = [] - const composer = new Composer(doc => res.push(doc), { - logLevel: 'error' - }) - const parser = new Parser(composer.next) - + const composer = new Composer({ logLevel: 'error' }) + const parser = new Parser() const start = src.substring(0, i) const end = src.substring(i) - parser.parse(start, true) - parser.parse(end, false) - composer.end() + for (const token of [ + ...parser.parse(start, true), + ...parser.parse(end, false) + ]) + for (const doc of 
composer.next(token)) res.push(doc) + for (const doc of composer.end()) res.push(doc) try { expect(res.map(doc => doc.toJS())).toMatchObject(exp) diff --git a/tests/tsconfig.json b/tests/tsconfig.json index c419157b..23d4b4ff 100644 --- a/tests/tsconfig.json +++ b/tests/tsconfig.json @@ -6,8 +6,7 @@ "paths": { "yaml": ["../src/index.ts"] }, - "rootDir": "..", - "target": "ES3" + "rootDir": ".." }, "include": ["**/*.ts"] } diff --git a/tests/yaml-test-suite.js b/tests/yaml-test-suite.js index dfe23edb..c12549a9 100644 --- a/tests/yaml-test-suite.js +++ b/tests/yaml-test-suite.js @@ -1,7 +1,7 @@ import fs from 'fs' import path from 'path' -import * as YAML from 'yaml' +import { CST, parseAllDocuments, Parser } from 'yaml' import { testEvents } from 'yaml/test-events' const skip = { @@ -65,7 +65,13 @@ testDirs.forEach(dir => { } describe(`${dir}: ${name}`, () => { - const docs = YAML.parseAllDocuments(yaml, { resolveKnownTags: false }) + test('cst stringify', () => { + let res = '' + for (const tok of new Parser().parse(yaml)) res += CST.stringify(tok) + expect(res).toBe(yaml) + }) + + const docs = parseAllDocuments(yaml, { resolveKnownTags: false }) if (events) { _test('test.event', () => { const res = testEvents(yaml) @@ -87,15 +93,15 @@ testDirs.forEach(dir => { if (!error) { const src2 = docs.map(doc => String(doc).replace(/\n$/, '')).join('\n...\n') + '\n' - const docs2 = YAML.parseAllDocuments(src2, { resolveKnownTags: false }) + const docs2 = parseAllDocuments(src2, { resolveKnownTags: false }) if (json) _test('stringfy+re-parse', () => matchJson(docs2, json)) if (outYaml) { _test('out.yaml', () => { - const resDocs = YAML.parseAllDocuments(yaml) + const resDocs = parseAllDocuments(yaml) const resJson = resDocs.map(doc => doc.toJS({ mapAsMap: true })) - const expDocs = YAML.parseAllDocuments(outYaml) + const expDocs = parseAllDocuments(outYaml) const expJson = expDocs.map(doc => doc.toJS({ mapAsMap: true })) expect(resJson).toMatchObject(expJson) })