From 1da4dac935ad604fb37ec93f16a453d73d9c5955 Mon Sep 17 00:00:00 2001
From: Eemeli Aro
Date: Fri, 2 Apr 2021 20:26:43 +0300
Subject: [PATCH] feat(cst): Add visitor & stringifier + utilities

Adds:
- `CST.isCollection(token)`
- `CST.isScalar(token)`
- `CST.stringify(cst)`
- `CST.visit(cst, visitor)`
---
 src/parse/cst-stringify.ts | 52 +++++++++++++++++++++++
 src/parse/cst-visit.ts     | 86 ++++++++++++++++++++++++++++++++++++++
 src/parse/cst.ts           | 19 +++++++++
 tests/yaml-test-suite.js   | 16 ++++---
 4 files changed, 168 insertions(+), 5 deletions(-)
 create mode 100644 src/parse/cst-stringify.ts
 create mode 100644 src/parse/cst-visit.ts

diff --git a/src/parse/cst-stringify.ts b/src/parse/cst-stringify.ts
new file mode 100644
index 00000000..511c082c
--- /dev/null
+++ b/src/parse/cst-stringify.ts
@@ -0,0 +1,52 @@
+import type { CollectionItem, Token } from './cst.js'
+
+/**
+ * Stringify a CST document, token, or collection item
+ *
+ * Fair warning: This applies no validation whatsoever, and
+ * simply concatenates the sources in their logical order.
+ */
+export const stringify = (cst: Token | CollectionItem) =>
+  'type' in cst ? stringifyToken(cst) : stringifyItem(cst) // dispatch on the presence of `type`
+
+function stringifyToken(token: Token) {
+  switch (token.type) {
+    case 'block-scalar': {
+      let res = ''
+      for (const tok of token.props) res += stringifyToken(tok) // props come before the scalar's own source
+      return res + token.source
+    }
+    case 'block-map':
+    case 'block-seq': {
+      let res = ''
+      for (const item of token.items) res += stringifyItem(item)
+      return res
+    }
+    case 'flow-collection': {
+      let res = token.start.source
+      for (const item of token.items) res += stringifyItem(item)
+      for (const st of token.end) res += st.source
+      return res
+    }
+    case 'document': {
+      let res = stringifyItem(token) // a document is stringified like an item, then its end tokens
+      if (token.end) for (const st of token.end) res += st.source
+      return res
+    }
+    default: {
+      let res = token.source // every other token type: own source, then any end tokens
+      if ('end' in token && token.end)
+        for (const st of token.end) res += st.source
+      return res
+    }
+  }
+}
+
+function stringifyItem({ start, key, sep, value }: CollectionItem) {
+  let res = '' // parts are appended in the order: start, key, sep, value
+  for (const st of start) res += st.source
+  if (key) res += stringifyToken(key)
+  if (sep) for (const st of sep) res += st.source
+  if (value) res += stringifyToken(value)
+  return res
+}
diff --git a/src/parse/cst-visit.ts b/src/parse/cst-visit.ts
new file mode 100644
index 00000000..63f7b249
--- /dev/null
+++ b/src/parse/cst-visit.ts
@@ -0,0 +1,86 @@
+import type { CollectionItem, Document } from './cst.js'
+
+const BREAK = Symbol('break visit')
+const SKIP = Symbol('skip children')
+const REMOVE = Symbol('remove item')
+
+export type Visitor = (
+  item: CollectionItem,
+  path: readonly ['key' | 'value', number][]
+) => number | symbol | Visitor | void
+
+/**
+ * Apply a visitor to a CST document or item.
+ *
+ * Walks through the tree (depth-first) starting from the root, calling a
+ * `visitor` function with two arguments when entering each item:
+ *   - `item`: The current item, which includes the following members:
+ *     - `start: SourceToken[]` – Source tokens before the key or value,
+ *       possibly including its anchor or tag.
+ *     - `key?: Token | null` – Set for pair values. May then be `null`, if
+ *       the key before the `:` separator is empty.
+ *     - `sep?: SourceToken[]` – Source tokens between the key and the value,
+ *       which should include the `:` map value indicator if `value` is set.
+ *     - `value?: Token` – The value of a sequence item, or of a map pair.
+ *   - `path`: The steps from the root to the current node, as an array of
+ *     `['key' | 'value', number]` tuples.
+ *
+ * The return value of the visitor may be used to control the traversal:
+ *   - `undefined` (default): Do nothing and continue
+ *   - `visit.SKIP`: Do not visit the children of this token, continue with
+ *      next sibling
+ *   - `visit.BREAK`: Terminate traversal completely
+ *   - `visit.REMOVE`: Remove the current item, then continue with the next one
+ *   - `number`: Set the index of the next step. This is useful especially if
+ *     the index of the current token has changed.
+ *   - `function`: Define the next visitor for this item. After the original
+ *     visitor is called on item entry, next visitors are called after handling
+ *     a non-empty `key` and when exiting the item.
+ */
+export function visit(cst: Document | CollectionItem, visitor: Visitor) {
+  if ('type' in cst && cst.type === 'document')
+    cst = { start: cst.start, value: cst.value } // wrap the document as an item-shaped root
+  _visit(Object.freeze([]), cst, visitor) // root path is empty; the root call's result is discarded
+}
+
+// Without the `as symbol` casts, TS declares these in the `visit`
+// namespace using `var`, but then complains about that because
+// `unique symbol` must be `const`.
+
+/** Terminate visit traversal completely */
+visit.BREAK = BREAK as symbol
+
+/** Do not visit the children of the current item */
+visit.SKIP = SKIP as symbol
+
+/** Remove the current item */
+visit.REMOVE = REMOVE as symbol
+
+function _visit(
+  path: readonly ['key' | 'value', number][],
+  item: CollectionItem,
+  visitor: Visitor
+): number | symbol | Visitor | void {
+  let ctrl = visitor(item, path)
+  if (typeof ctrl === 'symbol') return ctrl // any symbol: skip children, let the caller act on it
+  for (const field of ['key', 'value'] as const) {
+    const token = item[field]
+    if (token && 'items' in token) {
+      for (let i = 0; i < token.items.length; ++i) {
+        const ci = _visit(
+          Object.freeze(path.concat([[field, i]])), // nested: concat spreads arrays, so wrap the tuple
+          token.items[i],
+          visitor
+        )
+        if (typeof ci === 'number') i = ci - 1 // the loop's ++i then lands on ci
+        else if (ci === BREAK) return BREAK
+        else if (ci === REMOVE) {
+          token.items.splice(i, 1)
+          i -= 1 // stay on this index, which now holds the next item
+        }
+      }
+      if (typeof ctrl === 'function' && field === 'key') ctrl = ctrl(item, path)
+    }
+  }
+  return typeof ctrl === 'function' ? ctrl(item, path) : ctrl
+}
diff --git a/src/parse/cst.ts b/src/parse/cst.ts
index a22f6064..81e51e46 100644
--- a/src/parse/cst.ts
+++ b/src/parse/cst.ts
@@ -1,3 +1,6 @@
+export { stringify } from './cst-stringify.js'
+export { visit, Visitor } from './cst-visit.js'
+
 export interface SourceToken {
   type:
     | 'byte-order-mark'
@@ -140,6 +143,22 @@ export const FLOW_END = '\x18' // C0: Cancel
 /** Next token is a scalar value */
 export const SCALAR = '\x1f' // C0: Unit Separator
 
+/** @returns `true` if `token` is a flow or block collection */
+export const isCollection = (
+  token: Token | null | undefined
+): token is BlockMap | BlockSequence | FlowCollection =>
+  !!token && 'items' in token
+
+/** @returns `true` if `token` is a flow or block scalar; not an alias */
+export const isScalar = (
+  token: Token | null | undefined
+): token is FlowScalar | BlockScalar =>
+  !!token &&
+  (token.type === 'scalar' ||
+    token.type === 'single-quoted-scalar' ||
+    token.type === 'double-quoted-scalar' ||
+    token.type === 'block-scalar')
+
 /* istanbul ignore next */
 /** Get a printable representation of a lexer token */
 export function prettyToken(token: string) {
diff --git a/tests/yaml-test-suite.js b/tests/yaml-test-suite.js
index dfe23edb..f50c1dac 100644
--- a/tests/yaml-test-suite.js
+++ b/tests/yaml-test-suite.js
@@ -1,7 +1,7 @@
 import fs from 'fs'
 import path from 'path'
 
-import * as YAML from 'yaml'
+import { CST, parseAllDocuments, Parser } from 'yaml'
 import { testEvents } from 'yaml/test-events'
 
 const skip = {
@@ -65,7 +65,13 @@ testDirs.forEach(dir => {
   }
 
   describe(`${dir}: ${name}`, () => {
-    const docs = YAML.parseAllDocuments(yaml, { resolveKnownTags: false })
+    test('cst stringify', () => {
+      let res = ''
+      new Parser(tok => (res += CST.stringify(tok))).parse(yaml)
+      expect(res).toBe(yaml)
+    })
+
+    const docs = parseAllDocuments(yaml, { resolveKnownTags: false })
     if (events) {
       _test('test.event', () => {
         const res = testEvents(yaml)
@@ -87,15 +93,15 @@ testDirs.forEach(dir => {
     if (!error) {
       const src2 =
         docs.map(doc => String(doc).replace(/\n$/, '')).join('\n...\n') + '\n'
-      const docs2 = YAML.parseAllDocuments(src2, { resolveKnownTags: false })
+      const docs2 = parseAllDocuments(src2, { resolveKnownTags: false })
 
       if (json) _test('stringfy+re-parse', () => matchJson(docs2, json))
 
       if (outYaml) {
         _test('out.yaml', () => {
-          const resDocs = YAML.parseAllDocuments(yaml)
+          const resDocs = parseAllDocuments(yaml)
           const resJson = resDocs.map(doc => doc.toJS({ mapAsMap: true }))
-          const expDocs = YAML.parseAllDocuments(outYaml)
+          const expDocs = parseAllDocuments(outYaml)
           const expJson = expDocs.map(doc => doc.toJS({ mapAsMap: true }))
           expect(resJson).toMatchObject(expJson)
         })