From 79d3276f619409062bcc17d5a50c5120ae7ed3cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Wed, 7 Jul 2021 11:51:40 -0400 Subject: [PATCH] Overhaul comment attachment (#13521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: inline pushComment * chore: add benchmark cases * perf: overhaul comment attachment * cleanup * update test fixtures They are all bugfixes. * fix: merge HTMLComment parsing to skipSpace * perf: remove unattachedCommentStack baseline 128 nested leading comments: 11_034 ops/sec ±50.64% (0.091ms) baseline 256 nested leading comments: 6_037 ops/sec ±11.46% (0.166ms) baseline 512 nested leading comments: 3_077 ops/sec ±2.31% (0.325ms) baseline 1024 nested leading comments: 1_374 ops/sec ±3.22% (0.728ms) current 128 nested leading comments: 11_027 ops/sec ±37.41% (0.091ms) current 256 nested leading comments: 6_736 ops/sec ±1.39% (0.148ms) current 512 nested leading comments: 3_306 ops/sec ±0.69% (0.302ms) current 1024 nested leading comments: 1_579 ops/sec ±2.09% (0.633ms) baseline 128 nested trailing comments: 10_073 ops/sec ±42.95% (0.099ms) baseline 256 nested trailing comments: 6_294 ops/sec ±2.19% (0.159ms) baseline 512 nested trailing comments: 3_041 ops/sec ±0.8% (0.329ms) baseline 1024 nested trailing comments: 1_530 ops/sec ±1.18% (0.654ms) current 128 nested trailing comments: 11_461 ops/sec ±44.89% (0.087ms) current 256 nested trailing comments: 7_212 ops/sec ±1.6% (0.139ms) current 512 nested trailing comments: 3_403 ops/sec ±1% (0.294ms) current 1024 nested trailing comments: 1_539 ops/sec ±1.49% (0.65ms) * fix: do not expose CommentWhitespace type * add comments on CommentWhitespace * add test case for #11576 * fix: mark containerNode be the innermost node containing commentWS * fix: adjust trailing comma comments for Record/Tuple/OptionalCall * fix: drain comment stacks in parseExpression * docs: update comments * add a new benchmark * chore: 
containerNode => containingNode * add more benchmark cases * fix: avoid finishNodeAt in stmtToDirective * finalize comment right after containerNode is set * add testcase about directive * fix: finish SequenceExpression at current pos and adjust later * chore: rename test cases * add new test case on switch statement * fix: adjust comments after trailing comma of function params * add comment attachment design doc * misc fix * fix: reset previous trailing comments when parsing async method/accessor * chore: add more comment testcases * fix flow errors * fix: handle comments when parsing async arrow * fix: handle comments when "static" is a class modifier * fix flow errors * fix: handle comments when parsing async function/do * refactor: simplify resetPreviousNodeTrailingComments * update test fixtures --- .../babel-parser/ast/comment-attachment.md | 129 +++++ .../benchmark/many-async-arrows/bench.mjs | 22 + .../many-inner-comments-nested/bench.mjs | 22 + .../benchmark/many-inner-comments/bench.mjs | 24 + .../many-leading-comments-nested/bench.mjs | 24 + .../benchmark/many-leading-comments/bench.mjs | 24 + .../many-leading-trailing-comments/bench.mjs | 27 + .../many-trailing-comments-nested/bench.mjs | 22 + .../many-trailing-comments/bench.mjs | 24 + packages/babel-parser/src/parser/comments.js | 461 ++++++++---------- .../babel-parser/src/parser/expression.js | 28 +- packages/babel-parser/src/parser/statement.js | 32 +- packages/babel-parser/src/plugins/estree.js | 2 +- .../babel-parser/src/plugins/flow/index.js | 4 +- packages/babel-parser/src/tokenizer/index.js | 153 +++--- packages/babel-parser/src/tokenizer/state.js | 14 +- packages/babel-parser/src/types.js | 22 +- .../comments/basic/arrow-function/input.js | 3 + .../comments/basic/arrow-function/output.json | 126 +++++ .../basic/async-arrow-function/input.js | 3 + .../basic/async-arrow-function/output.json | 177 +++++++ .../basic/async-call-expression/input.js | 1 + .../basic/async-call-expression/output.json 
| 65 +++ .../basic/async-do-expression/input.js | 1 + .../basic/async-do-expression/options.json | 3 + .../basic/async-do-expression/output.json | 90 ++++ .../comments/basic/async-function/input.js | 1 + .../comments/basic/async-function/output.json | 57 +++ .../call-expression-no-argument/input.js | 1 + .../call-expression-no-argument/output.json | 53 ++ .../input.js | 0 .../output.json | 0 .../input.js | 0 .../output.json | 0 .../basic/class-accessor-computed/input.js | 3 + .../basic/class-accessor-computed/output.json | 343 +++++++++++++ .../class-method-async-generator/input.js | 2 + .../class-method-async-generator/output.json | 207 ++++++++ .../class-method-static-generator/input.js | 2 + .../class-method-static-generator/output.json | 175 +++++++ .../comments/basic/class-method/input.js | 2 + .../comments/basic/class-method/output.json | 362 ++++++++++++++ .../basic/class-private-method/input.js | 2 + .../basic/class-private-method/output.json | 196 ++++++++ .../basic/class-static-block/input.js | 1 + .../basic/class-static-block/options.json | 3 + .../basic/class-static-block/output.json | 110 +++++ .../comments/basic/directive/input.js | 1 + .../comments/basic/directive/output.json | 64 +++ .../fixtures/comments/basic/function/input.js | 4 + .../comments/basic/function/output.json | 290 +++++++++++ .../basic/object-accessor-computed/input.js | 3 + .../object-accessor-computed/output.json | 325 ++++++++++++ .../object-method-async-generator/input.js | 2 + .../object-method-async-generator/output.json | 197 ++++++++ .../comments/basic/object-method/input.js | 1 + .../comments/basic/object-method/output.json | 175 +++++++ .../basic/sequence-expression/input.js | 1 + .../basic/sequence-expression/output.json | 145 ++++++ .../comments/basic/switch-case/input.js | 6 + .../comments/basic/switch-case/output.json | 342 +++++++++++++ .../basic/switch-no-case-comment/input.js | 1 + .../basic/switch-no-case-comment/output.json | 59 +++ 
.../comments/basic/try-statement/input.js | 4 + .../comments/basic/try-statement/output.json | 259 ++++++++++ .../core/uncategorised/342/output.json | 13 +- .../output.json | 34 +- .../output.json | 34 +- .../call-trailing-comma-comments/input.js | 1 + .../call-trailing-comma-comments/output.json | 60 +++ .../trailing-comma-comments/input.js | 2 + .../trailing-comma-comments/options.json | 3 + .../trailing-comma-comments/output.json | 90 ++++ .../05-type-annotation/output.json | 11 +- .../export/internal-comments/output.json | 73 +-- .../import/internal-comments/output.json | 85 ++-- 76 files changed, 4816 insertions(+), 492 deletions(-) create mode 100644 packages/babel-parser/ast/comment-attachment.md create mode 100644 packages/babel-parser/benchmark/many-async-arrows/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-inner-comments-nested/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-inner-comments/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-leading-comments-nested/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-leading-comments/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-leading-trailing-comments/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-trailing-comments-nested/bench.mjs create mode 100644 packages/babel-parser/benchmark/many-trailing-comments/bench.mjs create mode 100644 packages/babel-parser/test/fixtures/comments/basic/arrow-function/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/arrow-function/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-arrow-function/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-arrow-function/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-call-expression/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-call-expression/output.json 
create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-do-expression/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-do-expression/options.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-do-expression/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-function/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/async-function/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/call-expression-no-argument/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/call-expression-no-argument/output.json rename packages/babel-parser/test/fixtures/comments/basic/{function-trailing-comma-shorthand => call-expression-trailing-comma-object-shorthand}/input.js (100%) rename packages/babel-parser/test/fixtures/comments/basic/{function-trailing-comma-shorthand => call-expression-trailing-comma-object-shorthand}/output.json (100%) rename packages/babel-parser/test/fixtures/comments/basic/{function-trailing-comma => call-expression-trailing-comma}/input.js (100%) rename packages/babel-parser/test/fixtures/comments/basic/{function-trailing-comma => call-expression-trailing-comma}/output.json (100%) create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-accessor-computed/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-accessor-computed/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-method-async-generator/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-method-async-generator/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-method-static-generator/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-method-static-generator/output.json create mode 100644 
packages/babel-parser/test/fixtures/comments/basic/class-method/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-method/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-private-method/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-private-method/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-static-block/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-static-block/options.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/class-static-block/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/directive/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/directive/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/function/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/function/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-accessor-computed/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-accessor-computed/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-method-async-generator/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-method-async-generator/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-method/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/object-method/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/sequence-expression/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/sequence-expression/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/switch-case/input.js create mode 100644 
packages/babel-parser/test/fixtures/comments/basic/switch-case/output.json create mode 100755 packages/babel-parser/test/fixtures/comments/basic/switch-no-case-comment/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/switch-no-case-comment/output.json create mode 100644 packages/babel-parser/test/fixtures/comments/basic/try-statement/input.js create mode 100644 packages/babel-parser/test/fixtures/comments/basic/try-statement/output.json create mode 100644 packages/babel-parser/test/fixtures/es2020/optional-chaining/call-trailing-comma-comments/input.js create mode 100644 packages/babel-parser/test/fixtures/es2020/optional-chaining/call-trailing-comma-comments/output.json create mode 100644 packages/babel-parser/test/fixtures/experimental/record-and-tuple/trailing-comma-comments/input.js create mode 100644 packages/babel-parser/test/fixtures/experimental/record-and-tuple/trailing-comma-comments/options.json create mode 100644 packages/babel-parser/test/fixtures/experimental/record-and-tuple/trailing-comma-comments/output.json diff --git a/packages/babel-parser/ast/comment-attachment.md b/packages/babel-parser/ast/comment-attachment.md new file mode 100644 index 000000000000..f5aafdf12355 --- /dev/null +++ b/packages/babel-parser/ast/comment-attachment.md @@ -0,0 +1,129 @@ +# Comment attachment + +When Babel is parsing JavaScript files, the comments will be attached to its adjacent AST nodes. If such neighbors do not exist, Babel will fallback to the innermost containing node. + +The [current implementation](https://github.com/babel/babel/pull/13521) is based on its converse problem: Instead of attaching comments directly to AST nodes, we attach nodes to a stack of applicable comment whitespaces (see below for definitions). 
After a comment whitespace has set up its node relationships, including leading, trailing and containing nodes, we forward the comments to the AST nodes and perform adjustments such as merging innerComments after a trailing comma into the last element's trailing comments. + +### Comment Whitespace + +A comment whitespace represents a sequence of whitespace characters and comments including `//` comment line, `/* */` comment block, `<!--` HTML open comment and `-->` HTML close comment. For example, the following snippet + +```js +a// 1 +/* 2 */ + + <!-- 3 +--> +2; +``` + +has two comment whitespaces + +```jsonc +// for `// 1\n/* 2 */ ` +{ + start: 1, // position of '/' + end: 15, // position of '+' + comments: [ + CommentLine { start: 1, end: 5}, + CommentBlock { start: 6, end: 13 } + ], + leadingNode: Identifier("a"), + trailingNode: null, + containingNode: BinaryExpression, +} +``` + +and + +```jsonc +// for ` <!-- 3\n-->\n` +{ + start: 16, // position of ' ' after '+' + end: 28, // position of '2' + comments: [ + CommentLine { start: 17, end: 23}, + CommentLine { start: 24, end: 27 } + ], + leadingNode: null, + trailingNode: NumericLiteral(2), + containingNode: BinaryExpression, +} +``` + +Given a program source, the set of all the comment whitespaces has the following properties: + +**Nonemptiness** (P1): For every `w` of comment whitespaces, `w` satisfies + +``` +w.start < w.end +``` + +**Isolation** (P2): There must not exist any pair of comment whitespaces `w1` and `w2` such that + +``` +w1.start ≤ w2.start ≤ w1.end +``` + +**Completeness** (P3): For every comment AST node `c`, there must exist a comment whitespace `w`, such that + +``` +w.start ≤ c.start < c.end ≤ w.end +``` + +We can also say `w` encompasses `c`. + +**Monotonicity** (Corollary from P1 and P2): Given a non-empty list of comment whitespaces ordered by `start`, denoted by `{ w1, w2, ... w_n }`, they must satisfy + +``` +w1.start < w1.end < w2.start < w2.end < ... 
< w_n.start < w_n.end +``` + +For any given comment whitespace `w` and an AST node `n`, we can define the following relationships: + +1. `n` is the _leading node_ of `w` iff `n.end = w.start` +2. `n` is the _trailing node_ of `w` iff `n.start = w.end` +3. `n` is the _containing node_ of `w` iff for all AST nodes `N` satisfying `N.start < w.start < w.end < N.end`, the following proposition is true: + +``` +N.start ≤ n.start < w.start < w.end < n.end ≤ N.end +``` + +Note that the relationship from `w` to `n` is _not_ single-valued. In other words, a comment whitespace can have multiple leading nodes, trailing nodes, and/or containing nodes. To address this issue we can define the extrema of the set of related AST nodes. + +1. Outermost leading/trailing node: `n` is the _outermost leading/trailing node_ of `w` iff for every other leading/trailing node `N` of `w`, `N` is a descendant of `n` +2. Innermost containing node: `n` is the _innermost containing node_ of `w` iff for every other containing node `N` of `w`, `n` is a descendant of `N` + +For any given comment `c` and AST node `n`, now we can (in)formally define leading comments, trailing comments and inner comments: + +**Leading Comment**: `c` is one of the leading comments of `n` iff there exists a comment whitespace `w`, such that `n` is the outermost trailing node of `w` and `w` encompasses `c` + +**Trailing Comment**: `c` is one of the trailing comments of `n` iff there exists a comment whitespace `w`, such that `n` is the outermost leading node of `w` and `w` encompasses `c` + +**Inner Comment**: `c` is one of the inner comments of `n` iff + +1. there exists a comment whitespace `w`, such that `n` is the innermost containing node of `w` and `w` encompasses `c`. +2. there does not exist a comment whitespace `w`, such that `n` is the outermost leading or trailing node of `w` and `w` encompasses `c`. 
+ +The Isolation (P2) of a comment whitespace guarantees that if two comments `c1`, `c2` belong to the leading/trailing comments of `n`, `c1` and `c2` must be encompassed by the same comment whitespace `w`. This property simplifies classification of leading/trailing because we can now mark a group of comments instead of checking every comment under the same comment whitespace. + +Note that Babel parser marks certain inner comments after a trailing comma of a list structure to be the trailing comments of the last element in that list. (https://github.com/babel/babel/pull/10369) This behaviour can be considered as compensation for the lack of a `TrailingCommaElement` AST structure to which a comment can be attached. Although this PR implements such behaviour, we will not be discussing it in the design section. + +### Construct Comment Whitespace + +We construct the comment whitespace in `Tokenizer#skipSpace` of `packages/babel-parser/src/tokenizer/index.js`: after we exit from the skip loop, we collect the `comments`, mark the location info and push to `parser.state.commentStack`. In this PR we also merge the parsing of `HTMLOpenComment` and `HTMLCloseComment` to `skipSpace`. + +### Attaching Nodes to Comment Whitespace + +Nodes are attached for every AST node finished via `parser#finishNode`. Before an AST node is finished, the whitespace token has been read from `tokenizer#next()`, so if this node has trailing comments, it must be the `leadingNode` of the last element in `commentStack`. + +Note that the `leadingNode` will be updated by subsequent `finishNode()` calls invoked at the same position. The last `finishNode()` call is the winner, which is exactly the _outermost_ leading node that we are interested in. Likewise for `trailingNode`. + +Then we iterate `state.commentStack` in reverse. 
We mark `trailingNode` when `comment.end = node.start`, and mark `containingNode` when it is not yet defined, so here the first `finishNode()` is the winner, which is exactly the _innermost_ containing node. + +After we set the containing node, we can assign comments to the related nodes, since the nature of a recursive descent parser requires that when `containingNode` is finished, its `leadingNode` and `trailingNode` must have been parsed\*, so the related nodes stop being updated by `processComment`. + +\* Technically this is not always true because the `estree` plugin invokes `finishNodeAt` at a different tokenizer location. However, most `estree` users are using `@babel/eslint-parser`, which removes the attached comments, so we are good here. + +### Finalize comment whitespaces + +In this step we attach the comments and do the trailing comma adjustments. Note that an extra routine `finalizeRemainingComments` is provided for `parseExpression`, which may not have an opportunity to finalize comments that are attached to the leading/trailing position of the top level Expression node. 
diff --git a/packages/babel-parser/benchmark/many-async-arrows/bench.mjs b/packages/babel-parser/benchmark/many-async-arrows/bench.mjs new file mode 100644 index 000000000000..030e52353501 --- /dev/null +++ b/packages/babel-parser/benchmark/many-async-arrows/bench.mjs @@ -0,0 +1,22 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "async () => {};".repeat(length); +} +function benchCases(name, implementation, options) { + for (const length of [256, 512, 1024, 2048]) { + const input = createInput(length); + suite.add(`${name} ${length} async arrow functions`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-inner-comments-nested/bench.mjs b/packages/babel-parser/benchmark/many-inner-comments-nested/bench.mjs new file mode 100644 index 000000000000..38d80f1a8d40 --- /dev/null +++ b/packages/babel-parser/benchmark/many-inner-comments-nested/bench.mjs @@ -0,0 +1,22 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "[,\n// c\n".repeat(length) + "\n]".repeat(length); +} +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + suite.add(`${name} ${length} nested inner comments`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-inner-comments/bench.mjs 
b/packages/babel-parser/benchmark/many-inner-comments/bench.mjs new file mode 100644 index 000000000000..c0001e73ecd0 --- /dev/null +++ b/packages/babel-parser/benchmark/many-inner-comments/bench.mjs @@ -0,0 +1,24 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "[" + "// c\n".repeat(length) + "]"; +} +current.parse(createInput(256), {}); +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + const { parse } = implementation; + suite.add(`${name} ${length} inner comments`, () => { + parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-leading-comments-nested/bench.mjs b/packages/babel-parser/benchmark/many-leading-comments-nested/bench.mjs new file mode 100644 index 000000000000..55c7d2503304 --- /dev/null +++ b/packages/babel-parser/benchmark/many-leading-comments-nested/bench.mjs @@ -0,0 +1,24 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "// c\n{\n".repeat(length) + "}".repeat(length); +} +current.parse(createInput(256), {}); +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + const { parse } = implementation; + suite.add(`${name} ${length} nested leading comments`, () => { + parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git 
a/packages/babel-parser/benchmark/many-leading-comments/bench.mjs b/packages/babel-parser/benchmark/many-leading-comments/bench.mjs new file mode 100644 index 000000000000..0adc4f427430 --- /dev/null +++ b/packages/babel-parser/benchmark/many-leading-comments/bench.mjs @@ -0,0 +1,24 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "// c\n".repeat(length) + "{}"; +} +current.parse(createInput(256), {}); +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + const { parse } = implementation; + suite.add(`${name} ${length} leading comments`, () => { + parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-leading-trailing-comments/bench.mjs b/packages/babel-parser/benchmark/many-leading-trailing-comments/bench.mjs new file mode 100644 index 000000000000..fe613d4e5e6d --- /dev/null +++ b/packages/babel-parser/benchmark/many-leading-trailing-comments/bench.mjs @@ -0,0 +1,27 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "\n// c\na".repeat(length); +} +current.parse(createInput(256), {}); +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + const { parse } = implementation; + suite.add( + `${name} ${length} leading comments + ${length - 1} trailing comments`, + () => { + parse(input, options); + } + ); + } +} + +benchCases("baseline", baseline); +benchCases("current", 
current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-trailing-comments-nested/bench.mjs b/packages/babel-parser/benchmark/many-trailing-comments-nested/bench.mjs new file mode 100644 index 000000000000..ae9fc2b0a7ad --- /dev/null +++ b/packages/babel-parser/benchmark/many-trailing-comments-nested/bench.mjs @@ -0,0 +1,22 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "{".repeat(length) + "} // c\n".repeat(length); +} +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + suite.add(`${name} ${length} nested trailing comments`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/benchmark/many-trailing-comments/bench.mjs b/packages/babel-parser/benchmark/many-trailing-comments/bench.mjs new file mode 100644 index 000000000000..1982485ee0ef --- /dev/null +++ b/packages/babel-parser/benchmark/many-trailing-comments/bench.mjs @@ -0,0 +1,24 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "{}" + "// c\n".repeat(length); +} +current.parse(createInput(256), {}); +function benchCases(name, implementation, options) { + for (const length of [128, 256, 512, 1024]) { + const input = createInput(length); + const { parse } = implementation; + suite.add(`${name} ${length} trailing comments`, () => { + parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + 
+suite.on("cycle", report).run(); diff --git a/packages/babel-parser/src/parser/comments.js b/packages/babel-parser/src/parser/comments.js index c990c9e0e8a9..e6dc327e1f38 100644 --- a/packages/babel-parser/src/parser/comments.js +++ b/packages/babel-parser/src/parser/comments.js @@ -1,288 +1,241 @@ // @flow +/*:: declare var invariant; */ + +import BaseParser from "./base"; +import type { Comment, Node } from "../types"; +import * as charCodes from "charcodes"; + /** - * Based on the comment attachment algorithm used in espree and estraverse. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. + * A whitespace token containing comments + * @typedef CommentWhitespace + * @type {object} + * @property {number} start - the start of the whitespace token. + * @property {number} end - the end of the whitespace token. + * @property {Array} comments - the containing comments + * @property {Node | null} leadingNode - the immediately preceding AST node of the whitespace token + * @property {Node | null} trailingNode - the immediately following AST node of the whitespace token + * @property {Node | null} containingNode - the innermost AST node containing the whitespace + * with minimal size (|end - start|) + */ +export type CommentWhitespace = { + start: number, + end: number, + comments: Array, + leadingNode: Node | null, + trailingNode: Node | null, + containingNode: Node | null, +}; +/** + * Merge comments with node's trailingComments or assign comments to be + * trailingComments. 
New comments will be placed before old comments + * because the commentStack is enumerated reversely. * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * @param {Node} node + * @param {Array} comments */ +function setTrailingComments(node: Node, comments: Array) { + if (node.trailingComments === undefined) { + node.trailingComments = comments; + } else { + node.trailingComments.unshift(...comments); + } +} -import BaseParser from "./base"; -import type { Comment, Node } from "../types"; +/** + * Merge comments with node's innerComments or assign comments to be + * innerComments. New comments will be placed before old comments + * because the commentStack is enumerated reversely. 
+ * + * @param {Node} node + * @param {Array} comments + */ +export function setInnerComments(node: Node, comments: Array | void) { + if (node.innerComments === undefined) { + node.innerComments = comments; + } else if (comments !== undefined) { + node.innerComments.unshift(...comments); + } +} -function last(stack: $ReadOnlyArray): T { - return stack[stack.length - 1]; +/** + * Given node and elements array, if elements has non-null element, + * merge comments to its trailingComments, otherwise merge comments + * to node's innerComments + * + * @param {Node} node + * @param {Array} elements + * @param {Array} comments + */ +function adjustInnerComments( + node: Node, + elements: Array, + commentWS: CommentWhitespace, +) { + let lastElement = null; + let i = elements.length; + while (lastElement === null && i > 0) { + lastElement = elements[--i]; + } + if (lastElement === null || lastElement.start > commentWS.start) { + setInnerComments(node, commentWS.comments); + } else { + setTrailingComments(lastElement, commentWS.comments); + } } +/** @class CommentsParser */ export default class CommentsParser extends BaseParser { addComment(comment: Comment): void { if (this.filename) comment.loc.filename = this.filename; - this.state.trailingComments.push(comment); - this.state.leadingComments.push(comment); + this.state.comments.push(comment); } - adjustCommentsAfterTrailingComma( - node: Node, - elements: (Node | null)[], - // When the current node is followed by a token which hasn't a respective AST node, we - // need to take all the trailing comments to prevent them from being attached to an - // unrelated node. e.g. in - // var { x } /* cmt */ = { y } - // we don't want /* cmt */ to be attached to { y }. 
- // On the other hand, in - // fn(x) [new line] /* cmt */ [new line] y - // /* cmt */ is both a trailing comment of fn(x) and a leading comment of y - takeAllComments?: boolean, - ) { - if (this.state.leadingComments.length === 0) { - return; - } - - let lastElement = null; - let i = elements.length; - while (lastElement === null && i > 0) { - lastElement = elements[--i]; - } - if (lastElement === null) { - return; - } - - for (let j = 0; j < this.state.leadingComments.length; j++) { - if ( - this.state.leadingComments[j].end < this.state.commentPreviousNode.end - ) { - this.state.leadingComments.splice(j, 1); - j--; - } + /** + * Given a newly created AST node _n_, attach _n_ to a comment whitespace _w_ if applicable + * {@see {@link CommentWhitespace}} + * + * @param {Node} node + * @returns {void} + * @memberof CommentsParser + */ + processComment(node: Node): void { + const { commentStack } = this.state; + const commentStackLength = commentStack.length; + if (commentStackLength === 0) return; + let i = commentStackLength - 1; + const lastCommentWS = commentStack[i]; + + if (lastCommentWS.start === node.end) { + lastCommentWS.leadingNode = node; + i--; } - const newTrailingComments = []; - for (let i = 0; i < this.state.leadingComments.length; i++) { - const leadingComment = this.state.leadingComments[i]; - if (leadingComment.end < node.end) { - newTrailingComments.push(leadingComment); - - // Perf: we don't need to splice if we are going to reset the array anyway - if (!takeAllComments) { - this.state.leadingComments.splice(i, 1); - i--; - } + const { start: nodeStart } = node; + // invariant: for all 0 <= j <= i, let c = commentStack[j], c must satisfy c.end < node.end + for (; i >= 0; i--) { + const commentWS = commentStack[i]; + const commentEnd = commentWS.end; + if (commentEnd > nodeStart) { + // by definition of commentWhiteSpace, this implies commentWS.start > nodeStart + // so node can be a containingNode candidate. 
At this time we can finalize the comment + // whitespace, because + // 1) its leadingNode or trailingNode, if exists, will not change + // 2) its containingNode has been assigned and will not change because it is the + // innermost minimal-sized AST node + commentWS.containingNode = node; + this.finalizeComment(commentWS); + commentStack.splice(i, 1); } else { - if (node.trailingComments === undefined) { - node.trailingComments = []; + if (commentEnd === nodeStart) { + commentWS.trailingNode = node; } - node.trailingComments.push(leadingComment); + // stop the loop when commentEnd <= nodeStart + break; } } - if (takeAllComments) this.state.leadingComments = []; - - if (newTrailingComments.length > 0) { - lastElement.trailingComments = newTrailingComments; - } else if (lastElement.trailingComments !== undefined) { - lastElement.trailingComments = []; - } } - processComment(node: Node): void { - if (node.type === "Program" && node.body.length > 0) return; - - const stack = this.state.commentStack; - - let firstChild, lastChild, trailingComments, i, j; - - if (this.state.trailingComments.length > 0) { - // If the first comment in trailingComments comes after the - // current node, then we're good - all comments in the array will - // come after the node and so it's safe to add them as official - // trailingComments. - if (this.state.trailingComments[0].start >= node.end) { - trailingComments = this.state.trailingComments; - this.state.trailingComments = []; - } else { - // Otherwise, if the first comment doesn't come after the - // current node, that means we have a mix of leading and trailing - // comments in the array and that leadingComments contains the - // same items as trailingComments. Reset trailingComments to - // zero items and we'll handle this by evaluating leadingComments - // later.
- this.state.trailingComments.length = 0; - } - } else if (stack.length > 0) { - const lastInStack = last(stack); - if ( - lastInStack.trailingComments && - lastInStack.trailingComments[0].start >= node.end - ) { - trailingComments = lastInStack.trailingComments; - delete lastInStack.trailingComments; - } - } - - // Eating the stack. - if (stack.length > 0 && last(stack).start >= node.start) { - firstChild = stack.pop(); - } - - while (stack.length > 0 && last(stack).start >= node.start) { - lastChild = stack.pop(); - } - - if (!lastChild && firstChild) lastChild = firstChild; - - // Adjust comments that follow a trailing comma on the last element in a - // comma separated list of nodes to be the trailing comments on the last - // element - if (firstChild) { - switch (node.type) { - case "ObjectExpression": - this.adjustCommentsAfterTrailingComma(node, node.properties); - break; - case "ObjectPattern": - this.adjustCommentsAfterTrailingComma(node, node.properties, true); - break; - case "CallExpression": - this.adjustCommentsAfterTrailingComma(node, node.arguments); - break; - case "ArrayExpression": - this.adjustCommentsAfterTrailingComma(node, node.elements); - break; - case "ArrayPattern": - this.adjustCommentsAfterTrailingComma(node, node.elements, true); - break; + /** + * Assign the comments of comment whitespaces to related AST nodes. + * Also adjust innerComments following trailing comma. 
+ * + * @memberof CommentsParser + */ + finalizeComment(commentWS: CommentWhitespace) { + const { comments } = commentWS; + if (commentWS.leadingNode !== null || commentWS.trailingNode !== null) { + if (commentWS.leadingNode !== null) { + setTrailingComments(commentWS.leadingNode, comments); } - } else if ( - this.state.commentPreviousNode && - ((this.state.commentPreviousNode.type === "ImportSpecifier" && - node.type !== "ImportSpecifier") || - (this.state.commentPreviousNode.type === "ExportSpecifier" && - node.type !== "ExportSpecifier")) - ) { - this.adjustCommentsAfterTrailingComma(node, [ - this.state.commentPreviousNode, - ]); - } - - if (lastChild) { - if (lastChild.leadingComments) { - if ( - lastChild !== node && - lastChild.leadingComments.length > 0 && - last(lastChild.leadingComments).end <= node.start - ) { - node.leadingComments = lastChild.leadingComments; - delete lastChild.leadingComments; - } else { - // A leading comment for an anonymous class had been stolen by its first ClassMethod, - // so this takes back the leading comment. 
- // See also: https://github.com/eslint/espree/issues/158 - for (i = lastChild.leadingComments.length - 2; i >= 0; --i) { - if (lastChild.leadingComments[i].end <= node.start) { - node.leadingComments = lastChild.leadingComments.splice(0, i + 1); - break; - } - } - } + if (commentWS.trailingNode !== null) { + commentWS.trailingNode.leadingComments = comments; } - } else if (this.state.leadingComments.length > 0) { - if (last(this.state.leadingComments).end <= node.start) { - if (this.state.commentPreviousNode) { - for (j = 0; j < this.state.leadingComments.length; j++) { - if ( - this.state.leadingComments[j].end < - this.state.commentPreviousNode.end - ) { - this.state.leadingComments.splice(j, 1); - j--; - } - } - } - if (this.state.leadingComments.length > 0) { - node.leadingComments = this.state.leadingComments; - this.state.leadingComments = []; - } - } else { - // https://github.com/eslint/espree/issues/2 - // - // In special cases, such as return (without a value) and - // debugger, all comments will end up as leadingComments and - // will otherwise be eliminated. This step runs when the - // commentStack is empty and there are comments left - // in leadingComments. - // - // This loop figures out the stopping point between the actual - // leading and trailing comments by finding the location of the - // first comment that comes after the given node. 
- for (i = 0; i < this.state.leadingComments.length; i++) { - if (this.state.leadingComments[i].end > node.start) { + } else { + /*:: invariant(commentWS.containingNode !== null) */ + const { containingNode: node, start: commentStart } = commentWS; + if (this.input.charCodeAt(commentStart - 1) === charCodes.comma) { + // If a commentWhitespace follows a comma and the containingNode allows + // list structures with trailing comma, merge it to the trailingComment + // of the last non-null list element + switch (node.type) { + case "ObjectExpression": + case "ObjectPattern": + case "RecordExpression": + adjustInnerComments(node, node.properties, commentWS); break; + case "CallExpression": + case "OptionalCallExpression": + adjustInnerComments(node, node.arguments, commentWS); + break; + case "FunctionDeclaration": + case "FunctionExpression": + case "ArrowFunctionExpression": + case "ObjectMethod": + case "ClassMethod": + case "ClassPrivateMethod": + adjustInnerComments(node, node.params, commentWS); + break; + case "ArrayExpression": + case "ArrayPattern": + case "TupleExpression": + adjustInnerComments(node, node.elements, commentWS); + break; + case "ExportNamedDeclaration": + case "ImportDeclaration": + adjustInnerComments(node, node.specifiers, commentWS); + break; + default: { + setInnerComments(node, comments); } } - - // Split the array based on the location of the first comment - // that comes after the node. Keep in mind that this could - // result in an empty array, and if so, the array must be - // deleted. - const leadingComments = this.state.leadingComments.slice(0, i); - - if (leadingComments.length) { - node.leadingComments = leadingComments; - } - - // Similarly, trailing comments are attached later. The variable - // must be reset to null if there are no trailing comments. 
- trailingComments = this.state.leadingComments.slice(i); - if (trailingComments.length === 0) { - trailingComments = null; - } + } else { + setInnerComments(node, comments); } } + } - this.state.commentPreviousNode = node; - - if (trailingComments) { - if ( - trailingComments.length && - trailingComments[0].start >= node.start && - last(trailingComments).end <= node.end - ) { - node.innerComments = trailingComments; - } else { - // TrailingComments maybe contain innerComments - const firstTrailingCommentIndex = trailingComments.findIndex( - comment => comment.end >= node.end, - ); - - if (firstTrailingCommentIndex > 0) { - node.innerComments = trailingComments.slice( - 0, - firstTrailingCommentIndex, - ); - node.trailingComments = trailingComments.slice( - firstTrailingCommentIndex, - ); - } else { - node.trailingComments = trailingComments; - } - } + /** + * Drains remaining commentStack and applies finalizeComment + * to each comment whitespace. Used only in parseExpression + * where the top level AST node is _not_ Program + * {@see {@link CommentsParser#finalizeComment}} + * + * @memberof CommentsParser + */ + finalizeRemainingComments() { + const { commentStack } = this.state; + for (let i = commentStack.length - 1; i >= 0; i--) { + this.finalizeComment(commentStack[i]); } + this.state.commentStack = []; + } - stack.push(node); + /** + * Reset previous node trailing comments. Used in object / class + * property parsing. We parse `async`, `static`, `set` and `get` + * as an identifier but may reinterpret it into an async/static/accessor + * method later. In this case the identifier is not part of the AST and we + * should sync the knowledge to commentStacks + * + * For example, when parsing */ + // async /* 1 */ function f() {} + /* + * the comment whitespace "* 1 *" has leading node Identifier(async). When + * we see the function token, we create a Function node and mark "* 1 *" as + * inner comments. So "* 1 *" should be detached from the Identifier node.
+ * + * @param {N.Node} node the last finished AST node _before_ current token + * @returns + * @memberof CommentsParser + */ + resetPreviousNodeTrailingComments(node: Node) { + const { commentStack } = this.state; + const { length } = commentStack; + if (length === 0) return; + const commentWS = commentStack[length - 1]; + if (commentWS.leadingNode === node) { + commentWS.leadingNode = null; + } } } diff --git a/packages/babel-parser/src/parser/expression.js b/packages/babel-parser/src/parser/expression.js index aaf4794f7724..82b531ff42a8 100644 --- a/packages/babel-parser/src/parser/expression.js +++ b/packages/babel-parser/src/parser/expression.js @@ -56,6 +56,7 @@ import { } from "../util/expression-scope"; import { Errors, SourceTypeModuleErrors } from "./error"; import type { ParsingError } from "./error"; +import { setInnerComments } from "./comments"; /*:: import type { SourceType } from "../options"; @@ -161,6 +162,9 @@ export default class ExpressionParser extends LValParser { if (!this.match(tt.eof)) { this.unexpected(); } + // Unlike parseTopLevel, we need to drain remaining commentStacks + // because the top level node is _not_ Program. 
+ this.finalizeRemainingComments(); expr.comments = this.state.comments; expr.errors = this.state.errors; if (this.options.tokens) { @@ -938,6 +942,7 @@ export default class ExpressionParser extends LValParser { node: N.ArrowFunctionExpression, call: N.CallExpression, ): N.ArrowFunctionExpression { + this.resetPreviousNodeTrailingComments(call); this.expect(tt.arrow); this.parseArrowExpression( node, @@ -945,6 +950,10 @@ export default class ExpressionParser extends LValParser { true, call.extra?.trailingComma, ); + // mark inner comments of `async()` as inner comments of `async () =>` + setInnerComments(node, call.innerComments); + // mark trailing comments of `async` to be inner comments + setInnerComments(node, call.callee.trailingComments); return node; } @@ -999,6 +1008,7 @@ export default class ExpressionParser extends LValParser { if (!containsEsc && id.name === "async" && !this.canInsertSemicolon()) { if (this.match(tt._function)) { + this.resetPreviousNodeTrailingComments(id); this.next(); return this.parseFunction( this.startNodeAtNode(id), @@ -1010,13 +1020,19 @@ export default class ExpressionParser extends LValParser { // arrow function. (Peeking ahead for "=" lets us avoid a more // expensive full-token lookahead on this common path.) if (this.lookaheadCharCode() === charCodes.equalsTo) { - return this.parseAsyncArrowUnaryFunction(id); + // although `id` is not used in async arrow unary function, + // we don't need to reset `async`'s trailing comments because + // it will be attached to the upcoming async arrow binding identifier + return this.parseAsyncArrowUnaryFunction( + this.startNodeAtNode(id), + ); } else { // Otherwise, treat "async" as an identifier and let calling code // deal with the current tt.name token. 
return id; } } else if (this.match(tt._do)) { + this.resetPreviousNodeTrailingComments(id); return this.parseDo(this.startNodeAtNode(id), true); } } @@ -1189,8 +1205,7 @@ export default class ExpressionParser extends LValParser { } // async [no LineTerminator here] AsyncArrowBindingIdentifier[?Yield] [no LineTerminator here] => AsyncConciseBody[?In] - parseAsyncArrowUnaryFunction(id: N.Expression): N.ArrowFunctionExpression { - const node = this.startNodeAtNode(id); + parseAsyncArrowUnaryFunction(node: N.Node): N.ArrowFunctionExpression { // We don't need to push a new ParameterDeclarationScope here since we are sure // 1) it is an async arrow, 2) no biding pattern is allowed in params this.prodParam.enter(functionFlags(true, this.prodParam.hasYield)); @@ -1509,7 +1524,10 @@ export default class ExpressionParser extends LValParser { if (exprList.length > 1) { val = this.startNodeAt(innerStartPos, innerStartLoc); val.expressions = exprList; - this.finishNodeAt(val, "SequenceExpression", innerEndPos, innerEndLoc); + // finish node at current location so it can pick up comments after `)` + this.finishNode(val, "SequenceExpression"); + val.end = innerEndPos; + val.loc.end = innerEndLoc; } else { val = exprList[0]; } @@ -1782,6 +1800,7 @@ export default class ExpressionParser extends LValParser { // https://tc39.es/ecma262/#prod-AsyncGeneratorMethod if (keyName === "async" && !this.hasPrecedingLineBreak()) { isAsync = true; + this.resetPreviousNodeTrailingComments(key); isGenerator = this.eat(tt.star); this.parsePropertyName(prop, /* isPrivateNameAllowed */ false); } @@ -1789,6 +1808,7 @@ export default class ExpressionParser extends LValParser { // set PropertyName[?Yield, ?Await] ( PropertySetParameterList ) { FunctionBody[~Yield, ~Await] } if (keyName === "get" || keyName === "set") { isAccessor = true; + this.resetPreviousNodeTrailingComments(key); prop.kind = keyName; if (this.match(tt.star)) { isGenerator = true; diff --git 
a/packages/babel-parser/src/parser/statement.js b/packages/babel-parser/src/parser/statement.js index 0f6ce71dcf31..fb51dd01587c 100644 --- a/packages/babel-parser/src/parser/statement.js +++ b/packages/babel-parser/src/parser/statement.js @@ -130,26 +130,27 @@ export default class StatementParser extends ExpressionParser { // TODO + /** + * cast a Statement to a Directive. This method mutates input statement. + * + * @param {N.Statement} stmt + * @returns {N.Directive} + * @memberof StatementParser + */ stmtToDirective(stmt: N.Statement): N.Directive { - const expr = stmt.expression; + const directive = (stmt: any); + directive.type = "Directive"; + directive.value = directive.expression; + delete directive.expression; - const directiveLiteral = this.startNodeAt(expr.start, expr.loc.start); - const directive = this.startNodeAt(stmt.start, stmt.loc.start); - - const raw = this.input.slice(expr.start, expr.end); + const directiveLiteral = directive.value; + const raw = this.input.slice(directiveLiteral.start, directiveLiteral.end); const val = (directiveLiteral.value = raw.slice(1, -1)); // remove quotes this.addExtra(directiveLiteral, "raw", raw); this.addExtra(directiveLiteral, "rawValue", val); - - directive.value = this.finishNodeAt( - directiveLiteral, - "DirectiveLiteral", - expr.end, - expr.loc.end, - ); - - return this.finishNodeAt(directive, "Directive", stmt.end, stmt.loc.end); + directiveLiteral.type = "DirectiveLiteral"; + return directive; } parseInterpreterDirective(): N.InterpreterDirective | null { @@ -1374,6 +1375,7 @@ export default class StatementParser extends ExpressionParser { classBody.body.push(this.parseClassProperty(prop)); return true; } + this.resetPreviousNodeTrailingComments(key); return false; } @@ -1494,6 +1496,7 @@ export default class StatementParser extends ExpressionParser { !this.isLineTerminator() ) { // an async method + this.resetPreviousNodeTrailingComments(key); const isGenerator = this.eat(tt.star); if 
(publicMember.optional) { @@ -1535,6 +1538,7 @@ export default class StatementParser extends ExpressionParser { ) { // `get\n*` is an uninitialized property named 'get' followed by a generator. // a getter or setter + this.resetPreviousNodeTrailingComments(key); method.kind = key.name; // The so-called parsed name would have been "get/set": get the real name. const isPrivate = this.match(tt.privateName); diff --git a/packages/babel-parser/src/plugins/estree.js b/packages/babel-parser/src/plugins/estree.js index 8d566a1ddd3a..c4dad6e78f49 100644 --- a/packages/babel-parser/src/plugins/estree.js +++ b/packages/babel-parser/src/plugins/estree.js @@ -131,8 +131,8 @@ export default (superClass: Class): Class => } stmtToDirective(stmt: N.Statement): N.Directive { - const directive = super.stmtToDirective(stmt); const value = stmt.expression.value; + const directive = super.stmtToDirective(stmt); // Record the expression value as in estree mode we want // the stmt to have the real value e.g. ("use strict") and diff --git a/packages/babel-parser/src/plugins/flow/index.js b/packages/babel-parser/src/plugins/flow/index.js index 3bf6f4082d7e..d27aaaa0777c 100644 --- a/packages/babel-parser/src/plugins/flow/index.js +++ b/packages/babel-parser/src/plugins/flow/index.js @@ -3212,7 +3212,7 @@ export default (superClass: Class): Class => return fileNode; } - skipBlockComment(): void { + skipBlockComment(): N.CommentBlock | void { if (this.hasPlugin("flowComments") && this.skipFlowComment()) { if (this.state.hasFlowComment) { this.unexpected(null, FlowErrors.NestedFlowComment); @@ -3232,7 +3232,7 @@ export default (superClass: Class): Class => return; } - super.skipBlockComment(); + return super.skipBlockComment(); } skipFlowComment(): number | boolean { diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index c1c9848b424d..a11c91649971 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ 
b/packages/babel-parser/src/tokenizer/index.js @@ -4,7 +4,6 @@ import type { Options } from "../options"; import * as N from "../types"; -import type { Position } from "../util/location"; import * as charCodes from "charcodes"; import { isIdentifierStart, isIdentifierChar } from "../util/identifier"; import { types as tt, keywords as keywordTypes, type TokenType } from "./types"; @@ -304,28 +303,7 @@ export default class Tokenizer extends ParserErrors { } } - pushComment( - block: boolean, - text: string, - start: number, - end: number, - startLoc: Position, - endLoc: Position, - ): void { - const comment = { - type: block ? "CommentBlock" : "CommentLine", - value: text, - start: start, - end: end, - loc: new SourceLocation(startLoc, endLoc), - }; - - if (this.options.tokens) this.pushToken(comment); - this.state.comments.push(comment); - this.addComment(comment); - } - - skipBlockComment(): void { + skipBlockComment(): N.CommentBlock | void { let startLoc; if (!this.isLookahead) startLoc = this.state.curPosition(); const start = this.state.pos; @@ -348,17 +326,19 @@ export default class Tokenizer extends ParserErrors { if (this.isLookahead) return; /*:: invariant(startLoc) */ - this.pushComment( - true, - this.input.slice(start + 2, end), - start, - this.state.pos, - startLoc, - this.state.curPosition(), - ); + const value = this.input.slice(start + 2, end); + const comment = { + type: "CommentBlock", + value: value, + start: start, + end: end + 2, + loc: new SourceLocation(startLoc, this.state.curPosition()), + }; + if (this.options.tokens) this.pushToken(comment); + return comment; } - skipLineComment(startSkip: number): void { + skipLineComment(startSkip: number): N.CommentLine | void { const start = this.state.pos; let startLoc; if (!this.isLookahead) startLoc = this.state.curPosition(); @@ -374,20 +354,26 @@ export default class Tokenizer extends ParserErrors { if (this.isLookahead) return; /*:: invariant(startLoc) */ - this.pushComment( - false, - 
this.input.slice(start + startSkip, this.state.pos), + const end = this.state.pos; + const value = this.input.slice(start + startSkip, end); + + const comment = { + type: "CommentLine", + value, start, - this.state.pos, - startLoc, - this.state.curPosition(), - ); + end, + loc: new SourceLocation(startLoc, this.state.curPosition()), + }; + if (this.options.tokens) this.pushToken(comment); + return comment; } // Called at the start of the parse and after every token. Skips // whitespace and comments, and. skipSpace(): void { + const spaceStart = this.state.pos; + const comments = []; loop: while (this.state.pos < this.length) { const ch = this.input.charCodeAt(this.state.pos); switch (ch) { @@ -413,13 +399,23 @@ export default class Tokenizer extends ParserErrors { case charCodes.slash: switch (this.input.charCodeAt(this.state.pos + 1)) { - case charCodes.asterisk: - this.skipBlockComment(); + case charCodes.asterisk: { + const comment = this.skipBlockComment(); + if (comment !== undefined) { + this.addComment(comment); + comments.push(comment); + } break; + } - case charCodes.slash: - this.skipLineComment(2); + case charCodes.slash: { + const comment = this.skipLineComment(2); + if (comment !== undefined) { + this.addComment(comment); + comments.push(comment); + } break; + } default: break loop; @@ -429,11 +425,56 @@ export default class Tokenizer extends ParserErrors { default: if (isWhitespace(ch)) { ++this.state.pos; + } else if (ch === charCodes.dash && !this.inModule) { + const pos = this.state.pos; + if ( + this.input.charCodeAt(pos + 1) === charCodes.dash && + this.input.charCodeAt(pos + 2) === charCodes.greaterThan && + (spaceStart === 0 || this.state.lineStart > spaceStart) + ) { + // A `-->` line comment + const comment = this.skipLineComment(3); + if (comment !== undefined) { + this.addComment(comment); + comments.push(comment); + } + } else { + break loop; + } + } else if (ch === charCodes.lessThan && !this.inModule) { + const pos = this.state.pos; + if 
( + this.input.charCodeAt(pos + 1) === charCodes.exclamationMark && + this.input.charCodeAt(pos + 2) === charCodes.dash && + this.input.charCodeAt(pos + 3) === charCodes.dash + ) { + // `` line comment - this.skipLineComment(3); - this.skipSpace(); - this.nextToken(); - return; - } this.finishOp(tt.incDec, 2); return; } @@ -703,20 +732,6 @@ export default class Tokenizer extends ParserErrors { return; } - if ( - next === charCodes.exclamationMark && - code === charCodes.lessThan && - !this.inModule && - this.input.charCodeAt(this.state.pos + 2) === charCodes.dash && - this.input.charCodeAt(this.state.pos + 3) === charCodes.dash - ) { - // `