Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce exprAllowed usage #13431

Merged
merged 18 commits into from Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
69 changes: 30 additions & 39 deletions packages/babel-parser/src/parser/expression.js
Expand Up @@ -261,10 +261,6 @@ export default class ExpressionParser extends LValParser {
const startLoc = this.state.startLoc;
if (this.isContextual("yield")) {
if (this.prodParam.hasYield) {
// If we have [Yield] production, `yield` will start a YieldExpression thus
// regex is allowed following. Otherwise `yield` is an identifier and regex
// is disallowed in tt.name.updateContext
this.state.exprAllowed = true;
let left = this.parseYield();
if (afterLeftParse) {
left = afterLeftParse.call(this, left, startPos, startLoc);
Expand Down Expand Up @@ -988,11 +984,6 @@ export default class ExpressionParser extends LValParser {
// AsyncArrowFunction

parseExprAtom(refExpressionErrors?: ?ExpressionErrors): N.Expression {
// If a division operator appears in an expression position, the
// tokenizer got confused, and we force it to read a regexp instead.
if (this.state.type === tt.slash) this.readRegexp();

const canBeArrow = this.state.potentialArrowAt === this.state.start;
let node;

switch (this.state.type) {
Expand All @@ -1017,24 +1008,12 @@ export default class ExpressionParser extends LValParser {
return this.finishNode(node, "ThisExpression");

case tt.name: {
const canBeArrow = this.state.potentialArrowAt === this.state.start;
const containsEsc = this.state.containsEsc;
const id = this.parseIdentifier();

if (!containsEsc && id.name === "async" && !this.canInsertSemicolon()) {
if (this.match(tt._function)) {
const last = this.state.context.length - 1;
if (this.state.context[last] !== ct.functionStatement) {
// Since "async" is an identifier and normally identifiers
// can't be followed by expression, the tokenizer assumes
// that "function" starts a statement.
// Fixing it in the tokenizer would mean tracking not only the
// previous token ("async"), but also the one before to know
// its beforeExpr value.
// It's easier and more efficient to adjust the context here.
throw new Error("Internal error");
}
this.state.context[last] = ct.functionExpression;

this.next();
return this.parseFunction(
this.startNodeAtNode(id),
Expand Down Expand Up @@ -1073,7 +1052,9 @@ export default class ExpressionParser extends LValParser {
return this.parseDo(false);
}

case tt.regexp: {
case tt.slash:
case tt.slashAssign: {
this.readRegexp();
return this.parseRegExpLiteral(this.state.value);
}

Expand All @@ -1097,8 +1078,10 @@ export default class ExpressionParser extends LValParser {
case tt._false:
return this.parseBooleanLiteral(false);

case tt.parenL:
case tt.parenL: {
const canBeArrow = this.state.potentialArrowAt === this.state.start;
return this.parseParenAndDistinguishExpression(canBeArrow);
}

case tt.bracketBarL:
case tt.bracketHashL: {
Expand Down Expand Up @@ -1720,11 +1703,6 @@ export default class ExpressionParser extends LValParser {
node.properties.push(prop);
}

// The tokenizer uses `braceIsBlock` to detect whether `{` starts a block statement.
// If `{` is a block statement, `exprAllowed` will be `true`.
// However the tokenizer can not handle edge cases like `0 ? a : { a : 1 } / 2`, here
// we update `exprAllowed` when an object-like is parsed.
this.state.exprAllowed = false;
this.next();

this.state.inFSharpPipelineDirectBody = oldInFSharpPipelineDirectBody;
Expand Down Expand Up @@ -2514,17 +2492,30 @@ export default class ExpressionParser extends LValParser {
);

this.next();
if (
this.match(tt.semi) ||
(!this.match(tt.star) && !this.state.type.startsExpr) ||
this.hasPrecedingLineBreak()
) {
node.delegate = false;
node.argument = null;
} else {
node.delegate = this.eat(tt.star);
node.argument = this.parseMaybeAssign();
let delegating = false;
let argument = null;
if (!this.hasPrecedingLineBreak()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious about why

function* fn() {
  yield
  * []
}

is disallowed, it doesn't seem to be ambiguous 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I have no idea why it is disallowed, maybe @bakkot can shred some light here?

Copy link
Contributor

@bakkot bakkot Jun 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That decision was before my time, so I can only speculate. I agree it would still be unambiguous without the NLTH restriction. My best guess is that it's future-proofing, to reserve the possibility of introducing * as a prefix operator.

delegating = this.eat(tt.star);
switch (this.state.type) {
case tt.semi:
case tt.eof:
case tt.braceR:
case tt.parenR:
case tt.bracketR:
case tt.braceBarR:
case tt.colon:
case tt.comma:
Copy link
Contributor Author

@JLHwung JLHwung Jun 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original approach checks tt.semi and then excludes tokens with type.startsExpr. I think the new approach easier to reason about since it is an allowlist. This approach is taken from V8:

https://source.chromium.org/chromium/chromium/src/+/main:v8/src/parsing/parser-base.h;l=2979;drc=d17745f350d4956a07cb1113ee19e9cbc4be699f;bpv=1;bpt=1

However, V8 has tt._in here, which I think it is redundant because it seems to me that in never follow an AssignmentExpression. The spec has

[+In] RelationalExpression[?In, ?Yield, ?Await] in ShiftExpression
for ( LeftHandSideExpression in Expression ) Statement
for ( var ForBinding in Expression ) Statement
for ( ForDeclaration in Expression ) Statement

None of the productions before in can parsed down to an argument-less YieldExpression.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it's because of this?

function* fn() {
  a ? yield : 2
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I meant for tt._in. 🤦

// The above is the complete set of tokens that can
// follow an AssignmentExpression, and none of them
// can start an AssignmentExpression
if (!delegating) break;
/* fallthrough */
default:
argument = this.parseMaybeAssign();
}
}
node.delegate = delegating;
node.argument = argument;
return this.finishNode(node, "YieldExpression");
}

Expand Down
18 changes: 0 additions & 18 deletions packages/babel-parser/src/plugins/flow/index.js
Expand Up @@ -1710,10 +1710,6 @@ export default (superClass: Class<Parser>): Class<Parser> =>
this.state.inType = true;
const type = this.flowParseUnionType();
this.state.inType = oldInType;
// Ensure that a brace after a function generic type annotation is a
// statement, except in arrow functions (noAnonFunctionType)
this.state.exprAllowed =
this.state.exprAllowed || this.state.noAnonFunctionType;
return type;
}

Expand Down Expand Up @@ -3706,20 +3702,6 @@ export default (superClass: Class<Parser>): Class<Parser> =>
return this.finishNode(node, "EnumDeclaration");
}

updateContext(prevType: TokenType): void {
if (
this.match(tt.name) &&
this.state.value === "of" &&
prevType === tt.name &&
this.input.slice(this.state.lastTokStart, this.state.lastTokEnd) ===
"interface"
) {
this.state.exprAllowed = false;
} else {
super.updateContext(prevType);
}
}

// check if the next token is a tt.relation("<")
isLookaheadToken_lt(): boolean {
const next = this.nextTokenStart();
Expand Down
56 changes: 29 additions & 27 deletions packages/babel-parser/src/plugins/jsx/index.js
Expand Up @@ -45,29 +45,18 @@ const JsxErrors = makeErrorTemplates(

// Be aware that this file is always executed and not only when the plugin is enabled.
// Therefore this contexts and tokens do always exist.
tc.j_oTag = new TokContext("<tag", false);
tc.j_cTag = new TokContext("</tag", false);
tc.j_expr = new TokContext("<tag>...</tag>", true, true);
tc.j_oTag = new TokContext("<tag");
tc.j_cTag = new TokContext("</tag");
tc.j_expr = new TokContext("<tag>...</tag>", true);

tt.jsxName = new TokenType("jsxName");
tt.jsxText = new TokenType("jsxText", { beforeExpr: true });
tt.jsxTagStart = new TokenType("jsxTagStart", { startsExpr: true });
tt.jsxTagEnd = new TokenType("jsxTagEnd");

tt.jsxTagStart.updateContext = function () {
this.state.context.push(tc.j_expr); // treat as beginning of JSX expression
this.state.context.push(tc.j_oTag); // start opening tag context
this.state.exprAllowed = false;
};

tt.jsxTagEnd.updateContext = function (prevType) {
Copy link
Contributor Author

@JLHwung JLHwung Jun 8, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is merged with updateContext because it manipulates this.state.exprAllowed based on different contexts.

const out = this.state.context.pop();
if ((out === tc.j_oTag && prevType === tt.slash) || out === tc.j_cTag) {
this.state.context.pop();
this.state.exprAllowed = this.curContext() === tc.j_expr;
} else {
this.state.exprAllowed = true;
}
tt.jsxTagStart.updateContext = context => {
context.push(tc.j_expr); // treat as beginning of JSX expression
context.push(tc.j_oTag); // start opening tag context
};

function isFragment(object: ?N.JSXElement): boolean {
Expand Down Expand Up @@ -625,22 +614,35 @@ export default (superClass: Class<Parser>): Class<Parser> =>
}

updateContext(prevType: TokenType): void {
if (this.match(tt.braceL)) {
const curContext = this.curContext();
super.updateContext(prevType);
const { context, type } = this.state;
if (type === tt.braceL) {
const curContext = context[context.length - 1];
if (curContext === tc.j_oTag) {
this.state.context.push(tc.braceExpression);
context.push(tc.brace);
} else if (curContext === tc.j_expr) {
this.state.context.push(tc.templateQuasi);
} else {
super.updateContext(prevType);
context.push(tc.templateQuasi);
}
this.state.exprAllowed = true;
} else if (this.match(tt.slash) && prevType === tt.jsxTagStart) {
this.state.context.length -= 2; // do not consider JSX expr -> JSX open tag -> ... anymore
this.state.context.push(tc.j_cTag); // reconsider as closing tag context
} else if (type === tt.slash && prevType === tt.jsxTagStart) {
context.length -= 2; // do not consider JSX expr -> JSX open tag -> ... anymore
context.push(tc.j_cTag); // reconsider as closing tag context
this.state.exprAllowed = false;
} else if (type === tt.jsxTagEnd) {
const out = context.pop();
if ((out === tc.j_oTag && prevType === tt.slash) || out === tc.j_cTag) {
context.pop();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are we popping out here? j_expr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If out is j_oTag (<name></name>), context.pop is j_expr

If out is j_cTag (<name />), context.pop is the context before j_cTag, which is also the one before j_expr since we reinterpret j_expr, j_oTag as j_cTag when we see slash following jsxTagStart.

The logic here is was tt.jsxTagEnd.updateContext, I move it here so we can simplify the updateContext interface.

this.state.exprAllowed = context[context.length - 1] === tc.j_expr;
} else {
this.state.exprAllowed = true;
}
} else if (
type.keyword &&
(prevType === tt.dot || prevType === tt.questionDot)
) {
this.state.exprAllowed = false;
} else {
return super.updateContext(prevType);
this.state.exprAllowed = type.beforeExpr;
}
}
};
4 changes: 0 additions & 4 deletions packages/babel-parser/src/plugins/typescript/index.js
Expand Up @@ -1887,9 +1887,6 @@ export default (superClass: Class<Parser>): Class<Parser> =>
if (node.params.length === 0) {
this.raise(node.start, TSErrors.EmptyTypeArguments);
}
// This reads the next token after the `>` too, so do this in the enclosing context.
// But be sure not to parse a regex in the jsx expression `<C<number> />`, so set exprAllowed = false
this.state.exprAllowed = false;
this.expectRelational(">");
return this.finishNode(node, "TSTypeParameterInstantiation");
}
Expand Down Expand Up @@ -2046,7 +2043,6 @@ export default (superClass: Class<Parser>): Class<Parser> =>
state: N.ParseSubscriptState,
): N.Expression {
if (!this.hasPrecedingLineBreak() && this.match(tt.bang)) {
this.state.exprAllowed = false;
this.next();

const nonNullExpression: N.TsNonNullExpression = this.startNodeAt(
Expand Down