Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[parser] Disallow numeric separator in unicode escape sequences #10468

Merged
7 commits merged on Sep 23, 2019
26 changes: 21 additions & 5 deletions packages/babel-parser/src/tokenizer/index.js
Expand Up @@ -880,7 +880,11 @@ export default class Tokenizer extends LocationParser {
// were read, the integer value otherwise. When `len` is given, this
// will return `null` unless the integer has exactly `len` digits.

readInt(radix: number, len?: number): number | null {
readInt(
radix: number,
len?: number,
allowNumSeparator: boolean = true,
): number | null {
const start = this.state.pos;
const forbiddenSiblings =
radix === 16
Expand Down Expand Up @@ -917,6 +921,13 @@ export default class Tokenizer extends LocationParser {
this.raise(this.state.pos, "Invalid or unexpected token");
}

if (!allowNumSeparator) {
this.raise(
this.state.pos,
"Numeric separators are not allowed inside unicode escape sequences or hex escape sequences",
);
}

// Ignore this _ character
++this.state.pos;
continue;
Expand Down Expand Up @@ -1058,6 +1069,7 @@ export default class Tokenizer extends LocationParser {
code = this.readHexChar(
this.input.indexOf("}", this.state.pos) - this.state.pos,
throwOnInvalid,
false,
);
++this.state.pos;
if (code === null) {
Expand All @@ -1072,7 +1084,7 @@ export default class Tokenizer extends LocationParser {
}
}
} else {
code = this.readHexChar(4, throwOnInvalid);
code = this.readHexChar(4, throwOnInvalid, false);
}
return code;
}
Expand Down Expand Up @@ -1184,7 +1196,7 @@ export default class Tokenizer extends LocationParser {
case charCodes.lowercaseR:
return "\r";
case charCodes.lowercaseX: {
const code = this.readHexChar(2, throwOnInvalid);
const code = this.readHexChar(2, throwOnInvalid, false);
return code === null ? null : String.fromCharCode(code);
}
case charCodes.lowercaseU: {
Expand Down Expand Up @@ -1250,9 +1262,13 @@ export default class Tokenizer extends LocationParser {

// Used to read character escape sequences ('\x', '\u').

readHexChar(len: number, throwOnInvalid: boolean): number | null {
readHexChar(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

readHexChar is only used for \x and \u, so we don't have to expose allowNumSeparator to its caller.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 Good catch. So, now it is exposed to readInt's callers and changed to false when called from readHexChar.

len: number,
throwOnInvalid: boolean,
allowNumSeparator: boolean = true,
): number | null {
const codePos = this.state.pos;
const n = this.readInt(16, len);
const n = this.readInt(16, len, allowNumSeparator);
if (n === null) {
if (throwOnInvalid) {
this.raise(codePos, "Bad character escape sequence");
Expand Down
@@ -0,0 +1 @@
"\x1_0";
@@ -0,0 +1 @@
{ "throws": "Numeric separators are not allowed inside unicode escape sequences or hex escape sequences (1:4)" }
@@ -0,0 +1 @@
"\u12_34"
@@ -0,0 +1 @@
{ "throws": "Numeric separators are not allowed inside unicode escape sequences or hex escape sequences (1:5)" }
@@ -0,0 +1 @@
"\u{1F_639}"
@@ -0,0 +1 @@
{ "throws": "Numeric separators are not allowed inside unicode escape sequences or hex escape sequences (1:6)" }
8 changes: 0 additions & 8 deletions scripts/tests/test262/test262_whitelist.txt
Expand Up @@ -104,10 +104,6 @@ language/expressions/object/method-definition/private-name-early-error-gen-insid
language/expressions/object/method-definition/private-name-early-error-gen-inside-class.js(strict mode)
language/expressions/object/method-definition/private-name-early-error-method-inside-class.js(default)
language/expressions/object/method-definition/private-name-early-error-method-inside-class.js(strict mode)
language/expressions/template-literal/unicode-escape-nls-err.js(default)
language/expressions/template-literal/unicode-escape-nls-err.js(strict mode)
language/identifiers/unicode-escape-nls-err.js(default)
language/identifiers/unicode-escape-nls-err.js(strict mode)
language/literals/bigint/non-octal-like-invalid-0008n.js(default)
language/literals/bigint/non-octal-like-invalid-012348n.js(default)
language/literals/bigint/non-octal-like-invalid-08n.js(default)
Expand All @@ -128,10 +124,6 @@ language/literals/numeric/numeric-separators/numeric-separator-literal-nonoctal-
language/literals/numeric/numeric-separators/numeric-separator-literal-nonoctal-0_8-err.js(default)
language/literals/numeric/numeric-separators/numeric-separator-literal-nonoctal-0_9-err.js(default)
language/literals/numeric/numeric-separators/numeric-separator-literal-nzd-nsl-dds-leading-zero-err.js(default)
language/literals/string/unicode-escape-nls-err-double.js(default)
language/literals/string/unicode-escape-nls-err-double.js(strict mode)
language/literals/string/unicode-escape-nls-err-single.js(default)
language/literals/string/unicode-escape-nls-err-single.js(strict mode)
language/module-code/privatename-not-valid-earlyerr-module-1.js(default)
language/module-code/privatename-not-valid-earlyerr-module-1.js(strict mode)
language/module-code/privatename-not-valid-earlyerr-module-2.js(default)
Expand Down