diff --git a/babel.config.js b/babel.config.js index ebd74470fbfd..f4e2a8bcadb0 100644 --- a/babel.config.js +++ b/babel.config.js @@ -209,6 +209,7 @@ module.exports = function (api) { test: [ "packages/babel-generator", "packages/babel-plugin-proposal-decorators", + "packages/babel-helper-string-parser", ].map(normalize), plugins: ["babel-plugin-transform-charcodes"], }, diff --git a/lib/babel-packages.js.flow b/lib/babel-packages.js.flow index bffca14e73d5..a4818eefc624 100644 --- a/lib/babel-packages.js.flow +++ b/lib/babel-packages.js.flow @@ -220,3 +220,85 @@ declare module "@babel/plugin-transform-classes" { declare module "@babel/helper-compilation-targets" { declare module.exports: any; } + +declare module "@babel/helper-string-parser" { + declare export type StringContentsErrorHandlers = EscapedCharErrorHandlers & { + unterminated( + initialPos: number, + initialLineStart: number, + initialCurLine: number + ): void, + }; + declare export function readStringContents( + type: "single" | "double" | "template", + input: string, + pos: number, + lineStart: number, + curLine: number, + errors: StringContentsErrorHandlers + ): { + pos: number, + str: string, + containsInvalid: boolean, + lineStart: number, + curLine: number, + }; + + declare export type EscapedCharErrorHandlers = HexCharErrorHandlers & + CodePointErrorHandlers & { + strictNumericEscape(pos: number): void, + }; + + declare export function readEscapedChar( + input: string, + pos: number, + lineStart: number, + curLine: number, + inTemplate: boolean, + errors: EscapedCharErrorHandlers + ): { + pos: number, + ch: string | null, + lineStart: number, + curLine: number, + }; + + declare type HexCharErrorHandlers = IntErrorHandlers & { + invalidEscapeSequence(pos: number, startPos: number): void, + }; + + declare export type IntErrorHandlers = { + numericSeparatorInEscapeSequence(pos: number): void, + unexpectedNumericSeparator(pos: number): void, + // It can return "true" to indicate that the error was handled + // and the int parsing should continue. + invalidDigit(pos: number, radix: number): boolean, + }; + + declare export function readInt( + input: string, + pos: number, + radix: number, + len?: number, + forceLen: boolean, + allowNumSeparator: boolean | "bail", + errors: IntErrorHandlers + ): { + n: null | number, + pos: number, + }; + + declare export type CodePointErrorHandlers = HexCharErrorHandlers & { + invalidCodePoint(pos: number): void, + }; + + declare export function readCodePoint( + input: string, + pos: number, + throwOnInvalid: boolean, + errors: CodePointErrorHandlers + ): { + code: any, + pos: number, + }; +} diff --git a/packages/babel-helper-string-parser/.npmignore b/packages/babel-helper-string-parser/.npmignore new file mode 100644 index 000000000000..f9806945836e --- /dev/null +++ b/packages/babel-helper-string-parser/.npmignore @@ -0,0 +1,3 @@ +src +test +*.log diff --git a/packages/babel-helper-string-parser/README.md b/packages/babel-helper-string-parser/README.md new file mode 100644 index 000000000000..5a13b5fc9d58 --- /dev/null +++ b/packages/babel-helper-string-parser/README.md @@ -0,0 +1,19 @@ +# @babel/helper-string-parser + +> A utility package to parse strings + +See our website [@babel/helper-string-parser](https://babeljs.io/docs/en/babel-helper-string-parser) for more information. + +## Install + +Using npm: + +```sh +npm install --save @babel/helper-string-parser +``` + +or using yarn: + +```sh +yarn add @babel/helper-string-parser +``` diff --git a/packages/babel-helper-string-parser/package.json b/packages/babel-helper-string-parser/package.json new file mode 100644 index 000000000000..5c3668617064 --- /dev/null +++ b/packages/babel-helper-string-parser/package.json @@ -0,0 +1,36 @@ +{ + "name": "@babel/helper-string-parser", + "version": "7.18.6", + "description": "A utility package to parse strings", + "repository": { + "type": "git", + "url": "https://github.com/babel/babel.git", + "directory": "packages/babel-helper-string-parser" + }, + "homepage": "https://babel.dev/docs/en/next/babel-helper-string-parser", + "license": "MIT", + "publishConfig": { + "access": "public" + }, + "main": "./lib/index.js", + "devDependencies": { + "charcodes": "^0.2.0" + }, + "engines": { + "node": ">=6.9.0" + }, + "author": "The Babel Team (https://babel.dev/team)", + "conditions": { + "USE_ESM": [ + { + "type": "module" + }, + null + ] + }, + "exports": { + ".": "./lib/index.js", + "./package.json": "./package.json" + }, + "type": "commonjs" +} diff --git a/packages/babel-helper-string-parser/src/index.ts b/packages/babel-helper-string-parser/src/index.ts new file mode 100644 index 000000000000..edb89f7dc2ef --- /dev/null +++ b/packages/babel-helper-string-parser/src/index.ts @@ -0,0 +1,396 @@ +import * as charCodes from "charcodes"; + +// The following character codes are forbidden from being +// an immediate sibling of NumericLiteralSeparator _ +const forbiddenNumericSeparatorSiblings = { + decBinOct: new Set([ + charCodes.dot, + charCodes.uppercaseB, + charCodes.uppercaseE, + charCodes.uppercaseO, + charCodes.underscore, // multiple separators are not allowed + charCodes.lowercaseB, + charCodes.lowercaseE, + charCodes.lowercaseO, + ]), + hex: new Set([ + charCodes.dot, + charCodes.uppercaseX, + charCodes.underscore, // multiple separators are not allowed + charCodes.lowercaseX, + ]), +}; + +const isAllowedNumericSeparatorSibling = { + // 0 - 1 + bin: (ch: number) => ch === charCodes.digit0 || ch === charCodes.digit1, + + // 0 - 7 + oct: (ch: number) => ch >= charCodes.digit0 && ch <= charCodes.digit7, + + // 0 - 9 + dec: (ch: number) => ch >= charCodes.digit0 && ch <= charCodes.digit9, + + // 0 - 9, A - F, a - f, + hex: (ch: number) => + (ch >= charCodes.digit0 && ch <= charCodes.digit9) || + (ch >= charCodes.uppercaseA && ch <= charCodes.uppercaseF) || + (ch >= charCodes.lowercaseA && ch <= charCodes.lowercaseF), +}; + +export type StringContentsErrorHandlers = EscapedCharErrorHandlers & { + unterminated( + initialPos: number, + initialLineStart: number, + initialCurLine: number, + ): void; +}; + +export function readStringContents( + type: "single" | "double" | "template", + input: string, + pos: number, + lineStart: number, + curLine: number, + errors: StringContentsErrorHandlers, +) { + const initialPos = pos; + const initialLineStart = lineStart; + const initialCurLine = curLine; + + let out = ""; + let containsInvalid = false; + let chunkStart = pos; + const { length } = input; + for (;;) { + if (pos >= length) { + errors.unterminated(initialPos, initialLineStart, initialCurLine); + break; + } + const ch = input.charCodeAt(pos); + if (isStringEnd(type, ch, input, pos)) { + out += input.slice(chunkStart, pos); + break; + } + if (ch === charCodes.backslash) { + out += input.slice(chunkStart, pos); + let escaped; + ({ + ch: escaped, + pos, + lineStart, + curLine, + } = readEscapedChar( + input, + pos, + lineStart, + curLine, + type === "template", + errors, + )); + if (escaped === null) { + containsInvalid = true; + } else { + out += escaped; + } + chunkStart = pos; + } else if ( + ch === charCodes.lineSeparator || + ch === charCodes.paragraphSeparator + ) { + ++pos; + ++curLine; + lineStart = pos; + } else if (ch === charCodes.lineFeed || ch === charCodes.carriageReturn) { + if (type === "template") { + out += input.slice(chunkStart, pos) + "\n"; + ++pos; + if ( + ch === charCodes.carriageReturn && + input.charCodeAt(pos) === charCodes.lineFeed + ) { + ++pos; + } + ++curLine; + chunkStart = lineStart = pos; + } else { + errors.unterminated(initialPos, initialLineStart, initialCurLine); + } + } else { + ++pos; + } + } + return { pos, str: out, containsInvalid, lineStart, curLine }; +} + +function isStringEnd( + type: "single" | "double" | "template", + ch: number, + input: string, + pos: number, +) { + if (type === "template") { + return ( + ch === charCodes.graveAccent || + (ch === charCodes.dollarSign && + input.charCodeAt(pos + 1) === charCodes.leftCurlyBrace) + ); + } + return ( + ch === (type === "double" ? charCodes.quotationMark : charCodes.apostrophe) + ); +} + +export type EscapedCharErrorHandlers = HexCharErrorHandlers & + CodePointErrorHandlers & { + strictNumericEscape(pos: number): void; + }; + +export function readEscapedChar( + input: string, + pos: number, + lineStart: number, + curLine: number, + inTemplate: boolean, + errors: EscapedCharErrorHandlers, +) { + const throwOnInvalid = !inTemplate; + pos++; // skip '\' + + const res = (ch: string | null) => ({ pos, ch, lineStart, curLine }); + + const ch = input.charCodeAt(pos++); + switch (ch) { + case charCodes.lowercaseN: + return res("\n"); + case charCodes.lowercaseR: + return res("\r"); + case charCodes.lowercaseX: { + let code; + ({ code, pos } = readHexChar( + input, + pos, + 2, + false, + throwOnInvalid, + errors, + )); + return res(code === null ? null : String.fromCharCode(code)); + } + case charCodes.lowercaseU: { + let code; + ({ code, pos } = readCodePoint(input, pos, throwOnInvalid, errors)); + return res(code === null ? null : String.fromCodePoint(code)); + } + case charCodes.lowercaseT: + return res("\t"); + case charCodes.lowercaseB: + return res("\b"); + case charCodes.lowercaseV: + return res("\u000b"); + case charCodes.lowercaseF: + return res("\f"); + case charCodes.carriageReturn: + if (input.charCodeAt(pos) === charCodes.lineFeed) { + ++pos; + } + // fall through + case charCodes.lineFeed: + lineStart = pos; + ++curLine; + // fall through + case charCodes.lineSeparator: + case charCodes.paragraphSeparator: + return res(""); + case charCodes.digit8: + case charCodes.digit9: + if (inTemplate) { + return res(null); + } else { + errors.strictNumericEscape(pos - 1); + } + // fall through + default: + if (ch >= charCodes.digit0 && ch <= charCodes.digit7) { + const startPos = pos - 1; + const match = input.slice(startPos, pos + 2).match(/^[0-7]+/)!; + + let octalStr = match[0]; + + let octal = parseInt(octalStr, 8); + if (octal > 255) { + octalStr = octalStr.slice(0, -1); + octal = parseInt(octalStr, 8); + } + pos += octalStr.length - 1; + const next = input.charCodeAt(pos); + if ( + octalStr !== "0" || + next === charCodes.digit8 || + next === charCodes.digit9 + ) { + if (inTemplate) { + return res(null); + } else { + errors.strictNumericEscape(startPos); + } + } + + return res(String.fromCharCode(octal)); + } + + return res(String.fromCharCode(ch)); + } +} + +type HexCharErrorHandlers = IntErrorHandlers & { + invalidEscapeSequence(pos: number, startPos: number): void; +}; + +// Used to read character escape sequences ('\x', '\u'). +function readHexChar( + input: string, + pos: number, + len: number, + forceLen: boolean, + throwOnInvalid: boolean, + errors: HexCharErrorHandlers, +) { + const initialPos = pos; + let n; + ({ n, pos } = readInt(input, pos, 16, len, forceLen, false, errors)); + if (n === null) { + if (throwOnInvalid) { + errors.invalidEscapeSequence(pos, initialPos); + } else { + pos = initialPos - 1; + } + } + return { code: n, pos }; +} + +export type IntErrorHandlers = { + numericSeparatorInEscapeSequence(pos: number): void; + unexpectedNumericSeparator(pos: number): void; + // It can return "true" to indicate that the error was handled + // and the int parsing should continue. + invalidDigit(pos: number, radix: number): boolean; +}; + +export function readInt( + input: string, + pos: number, + radix: number, + len: number | undefined, + forceLen: boolean, + allowNumSeparator: boolean | "bail", + errors: IntErrorHandlers, +) { + const start = pos; + const forbiddenSiblings = + radix === 16 + ? forbiddenNumericSeparatorSiblings.hex + : forbiddenNumericSeparatorSiblings.decBinOct; + const isAllowedSibling = + radix === 16 + ? isAllowedNumericSeparatorSibling.hex + : radix === 10 + ? isAllowedNumericSeparatorSibling.dec + : radix === 8 + ? isAllowedNumericSeparatorSibling.oct + : isAllowedNumericSeparatorSibling.bin; + + let invalid = false; + let total = 0; + + for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) { + const code = input.charCodeAt(pos); + let val; + + if (code === charCodes.underscore && allowNumSeparator !== "bail") { + const prev = input.charCodeAt(pos - 1); + const next = input.charCodeAt(pos + 1); + + if (!allowNumSeparator) { + errors.numericSeparatorInEscapeSequence(pos); + } else if ( + Number.isNaN(next) || + !isAllowedSibling(next) || + forbiddenSiblings.has(prev) || + forbiddenSiblings.has(next) + ) { + errors.unexpectedNumericSeparator(pos); + } + + // Ignore this _ character + ++pos; + continue; + } + + if (code >= charCodes.lowercaseA) { + val = code - charCodes.lowercaseA + charCodes.lineFeed; + } else if (code >= charCodes.uppercaseA) { + val = code - charCodes.uppercaseA + charCodes.lineFeed; + } else if (charCodes.isDigit(code)) { + val = code - charCodes.digit0; // 0-9 + } else { + val = Infinity; + } + if (val >= radix) { + // If we found a digit which is too big, errors.invalidDigit can return true to avoid + // breaking the loop (this is used for error recovery). + if (val <= 9 && errors.invalidDigit(pos, radix)) { + val = 0; + } else if (forceLen) { + val = 0; + invalid = true; + } else { + break; + } + } + ++pos; + total = total * radix + val; + } + if (pos === start || (len != null && pos - start !== len) || invalid) { + return { n: null, pos }; + } + + return { n: total, pos }; +} + +export type CodePointErrorHandlers = HexCharErrorHandlers & { + invalidCodePoint(pos: number): void; +}; + +export function readCodePoint( + input: string, + pos: number, + throwOnInvalid: boolean, + errors: CodePointErrorHandlers, +) { + const ch = input.charCodeAt(pos); + let code; + + if (ch === charCodes.leftCurlyBrace) { + ++pos; + ({ code, pos } = readHexChar( + input, + pos, + input.indexOf("}", pos) - pos, + true, + throwOnInvalid, + errors, + )); + ++pos; + if (code !== null && code > 0x10ffff) { + if (throwOnInvalid) { + errors.invalidCodePoint(pos); + } else { + return { code: null, pos }; + } + } + } else { + ({ code, pos } = readHexChar(input, pos, 4, false, throwOnInvalid, errors)); + } + return { code, pos }; +} diff --git a/packages/babel-parser/package.json b/packages/babel-parser/package.json index 7cb6c186bce9..277bd8bcd920 100644 --- a/packages/babel-parser/package.json +++ b/packages/babel-parser/package.json @@ -37,6 +37,7 @@ "@babel/code-frame": "workspace:^", "@babel/helper-check-duplicate-nodes": "workspace:^", "@babel/helper-fixtures": "workspace:^", + "@babel/helper-string-parser": "workspace:^", "@babel/helper-validator-identifier": "workspace:^", "charcodes": "^0.2.0" }, diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index 7c24f2e148a5..0d400d2aa7b0 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -35,6 +35,17 @@ import { import State from "./state"; import type { LookaheadState, DeferredStrictError } from "./state"; +import { + readInt, + readCodePoint, + readEscapedChar, + readStringContents, + type IntErrorHandlers, + type CodePointErrorHandlers, + type EscapedCharErrorHandlers, + type StringContentsErrorHandlers, +} from "@babel/helper-string-parser"; + const VALID_REGEX_FLAGS = new Set([ charCodes.lowercaseG, charCodes.lowercaseM, @@ -47,45 +58,6 @@ const VALID_REGEX_FLAGS = new Set([ charCodes.lowercaseV, ]); -// The following character codes are forbidden from being -// an immediate sibling of NumericLiteralSeparator _ - -const forbiddenNumericSeparatorSiblings = { - decBinOct: new Set([ - charCodes.dot, - charCodes.uppercaseB, - charCodes.uppercaseE, - charCodes.uppercaseO, - charCodes.underscore, // multiple separators are not allowed - charCodes.lowercaseB, - charCodes.lowercaseE, - charCodes.lowercaseO, - ]), - hex: new Set([ - charCodes.dot, - charCodes.uppercaseX, - charCodes.underscore, // multiple separators are not allowed - charCodes.lowercaseX, - ]), -}; - -const isAllowedNumericSeparatorSibling = { - // 0 - 1 - bin: ch => ch === charCodes.digit0 || ch === charCodes.digit1, - - // 0 - 7 - oct: ch => ch >= charCodes.digit0 && ch <= charCodes.digit7, - - // 0 - 9 - dec: ch => ch >= charCodes.digit0 && ch <= charCodes.digit9, - - // 0 - 9, A - F, a - f, - hex: ch => - (ch >= charCodes.digit0 && ch <= charCodes.digit9) || - (ch >= charCodes.uppercaseA && ch <= charCodes.uppercaseF) || - (ch >= charCodes.lowercaseA && ch <= charCodes.lowercaseF), -}; - // Object type used to represent tokens. Note that normally, tokens // simply exist as properties on the parser object. This is only // used for the onToken callback and the external tokenizer. @@ -1145,92 +1117,20 @@ export default class Tokenizer extends CommentsParser { readInt( radix: number, len?: number, - forceLen?: boolean, + forceLen: boolean = false, allowNumSeparator: boolean | "bail" = true, ): number | null { - const start = this.state.pos; - const forbiddenSiblings = - radix === 16 - ? forbiddenNumericSeparatorSiblings.hex - : forbiddenNumericSeparatorSiblings.decBinOct; - const isAllowedSibling = - radix === 16 - ? isAllowedNumericSeparatorSibling.hex - : radix === 10 - ? isAllowedNumericSeparatorSibling.dec - : radix === 8 - ? isAllowedNumericSeparatorSibling.oct - : isAllowedNumericSeparatorSibling.bin; - - let invalid = false; - let total = 0; - - for (let i = 0, e = len == null ? Infinity : len; i < e; ++i) { - const code = this.input.charCodeAt(this.state.pos); - let val; - - if (code === charCodes.underscore && allowNumSeparator !== "bail") { - const prev = this.input.charCodeAt(this.state.pos - 1); - const next = this.input.charCodeAt(this.state.pos + 1); - - if (!allowNumSeparator) { - this.raise(Errors.NumericSeparatorInEscapeSequence, { - at: this.state.curPosition(), - }); - } else if ( - Number.isNaN(next) || - !isAllowedSibling(next) || - forbiddenSiblings.has(prev) || - forbiddenSiblings.has(next) - ) { - this.raise(Errors.UnexpectedNumericSeparator, { - at: this.state.curPosition(), - }); - } - - // Ignore this _ character - ++this.state.pos; - continue; - } - - if (code >= charCodes.lowercaseA) { - val = code - charCodes.lowercaseA + charCodes.lineFeed; - } else if (code >= charCodes.uppercaseA) { - val = code - charCodes.uppercaseA + charCodes.lineFeed; - } else if (charCodes.isDigit(code)) { - val = code - charCodes.digit0; // 0-9 - } else { - val = Infinity; - } - if (val >= radix) { - // If we are in "errorRecovery" mode and we found a digit which is too big, - // don't break the loop. - - if (this.options.errorRecovery && val <= 9) { - val = 0; - this.raise(Errors.InvalidDigit, { - at: this.state.curPosition(), - radix, - }); - } else if (forceLen) { - val = 0; - invalid = true; - } else { - break; - } - } - ++this.state.pos; - total = total * radix + val; - } - if ( - this.state.pos === start || - (len != null && this.state.pos - start !== len) || - invalid - ) { - return null; - } - - return total; + const { n, pos } = readInt( + this.input, + this.state.pos, + radix, + len, + forceLen, + allowNumSeparator, + this.errorHandlers_readInt, + ); + this.state.pos = pos; + return n; } readRadixNumber(radix: number): void { @@ -1375,63 +1275,29 @@ export default class Tokenizer extends CommentsParser { // Read a string value, interpreting backslash-escapes. readCodePoint(throwOnInvalid: boolean): number | null { - const ch = this.input.charCodeAt(this.state.pos); - let code; - - if (ch === charCodes.leftCurlyBrace) { - ++this.state.pos; - code = this.readHexChar( - this.input.indexOf("}", this.state.pos) - this.state.pos, - true, - throwOnInvalid, - ); - ++this.state.pos; - if (code !== null && code > 0x10ffff) { - if (throwOnInvalid) { - this.raise(Errors.InvalidCodePoint, { at: this.state.curPosition() }); - } else { - return null; - } - } - } else { - code = this.readHexChar(4, false, throwOnInvalid); - } + const { code, pos } = readCodePoint( + this.input, + this.state.pos, + throwOnInvalid, + this.errorHandlers_readCodePoint, + ); + this.state.pos = pos; return code; } readString(quote: number): void { - let out = "", - chunkStart = ++this.state.pos; - for (;;) { - if (this.state.pos >= this.length) { - throw this.raise(Errors.UnterminatedString, { - at: this.state.startLoc, - }); - } - const ch = this.input.charCodeAt(this.state.pos); - if (ch === quote) break; - if (ch === charCodes.backslash) { - out += this.input.slice(chunkStart, this.state.pos); - // $FlowFixMe - out += this.readEscapedChar(false); - chunkStart = this.state.pos; - } else if ( - ch === charCodes.lineSeparator || - ch === charCodes.paragraphSeparator - ) { - ++this.state.pos; - ++this.state.curLine; - this.state.lineStart = this.state.pos; - } else if (isNewLine(ch)) { - throw this.raise(Errors.UnterminatedString, { - at: this.state.startLoc, - }); - } else { - ++this.state.pos; - } - } - out += this.input.slice(chunkStart, this.state.pos++); - this.finishToken(tt.string, out); + const { str, pos, curLine, lineStart } = readStringContents( + quote === charCodes.quotationMark ? "double" : "single", + this.input, + this.state.pos + 1, // skip the quote + this.state.lineStart, + this.state.curLine, + this.errorHandlers_readStringContents_string, + ); + this.state.pos = pos + 1; // skip the quote + this.state.lineStart = lineStart; + this.state.curLine = curLine; + this.finishToken(tt.string, str); } // Reads template continuation `}...` @@ -1446,64 +1312,31 @@ export default class Tokenizer extends CommentsParser { // Reads template string tokens. readTemplateToken(): void { - let out = "", - chunkStart = this.state.pos, - containsInvalid = false; - ++this.state.pos; // eat '`' or `}` - for (;;) { - if (this.state.pos >= this.length) { - // FIXME: explain - throw this.raise(Errors.UnterminatedTemplate, { - at: createPositionWithColumnOffset(this.state.startLoc, 1), - }); - } - const ch = this.input.charCodeAt(this.state.pos); - if (ch === charCodes.graveAccent) { - ++this.state.pos; // eat '`' - out += this.input.slice(chunkStart, this.state.pos); - this.finishToken(tt.templateTail, containsInvalid ? null : out); - return; - } - if ( - ch === charCodes.dollarSign && - this.input.charCodeAt(this.state.pos + 1) === charCodes.leftCurlyBrace - ) { - this.state.pos += 2; // eat '${' - out += this.input.slice(chunkStart, this.state.pos); - this.finishToken(tt.templateNonTail, containsInvalid ? null : out); - return; - } - if (ch === charCodes.backslash) { - out += this.input.slice(chunkStart, this.state.pos); - const escaped = this.readEscapedChar(true); - if (escaped === null) { - containsInvalid = true; - } else { - out += escaped; - } - chunkStart = this.state.pos; - } else if (isNewLine(ch)) { - out += this.input.slice(chunkStart, this.state.pos); - ++this.state.pos; - switch (ch) { - case charCodes.carriageReturn: - if (this.input.charCodeAt(this.state.pos) === charCodes.lineFeed) { - ++this.state.pos; - } - // fall through - case charCodes.lineFeed: - out += "\n"; - break; - default: - out += String.fromCharCode(ch); - break; - } - ++this.state.curLine; - this.state.lineStart = this.state.pos; - chunkStart = this.state.pos; - } else { - ++this.state.pos; - } + const opening = this.input[this.state.pos]; + const { str, containsInvalid, pos, curLine, lineStart } = + readStringContents( + "template", + this.input, + this.state.pos + 1, // skip '`' or `}` + this.state.lineStart, + this.state.curLine, + this.errorHandlers_readStringContents_template, + ); + this.state.pos = pos + 1; // skip '`' or `$` + this.state.lineStart = lineStart; + this.state.curLine = curLine; + + if (this.input.codePointAt(pos) === charCodes.graveAccent) { + this.finishToken( + tt.templateTail, + containsInvalid ? null : opening + str + "`", + ); + } else { + this.state.pos++; // skip '{' + this.finishToken( + tt.templateNonTail, + containsInvalid ? null : opening + str + "${", + ); } } @@ -1522,115 +1355,18 @@ export default class Tokenizer extends CommentsParser { // Used to read escaped characters readEscapedChar(inTemplate: boolean): string | null { - const throwOnInvalid = !inTemplate; - const ch = this.input.charCodeAt(++this.state.pos); - ++this.state.pos; - switch (ch) { - case charCodes.lowercaseN: - return "\n"; - case charCodes.lowercaseR: - return "\r"; - case charCodes.lowercaseX: { - const code = this.readHexChar(2, false, throwOnInvalid); - return code === null ? null : String.fromCharCode(code); - } - case charCodes.lowercaseU: { - const code = this.readCodePoint(throwOnInvalid); - return code === null ? null : String.fromCodePoint(code); - } - case charCodes.lowercaseT: - return "\t"; - case charCodes.lowercaseB: - return "\b"; - case charCodes.lowercaseV: - return "\u000b"; - case charCodes.lowercaseF: - return "\f"; - case charCodes.carriageReturn: - if (this.input.charCodeAt(this.state.pos) === charCodes.lineFeed) { - ++this.state.pos; - } - // fall through - case charCodes.lineFeed: - this.state.lineStart = this.state.pos; - ++this.state.curLine; - // fall through - case charCodes.lineSeparator: - case charCodes.paragraphSeparator: - return ""; - case charCodes.digit8: - case charCodes.digit9: - if (inTemplate) { - return null; - } else { - this.recordStrictModeErrors(Errors.StrictNumericEscape, { - // We immediately follow a "\\", and we're an 8 or a 9, so we must - // be on the same line. - at: createPositionWithColumnOffset(this.state.curPosition(), -1), - }); - } - // fall through - default: - if (ch >= charCodes.digit0 && ch <= charCodes.digit7) { - // We immediately follow a "\\", and we're something between 0 and 7, - // so we must be on the same line. - const codePos = createPositionWithColumnOffset( - this.state.curPosition(), - -1, - ); - const match = this.input - .slice(this.state.pos - 1, this.state.pos + 2) - .match(/^[0-7]+/); - - // This is never null, because of the if condition above. - /*:: invariant(match !== null) */ - let octalStr = match[0]; - - let octal = parseInt(octalStr, 8); - if (octal > 255) { - octalStr = octalStr.slice(0, -1); - octal = parseInt(octalStr, 8); - } - this.state.pos += octalStr.length - 1; - const next = this.input.charCodeAt(this.state.pos); - if ( - octalStr !== "0" || - next === charCodes.digit8 || - next === charCodes.digit9 - ) { - if (inTemplate) { - return null; - } else { - this.recordStrictModeErrors(Errors.StrictNumericEscape, { - at: codePos, - }); - } - } - - return String.fromCharCode(octal); - } - - return String.fromCharCode(ch); - } - } - - // Used to read character escape sequences ('\x', '\u'). - - readHexChar( - len: number, - forceLen: boolean, - throwOnInvalid: boolean, - ): number | null { - const codeLoc = this.state.curPosition(); - const n = this.readInt(16, len, forceLen, false); - if (n === null) { - if (throwOnInvalid) { - this.raise(Errors.InvalidEscapeSequence, { at: codeLoc }); - } else { - this.state.pos = codeLoc.index - 1; - } - } - return n; + const { ch, pos, lineStart, curLine } = readEscapedChar( + this.input, + this.state.pos, + this.state.lineStart, + this.state.curLine, + inTemplate, + this.errorHandlers_readEscapedChar, + ); + this.state.pos = pos; + this.state.lineStart = lineStart; + this.state.curLine = curLine; + return ch; } // Read an identifier, and return it as a string. Sets `this.state.containsEsc` @@ -1803,4 +1539,85 @@ export default class Tokenizer extends CommentsParser { }); } } + + errorHandlers_readInt: IntErrorHandlers = { + invalidDigit: (pos, radix) => { + if (this.options.errorRecovery) { + this.state.pos = pos; + this.raise(Errors.InvalidDigit, { + at: this.state.curPosition(), + radix, + }); + // Continue parsing the number as if there was no invalid digit. + return true; + } + return false; + }, + numericSeparatorInEscapeSequence: pos => { + this.state.pos = pos; + this.raise(Errors.NumericSeparatorInEscapeSequence, { + at: this.state.curPosition(), + }); + }, + unexpectedNumericSeparator: pos => { + this.state.pos = pos; + this.raise(Errors.UnexpectedNumericSeparator, { + at: this.state.curPosition(), + }); + }, + }; + + errorHandlers_readCodePoint: CodePointErrorHandlers = { + ...this.errorHandlers_readInt, + invalidEscapeSequence: (pos, startPos) => { + this.state.pos = pos; + this.raise(Errors.InvalidEscapeSequence, { + at: createPositionWithColumnOffset( + this.state.curPosition(), + startPos - pos, + ), + }); + }, + invalidCodePoint: pos => { + this.state.pos = pos; + this.raise(Errors.InvalidCodePoint, { at: this.state.curPosition() }); + }, + }; + + // $FlowIgnore - flow doesn't like introducing required methods with ... + errorHandlers_readEscapedChar: EscapedCharErrorHandlers = { + ...this.errorHandlers_readCodePoint, + strictNumericEscape: pos => { + this.state.pos = pos; + this.recordStrictModeErrors(Errors.StrictNumericEscape, { + at: this.state.curPosition(), + }); + }, + }; + + // $FlowIgnore - flow doesn't like introducing required methods with ... + errorHandlers_readStringContents_string: StringContentsErrorHandlers = { + ...this.errorHandlers_readEscapedChar, + unterminated: (initialPos, initialLineStart, initialCurLine) => { + this.state.pos = initialPos - 1; // Report the error at the string quote + this.state.lineStart = initialLineStart; + this.state.curLine = initialCurLine; + throw this.raise(Errors.UnterminatedString, { + at: this.state.curPosition(), + }); + }, + }; + + // $FlowIgnore - flow doesn't like introducing required methods with ... + errorHandlers_readStringContents_template: StringContentsErrorHandlers = { + ...this.errorHandlers_readEscapedChar, + unterminated: (initialPos, initialLineStart, initialCurLine) => { + this.state.pos = initialPos; // TODO: For strings, we subtract 1 + this.state.lineStart = initialLineStart; + this.state.curLine = initialCurLine; + throw this.raise(Errors.UnterminatedTemplate, { + at: this.state.curPosition(), + }); + }, + }; } diff --git a/packages/babel-parser/test/fixtures/es2015/uncategorised/217/output.json b/packages/babel-parser/test/fixtures/es2015/uncategorised/217/output.json index 6a1f3c6d1010..aeda76823441 100644 --- a/packages/babel-parser/test/fixtures/es2015/uncategorised/217/output.json +++ b/packages/babel-parser/test/fixtures/es2015/uncategorised/217/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u{}", "raw": "\"\\u{}\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u{}" } diff --git a/packages/babel-parser/test/fixtures/es2015/uncategorised/218/output.json b/packages/babel-parser/test/fixtures/es2015/uncategorised/218/output.json index fdf7528a638d..442e4dbdef0b 100644 --- a/packages/babel-parser/test/fixtures/es2015/uncategorised/218/output.json +++ b/packages/babel-parser/test/fixtures/es2015/uncategorised/218/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u{FFFF", "raw": "\"\\u{FFFF\"", - "expressionValue": "nullFFF" + "expressionValue": "FFF" }, "value": "\\u{FFFF" } diff --git a/packages/babel-parser/test/fixtures/es2015/uncategorised/219/output.json b/packages/babel-parser/test/fixtures/es2015/uncategorised/219/output.json index dc729cf5a8b3..23b80161fbb8 100644 --- a/packages/babel-parser/test/fixtures/es2015/uncategorised/219/output.json +++ b/packages/babel-parser/test/fixtures/es2015/uncategorised/219/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u{FFZ}", "raw": "\"\\u{FFZ}\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u{FFZ}" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-00/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-00/output.json index dd583d183f97..2008022fd864 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-00/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-00/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\x", "raw": "\"\\x\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\x" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-01/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-01/output.json index 5bf165563d75..53f96019db42 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-01/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-01/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\x0", "raw": "\"\\x0\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\x0" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-02/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-02/output.json index b75fdcbcd30e..58fd0584ab5f 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-02/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-02/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\xx", "raw": "\"\\xx\"", - "expressionValue": "nullx" + "expressionValue": "x" }, "value": "\\xx" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-03/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-03/output.json index 4caf1f3aeeb8..5f30d2792b94 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-03/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-03/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u", "raw": "\"\\u\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-04/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-04/output.json index 8b467fd661fa..0479a6dc2a6c 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-04/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-04/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u0", "raw": "\"\\u0\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u0" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-05/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-05/output.json index aae81136b79d..bed7b99e8a2a 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-05/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-05/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\ux", "raw": "\"\\ux\"", - "expressionValue": "nullx" + "expressionValue": "x" }, "value": "\\ux" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-06/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-06/output.json index c5478bacc661..783bef7b7a3d 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-06/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-06/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u00", "raw": "\"\\u00\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u00" } diff --git a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-07/output.json b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-07/output.json index 25ea60d945a9..19e2d8ee4e43 100644 --- a/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-07/output.json +++ b/packages/babel-parser/test/fixtures/esprima/invalid-syntax/GH-1106-07/output.json @@ -20,7 +20,7 @@ "extra": { "rawValue": "\\u000", "raw": "\"\\u000\"", - "expressionValue": "null" + "expressionValue": "" }, "value": "\\u000" } diff --git a/tsconfig.json b/tsconfig.json index d2eba9b804cf..820156ac948e 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -30,6 +30,7 @@ "./packages/babel-helper-simple-access/src/**/*.ts", "./packages/babel-helper-skip-transparent-expression-wrappers/src/**/*.ts", "./packages/babel-helper-split-export-declaration/src/**/*.ts", + "./packages/babel-helper-string-parser/src/**/*.ts", "./packages/babel-helper-transform-fixture-test-runner/src/**/*.ts", "./packages/babel-helper-validator-identifier/src/**/*.ts", "./packages/babel-helper-validator-option/src/**/*.ts", @@ -249,6 +250,9 @@ "@babel/helper-split-export-declaration": [ "./packages/babel-helper-split-export-declaration/src" ], + "@babel/helper-string-parser": [ + "./packages/babel-helper-string-parser/src" + ], "@babel/helper-transform-fixture-test-runner": [ "./packages/babel-helper-transform-fixture-test-runner/src" ], diff --git a/yarn.lock b/yarn.lock index a82f687aa2e3..622fb1d76064 100644 --- a/yarn.lock +++ b/yarn.lock @@ -925,6 +925,14 @@ __metadata: languageName: unknown linkType: soft +"@babel/helper-string-parser@workspace:^, @babel/helper-string-parser@workspace:packages/babel-helper-string-parser": + version: 0.0.0-use.local + resolution: "@babel/helper-string-parser@workspace:packages/babel-helper-string-parser" + dependencies: + charcodes: ^0.2.0 + languageName: unknown + linkType: soft + "@babel/helper-transform-fixture-test-runner@workspace:^, @babel/helper-transform-fixture-test-runner@workspace:packages/babel-helper-transform-fixture-test-runner": version: 0.0.0-use.local resolution: "@babel/helper-transform-fixture-test-runner@workspace:packages/babel-helper-transform-fixture-test-runner" @@ -1063,6 +1071,7 @@ __metadata: "@babel/code-frame": "workspace:^" "@babel/helper-check-duplicate-nodes": "workspace:^" "@babel/helper-fixtures": "workspace:^" + "@babel/helper-string-parser": "workspace:^" "@babel/helper-validator-identifier": "workspace:^" charcodes: ^0.2.0 bin: