Skip to content

Commit

Permalink
refactor(compiler): tidy up interpolation splitting
Browse files Browse the repository at this point in the history
When parsing for i18n messages, interpolated strings are
split into `Text` and `Placeholder` pieces.  The method that
does this `_visitTextWithInterpolation()` was becoming too
complex. This commit refactors that method along with some
associated functions that it uses.
  • Loading branch information
petebacondarwin committed Nov 19, 2020
1 parent 2692411 commit 9ee0012
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 96 deletions.
80 changes: 40 additions & 40 deletions packages/compiler/src/expression_parser/parser.ts
Expand Up @@ -13,10 +13,14 @@ import {escapeRegExp} from '../util';
import {AbsoluteSourceSpan, AST, AstVisitor, ASTWithSource, Binary, BindingPipe, Chain, Conditional, EmptyExpr, ExpressionBinding, FunctionCall, ImplicitReceiver, Interpolation, KeyedRead, KeyedWrite, LiteralArray, LiteralMap, LiteralMapKey, LiteralPrimitive, MethodCall, NonNullAssert, ParserError, ParseSpan, PrefixNot, PropertyRead, PropertyWrite, Quote, RecursiveAstVisitor, SafeMethodCall, SafePropertyRead, TemplateBinding, TemplateBindingIdentifier, ThisReceiver, Unary, VariableBinding} from './ast';
import {EOF, isIdentifier, isQuote, Lexer, Token, TokenType} from './lexer';

export interface InterpolationPiece {
text: string;
start: number;
end: number;
}
export class SplitInterpolation {
constructor(
public strings: string[], public stringSpans: {start: number, end: number}[],
public expressions: string[], public expressionsSpans: {start: number, end: number}[],
public strings: InterpolationPiece[], public expressions: InterpolationPiece[],
public offsets: number[]) {}
}

Expand Down Expand Up @@ -48,7 +52,7 @@ export class Parser {
simpleExpressionChecker = SimpleExpressionChecker;

parseAction(
input: string, location: any, absoluteOffset: number,
input: string, location: string, absoluteOffset: number,
interpolationConfig: InterpolationConfig = DEFAULT_INTERPOLATION_CONFIG): ASTWithSource {
this._checkNoInterpolation(input, location, interpolationConfig);
const sourceToLex = this._stripComments(input);
Expand All @@ -61,7 +65,7 @@ export class Parser {
}

parseBinding(
input: string, location: any, absoluteOffset: number,
input: string, location: string, absoluteOffset: number,
interpolationConfig: InterpolationConfig = DEFAULT_INTERPOLATION_CONFIG): ASTWithSource {
const ast = this._parseBindingAst(input, location, absoluteOffset, interpolationConfig);
return new ASTWithSource(ast, input, location, absoluteOffset, this.errors);
Expand All @@ -85,7 +89,7 @@ export class Parser {
return new ASTWithSource(ast, input, location, absoluteOffset, this.errors);
}

private _reportError(message: string, input: string, errLocation: string, ctxLocation?: any) {
private _reportError(message: string, input: string, errLocation: string, ctxLocation?: string) {
this.errors.push(new ParserError(message, input, errLocation, ctxLocation));
}

Expand All @@ -109,7 +113,7 @@ export class Parser {
.parseChain();
}

private _parseQuote(input: string|null, location: any, absoluteOffset: number): AST|null {
private _parseQuote(input: string|null, location: string, absoluteOffset: number): AST|null {
if (input == null) return null;
const prefixSeparatorIndex = input.indexOf(':');
if (prefixSeparatorIndex == -1) return null;
Expand Down Expand Up @@ -161,33 +165,35 @@ export class Parser {
}

parseInterpolation(
input: string, location: any, absoluteOffset: number,
input: string, location: string, absoluteOffset: number,
interpolationConfig: InterpolationConfig = DEFAULT_INTERPOLATION_CONFIG): ASTWithSource|null {
const split = this.splitInterpolation(input, location, interpolationConfig);
if (split == null) return null;
const {strings, expressions, offsets} =
this.splitInterpolation(input, location, interpolationConfig);
if (expressions.length === 0) return null;

const expressions: AST[] = [];
const expressionNodes: AST[] = [];

for (let i = 0; i < split.expressions.length; ++i) {
const expressionText = split.expressions[i];
for (let i = 0; i < expressions.length; ++i) {
const expressionText = expressions[i].text;
const sourceToLex = this._stripComments(expressionText);
const tokens = this._lexer.tokenize(sourceToLex);
const ast = new _ParseAST(
input, location, absoluteOffset, tokens, sourceToLex.length, false,
this.errors, split.offsets[i] + (expressionText.length - sourceToLex.length))
this.errors, offsets[i] + (expressionText.length - sourceToLex.length))
.parseChain();
expressions.push(ast);
expressionNodes.push(ast);
}

return this.createInterpolationAst(split.strings, expressions, input, location, absoluteOffset);
return this.createInterpolationAst(
strings.map(s => s.text), expressionNodes, input, location, absoluteOffset);
}

/**
* Similar to `parseInterpolation`, but treats the provided string as a single expression
* element that would normally appear within the interpolation prefix and suffix (`{{` and `}}`).
* This is used for parsing the switch expression in ICUs.
*/
parseInterpolationExpression(expression: string, location: any, absoluteOffset: number):
parseInterpolationExpression(expression: string, location: string, absoluteOffset: number):
ASTWithSource {
const sourceToLex = this._stripComments(expression);
const tokens = this._lexer.tokenize(sourceToLex);
Expand Down Expand Up @@ -217,13 +223,10 @@ export class Parser {
*/
splitInterpolation(
input: string, location: string,
interpolationConfig: InterpolationConfig = DEFAULT_INTERPOLATION_CONFIG): SplitInterpolation
|null {
const strings: string[] = [];
const expressions: string[] = [];
interpolationConfig: InterpolationConfig = DEFAULT_INTERPOLATION_CONFIG): SplitInterpolation {
const strings: InterpolationPiece[] = [];
const expressions: InterpolationPiece[] = [];
const offsets: number[] = [];
const stringSpans: {start: number, end: number}[] = [];
const expressionSpans: {start: number, end: number}[] = [];
let i = 0;
let atInterpolation = false;
let extendLastString = false;
Expand All @@ -236,9 +239,8 @@ export class Parser {
if (i === -1) {
i = input.length;
}
const part = input.substring(start, i);
strings.push(part);
stringSpans.push({start, end: i});
const text = input.substring(start, i);
strings.push({text, start, end: i});

atInterpolation = true;
} else {
Expand All @@ -255,17 +257,16 @@ export class Parser {
}
const fullEnd = exprEnd + interpEnd.length;

const part = input.substring(exprStart, exprEnd);
if (part.trim().length > 0) {
expressions.push(part);
const text = input.substring(exprStart, exprEnd);
if (text.trim().length > 0) {
expressions.push({text, start: fullStart, end: fullEnd});
} else {
this._reportError(
'Blank expressions are not allowed in interpolated strings', input,
`at column ${i} in`, location);
expressions.push('$implicit');
expressions.push({text: '$implicit', start: fullStart, end: fullEnd});
}
offsets.push(exprStart);
expressionSpans.push({start: fullStart, end: fullEnd});

i = fullEnd;
atInterpolation = false;
Expand All @@ -274,19 +275,18 @@ export class Parser {
if (!atInterpolation) {
// If we are now at a text section, add the remaining content as a raw string.
if (extendLastString) {
strings[strings.length - 1] += input.substring(i);
stringSpans[stringSpans.length - 1].end = input.length;
const piece = strings[strings.length - 1];
piece.text += input.substring(i);
piece.end = input.length;
} else {
strings.push(input.substring(i));
stringSpans.push({start: i, end: input.length});
strings.push({text: input.substring(i), start: i, end: input.length});
}
}
return expressions.length === 0 ?
null :
new SplitInterpolation(strings, stringSpans, expressions, expressionSpans, offsets);
return new SplitInterpolation(strings, expressions, offsets);
}

wrapLiteralPrimitive(input: string|null, location: any, absoluteOffset: number): ASTWithSource {
wrapLiteralPrimitive(input: string|null, location: string, absoluteOffset: number):
ASTWithSource {
const span = new ParseSpan(0, input == null ? 0 : input.length);
return new ASTWithSource(
new LiteralPrimitive(span, span.toAbsolute(absoluteOffset), input), input, location,
Expand Down Expand Up @@ -316,7 +316,7 @@ export class Parser {
}

private _checkNoInterpolation(
input: string, location: any, interpolationConfig: InterpolationConfig): void {
input: string, location: string, interpolationConfig: InterpolationConfig): void {
const regexp = _getInterpolateRegExp(interpolationConfig);
const parts = input.split(regexp);
if (parts.length > 1) {
Expand Down Expand Up @@ -374,7 +374,7 @@ export class _ParseAST {
index: number = 0;

constructor(
public input: string, public location: any, public absoluteOffset: number,
public input: string, public location: string, public absoluteOffset: number,
public tokens: Token[], public inputLength: number, public parseAction: boolean,
private errors: ParserError[], private offset: number) {}

Expand Down
146 changes: 95 additions & 51 deletions packages/compiler/src/i18n/i18n_parser.ts
Expand Up @@ -7,7 +7,7 @@
*/

import {Lexer as ExpressionLexer} from '../expression_parser/lexer';
import {Parser as ExpressionParser} from '../expression_parser/parser';
import {InterpolationPiece, Parser as ExpressionParser} from '../expression_parser/parser';
import * as html from '../ml_parser/ast';
import {getHtmlTagDefinition} from '../ml_parser/html_tags';
import {InterpolationConfig} from '../ml_parser/interpolation_config';
Expand Down Expand Up @@ -156,73 +156,110 @@ class _I18nVisitor implements html.Visitor {
throw new Error('Unreachable code');
}

/**
* Split the, potentially interpolated, text up into text and placeholder pieces.
*
* @param text The potentially interpolated string to be split.
* @param sourceSpan The span of the whole of the `text` string.
* @param context The current context of the visitor, used to compute and store placeholders.
* @param previousI18n Any i18n metadata associated with this `text` from a previous pass.
*/
private _visitTextWithInterpolation(
text: string, sourceSpan: ParseSourceSpan, context: I18nMessageVisitorContext,
previousI18n: i18n.I18nMeta|undefined): i18n.Node {
const splitInterpolation = this._expressionParser.splitInterpolation(
const {strings, expressions} = this._expressionParser.splitInterpolation(
text, sourceSpan.start.toString(), this._interpolationConfig);

if (!splitInterpolation) {
// No expression, return a single text
// No expressions, return a single text.
if (expressions.length === 0) {
return new i18n.Text(text, sourceSpan);
}

// Return a group of text + expressions
// Return a sequence of `Text` and `Placeholder` nodes grouped in a `Container`.
const nodes: i18n.Node[] = [];
const container = new i18n.Container(nodes, sourceSpan);
const {start: sDelimiter, end: eDelimiter} = this._interpolationConfig;

for (let i = 0; i < splitInterpolation.strings.length - 1; i++) {
const expression = splitInterpolation.expressions[i];
const baseName = _extractPlaceholderName(expression) || 'INTERPOLATION';
const phName = context.placeholderRegistry.getPlaceholderName(baseName, expression);

if (splitInterpolation.strings[i].length) {
// No need to add empty strings
const stringSpan = getOffsetSourceSpan(sourceSpan, splitInterpolation.stringSpans[i]);
nodes.push(new i18n.Text(splitInterpolation.strings[i], stringSpan));
}

const expressionSpan =
getOffsetSourceSpan(sourceSpan, splitInterpolation.expressionsSpans[i]);
nodes.push(new i18n.Placeholder(expression, phName, expressionSpan));
context.placeholderToContent[phName] = {
text: sDelimiter + expression + eDelimiter,
sourceSpan: expressionSpan,
};
for (let i = 0; i < strings.length - 1; i++) {
this._addText(nodes, strings[i], sourceSpan);
this._addPlaceholder(nodes, context, expressions[i], sourceSpan);
}

// The last index contains no expression
const lastStringIdx = splitInterpolation.strings.length - 1;
if (splitInterpolation.strings[lastStringIdx].length) {
const stringSpan =
getOffsetSourceSpan(sourceSpan, splitInterpolation.stringSpans[lastStringIdx]);
nodes.push(new i18n.Text(splitInterpolation.strings[lastStringIdx], stringSpan));
}
this._addText(nodes, strings[strings.length - 1], sourceSpan);

if (previousI18n instanceof i18n.Message) {
// The `previousI18n` is an i18n `Message`, so we are processing an `Attribute` with i18n
// metadata. The `Message` should consist only of a single `Container` that contains the
// parts (`Text` and `Placeholder`) to process.
assertSingleContainerMessage(previousI18n);
previousI18n = previousI18n.nodes[0];
}
// Whitespace removal may have invalidated the interpolation source-spans.
reusePreviousSourceSpans(nodes, previousI18n);

if (previousI18n instanceof i18n.Container) {
// The `previousI18n` is a `Container`, which means that this is a second i18n extraction pass
// after whitespace has been removed from the AST ndoes.
assertEquivalentNodes(previousI18n.children, nodes);
return new i18n.Container(nodes, sourceSpan);
}

// Reuse the source-spans from the first pass.
for (let i = 0; i < nodes.length; i++) {
nodes[i].sourceSpan = previousI18n.children[i].sourceSpan;
}
/**
* Create a new `Text` node from the `textPiece` and add it to the `nodes` collection.
*
* @param nodes The nodes to which the created `Text` node should be added.
* @param textPiece The text and relative span information for this `Text` node.
* @param interpolationSpan The span of the whole interpolated text.
*/
private _addText(
nodes: i18n.Node[], textPiece: InterpolationPiece, interpolationSpan: ParseSourceSpan): void {
if (textPiece.text.length > 0) {
// No need to add empty strings
const stringSpan = getOffsetSourceSpan(interpolationSpan, textPiece);
nodes.push(new i18n.Text(textPiece.text, stringSpan));
}
}

return container;
/**
* Create a new `Placeholder` node from the `expression` and add it to the `nodes` collection.
*
* @param nodes The nodes to which the created `Text` node should be added.
* @param context The current context of the visitor, used to compute and store placeholders.
* @param expression The expression text and relative span information for this `Placeholder`
* node.
* @param interpolationSpan The span of the whole interpolated text.
*/
private _addPlaceholder(
nodes: i18n.Node[], context: I18nMessageVisitorContext, expression: InterpolationPiece,
interpolationSpan: ParseSourceSpan): void {
const sourceSpan = getOffsetSourceSpan(interpolationSpan, expression);
const baseName = extractPlaceholderName(expression.text) || 'INTERPOLATION';
const phName = context.placeholderRegistry.getPlaceholderName(baseName, expression.text);
const text = this._interpolationConfig.start + expression.text + this._interpolationConfig.end;
context.placeholderToContent[phName] = {text, sourceSpan};
nodes.push(new i18n.Placeholder(expression.text, phName, sourceSpan));
}
}

/**
* Re-use the source-spans from `previousI18n` metadata for the `nodes`.
*
* Whitespace removal can invalidate the source-spans of interpolation nodes, so we
* reuse the source-span stored from a previous pass before the whitespace was removed.
*
* @param nodes The `Text` and `Placeholder` nodes to be processed.
* @param previousI18n Any i18n metadata for these `nodes` stored from a previous pass.
*/
function reusePreviousSourceSpans(nodes: i18n.Node[], previousI18n: i18n.I18nMeta|undefined): void {
if (previousI18n instanceof i18n.Message) {
// The `previousI18n` is an i18n `Message`, so we are processing an `Attribute` with i18n
// metadata. The `Message` should consist only of a single `Container` that contains the
// parts (`Text` and `Placeholder`) to process.
assertSingleContainerMessage(previousI18n);
previousI18n = previousI18n.nodes[0];
}

if (previousI18n instanceof i18n.Container) {
// The `previousI18n` is a `Container`, which means that this is a second i18n extraction pass
// after whitespace has been removed from the AST ndoes.
assertEquivalentNodes(previousI18n.children, nodes);

// Reuse the source-spans from the first pass.
for (let i = 0; i < nodes.length; i++) {
nodes[i].sourceSpan = previousI18n.children[i].sourceSpan;
}
}
}

/**
* Asserts that the `message` contains exactly one `Container` node.
*/
function assertSingleContainerMessage(message: i18n.Message): void {
const nodes = message.nodes;
if (nodes.length !== 1 || !(nodes[0] instanceof i18n.Container)) {
Expand All @@ -231,6 +268,10 @@ function assertSingleContainerMessage(message: i18n.Message): void {
}
}

/**
* Asserts that the `previousNodes` and `node` collections have the same number of elements and
* corresponding elements have the same node type.
*/
function assertEquivalentNodes(previousNodes: i18n.Node[], nodes: i18n.Node[]): void {
if (previousNodes.length !== nodes.length) {
throw new Error('The number of i18n message children changed between first and second pass.');
Expand All @@ -241,14 +282,17 @@ function assertEquivalentNodes(previousNodes: i18n.Node[], nodes: i18n.Node[]):
}
}

/**
* Create a new `ParseSourceSpan` from the `sourceSpan`, offset by the `start` and `end` values.
*/
function getOffsetSourceSpan(
sourceSpan: ParseSourceSpan, {start, end}: {start: number, end: number}): ParseSourceSpan {
sourceSpan: ParseSourceSpan, {start, end}: InterpolationPiece): ParseSourceSpan {
return new ParseSourceSpan(sourceSpan.fullStart.moveBy(start), sourceSpan.fullStart.moveBy(end));
}

const _CUSTOM_PH_EXP =
/\/\/[\s\S]*i18n[\s\S]*\([\s\S]*ph[\s\S]*=[\s\S]*("|')([\s\S]*?)\1[\s\S]*\)/g;

function _extractPlaceholderName(input: string): string {
function extractPlaceholderName(input: string): string {
return input.split(_CUSTOM_PH_EXP)[2];
}

0 comments on commit 9ee0012

Please sign in to comment.