Commit

refactor(compiler): capture fullStart locations when tokenizing (#39486)

This commit ensures that when leading whitespace is skipped by
the tokenizer, the original start location (before skipping) is captured
in the `fullStart` property of the token's source-span.

PR Close #39486
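
To make the behaviour concrete, here is a minimal sketch of how the new property surfaces to callers, modelled on the helpers in the updated lexer_spec.ts below; the import paths and the logging are illustrative assumptions, not part of the commit.

// Sketch only: mirrors the spec's lex.tokenize() call and leadingTriviaChars option;
// the relative import paths are assumed to match the spec file's layout.
import {getHtmlTagDefinition} from '../../src/ml_parser/html_tags';
import * as lex from '../../src/ml_parser/lexer';

const {tokens} = lex.tokenize(
    '<t>\n \t a</t>', 'someUrl', getHtmlTagDefinition,
    {leadingTriviaChars: ['\n', ' ', '\t']});

for (const token of tokens) {
  const {start, fullStart} = token.sourceSpan;
  // For the TEXT token, `start` is 1:3 (after the skipped '\n \t ') while
  // `fullStart` is 0:3 (immediately after '<t>', before the trivia was skipped).
  console.log(token.type, `${start.line}:${start.col}`, `${fullStart.line}:${fullStart.col}`);
}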
petebacondarwin authored and mhevery committed Nov 5, 2020
1 parent 92f833b commit 94d0013
Showing 2 changed files with 27 additions and 13 deletions.
18 changes: 12 additions & 6 deletions packages/compiler/src/ml_parser/lexer.ts
@@ -917,19 +917,20 @@ class PlainCharacterCursor implements CharacterCursor {
 
   getSpan(start?: this, leadingTriviaCodePoints?: number[]): ParseSourceSpan {
     start = start || this;
-    let cloned = false;
+    let fullStart = start;
     if (leadingTriviaCodePoints) {
       while (this.diff(start) > 0 && leadingTriviaCodePoints.indexOf(start.peek()) !== -1) {
-        if (!cloned) {
+        if (fullStart === start) {
           start = start.clone() as this;
-          cloned = true;
         }
         start.advance();
       }
     }
-    return new ParseSourceSpan(
-        new ParseLocation(start.file, start.state.offset, start.state.line, start.state.column),
-        new ParseLocation(this.file, this.state.offset, this.state.line, this.state.column));
+    const startLocation = this.locationFromCursor(start);
+    const endLocation = this.locationFromCursor(this);
+    const fullStartLocation =
+        fullStart !== start ? this.locationFromCursor(fullStart) : startLocation;
+    return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
   }
 
   getChars(start: this): string {
@@ -959,6 +960,11 @@ class PlainCharacterCursor implements CharacterCursor {
   protected updatePeek(state: CursorState): void {
     state.peek = state.offset >= this.end ? chars.$EOF : this.charAt(state.offset);
   }
+
+  private locationFromCursor(cursor: this): ParseLocation {
+    return new ParseLocation(
+        cursor.file, cursor.state.offset, cursor.state.line, cursor.state.column);
+  }
 }
 
 class EscapedCharacterCursor extends PlainCharacterCursor {
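
Restated outside the diff: getSpan() keeps fullStart pointing at the original cursor, clones start only the first time a leading-trivia code point is seen, and advances the clone past the trivia, so both locations can be reported. A simplified, self-contained sketch of that pattern (the Cursor interface below is hypothetical, not the compiler's CharacterCursor):

// Hypothetical minimal cursor interface standing in for the compiler's CharacterCursor.
interface Cursor {
  peek(): number;               // code point at the current position
  advance(): void;              // move one code point forward
  clone(): Cursor;              // independent copy of the current position
  diff(other: Cursor): number;  // code points between `other` and this cursor
}

// Returns [start, fullStart]: `start` has skipped leading trivia, `fullStart` has not.
function skipLeadingTrivia(start: Cursor, end: Cursor, trivia: number[]): [Cursor, Cursor] {
  const fullStart = start;
  while (end.diff(start) > 0 && trivia.indexOf(start.peek()) !== -1) {
    if (fullStart === start) {
      start = start.clone();  // clone once, so fullStart keeps the original location
    }
    start.advance();
  }
  return [start, fullStart];
}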
22 changes: 15 additions & 7 deletions packages/compiler/test/ml_parser/lexer_spec.ts
@@ -54,14 +54,14 @@ import {ParseLocation, ParseSourceFile, ParseSourceSpan} from '../../src/parse_util';
     });
 
     it('should skip over leading trivia for source-span start', () => {
-      expect(tokenizeAndHumanizeLineColumn(
-          '<t>\n \t a</t>', {leadingTriviaChars: ['\n', ' ', '\t']}))
+      expect(
+          tokenizeAndHumanizeFullStart('<t>\n \t a</t>', {leadingTriviaChars: ['\n', ' ', '\t']}))
           .toEqual([
-            [lex.TokenType.TAG_OPEN_START, '0:0'],
-            [lex.TokenType.TAG_OPEN_END, '0:2'],
-            [lex.TokenType.TEXT, '1:3'],
-            [lex.TokenType.TAG_CLOSE, '1:4'],
-            [lex.TokenType.EOF, '1:8'],
+            [lex.TokenType.TAG_OPEN_START, '0:0', '0:0'],
+            [lex.TokenType.TAG_OPEN_END, '0:2', '0:2'],
+            [lex.TokenType.TEXT, '1:3', '0:3'],
+            [lex.TokenType.TAG_CLOSE, '1:4', '1:4'],
+            [lex.TokenType.EOF, '1:8', '1:8'],
           ]);
     });
   });
@@ -1560,6 +1560,14 @@ function tokenizeAndHumanizeLineColumn(input: string, options?: lex.TokenizeOptions): any[] {
       .tokens.map(token => [<any>token.type, humanizeLineColumn(token.sourceSpan.start)]);
 }
 
+function tokenizeAndHumanizeFullStart(input: string, options?: lex.TokenizeOptions): any[] {
+  return tokenizeWithoutErrors(input, options)
+      .tokens.map(
+          token =>
+              [<any>token.type, humanizeLineColumn(token.sourceSpan.start),
+               humanizeLineColumn(token.sourceSpan.fullStart)]);
+}
+
 function tokenizeAndHumanizeErrors(input: string, options?: lex.TokenizeOptions): any[] {
   return lex.tokenize(input, 'someUrl', getHtmlTagDefinition, options)
       .errors.map(e => [<any>e.tokenType, e.msg, humanizeLineColumn(e.span.start)]);
