Skip to content

Commit

Permalink
fix: Avoid truncating last char from folded strings
Browse files Browse the repository at this point in the history
As described in goccy#433, if the last character of a folded string is the
very last character in the file (no trailing whitespace), it's not
included in the token's `Value`.

While fixing this, I also noticed that the column information for
folded and literal strings pointed to the end of the string rather than
the beginning. I've (hopefully) fixed that as well.

Fixes goccy#433

Signed-off-by: Charith Ellawala <charith@cerbos.dev>
  • Loading branch information
charithe committed Apr 29, 2024
1 parent 4653a1b commit aee0a87
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 4 deletions.
54 changes: 54 additions & 0 deletions lexer/lexer_test.go
Expand Up @@ -586,6 +586,60 @@ b: 1`,
},
},
},
{
name: "literal string without trailing whitespace",
src: `a: |
Text`,
expect: []testToken{
{
line: 1,
column: 1,
value: "a",
},
{
line: 1,
column: 2,
value: ":",
},
{
line: 1,
column: 4,
value: "|",
},
{
line: 2,
column: 3,
value: "Text",
},
},
},
{
name: "folded string without trailing whitespace",
src: `a: >
Text`,
expect: []testToken{
{
line: 1,
column: 1,
value: "a",
},
{
line: 1,
column: 2,
value: ":",
},
{
line: 1,
column: 4,
value: ">",
},
{
line: 2,
column: 3,
value: "Text",
},
},
},
}

for _, tc := range tests {
Expand Down
12 changes: 8 additions & 4 deletions scanner/scanner.go
Expand Up @@ -377,7 +377,7 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) {
case 'x':
if idx+3 >= size {
// TODO: need to return error
//err = xerrors.New("invalid escape character \\x")
// err = xerrors.New("invalid escape character \\x")
return
}
codeNum := hexRunesToInt(src[idx+2 : idx+4])
Expand All @@ -387,7 +387,7 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) {
case 'u':
if idx+5 >= size {
// TODO: need to return error
//err = xerrors.New("invalid escape character \\u")
// err = xerrors.New("invalid escape character \\u")
return
}
codeNum := hexRunesToInt(src[idx+2 : idx+6])
Expand All @@ -397,7 +397,7 @@ func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) {
case 'U':
if idx+9 >= size {
// TODO: need to return error
//err = xerrors.New("invalid escape character \\U")
// err = xerrors.New("invalid escape character \\U")
return
}
codeNum := hexRunesToInt(src[idx+2 : idx+10])
Expand Down Expand Up @@ -512,9 +512,13 @@ func (s *Scanner) scanLiteral(ctx *Context, c rune) {
if ctx.isEOS() {
if ctx.isLiteral {
ctx.addBuf(c)
} else if ctx.isFolded && !s.isNewLineChar(c) {
ctx.addBuf(c)
}
value := ctx.bufferedSrc()
ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
pos := s.pos()
pos.Column = s.docStartColumn
ctx.addToken(token.String(string(value), string(ctx.obuf), pos))
ctx.resetBuffer()
s.progressColumn(ctx, 1)
} else if s.isNewLineChar(c) {
Expand Down

0 comments on commit aee0a87

Please sign in to comment.