diff --git a/compiler_test.go b/compiler_test.go
index d12240aa..0f6463b0 100644
--- a/compiler_test.go
+++ b/compiler_test.go
@@ -2,6 +2,7 @@ package gojq_test
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"log"
 	"os"
@@ -275,6 +276,39 @@ func TestCodeCompile_OptimizeJumps(t *testing.T) {
 	}
 }
 
+// TestParseErrorTokenOffset checks that ParseError.Offset is the byte offset
+// at the end of the offending token, both for ASCII tokens and for tokens
+// that are a single multi-byte UTF-8 rune (the emoji below is 4 bytes long).
+func TestParseErrorTokenOffset(t *testing.T) {
+	testCases := []struct {
+		src    string
+		offset int
+	}{
+		{src: "^", offset: 1},
+		{src: " ^", offset: 2},
+		{src: " ^ ", offset: 2},
+		{src: "👍", offset: 4},
+		{src: " 👍", offset: 5},
+		{src: " 👍 ", offset: 5},
+		{src: "test👍", offset: 8},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.src, func(t *testing.T) {
+			_, err := gojq.Parse(tc.src)
+			if err == nil {
+				t.Fatal("expected: error")
+			}
+			var pe *gojq.ParseError
+			if !errors.As(err, &pe) {
+				t.Fatalf("expected: *gojq.ParseError, got %v", err)
+			}
+			if pe.Offset != tc.offset {
+				t.Fatalf("expected: %v, got %v", tc.offset, pe.Offset)
+			}
+		})
+	}
+}
+
 func TestCodeRun_Race(t *testing.T) {
 	query, err := gojq.Parse("range(10)")
 	if err != nil {
diff --git a/lexer.go b/lexer.go
index 509d60c6..0c2efd12 100644
--- a/lexer.go
+++ b/lexer.go
@@ -235,7 +235,8 @@ func (l *lexer) Lex(lval *yySymType) (tokenType int) {
 	default:
 		if ch >= utf8.RuneSelf {
 			r, size := utf8.DecodeRuneInString(l.source[l.offset-1:])
-			l.offset += size
+			// next() already consumed the rune's first byte, so advance by size-1.
+			l.offset += size - 1
 			l.token = string(r)
 		}
 	}