Add "extended errors"

The lexer and parser now always return a ParseError; the Error() method is mostly unchanged:

    toml: line 1 (last key "x.key"): newlines not allowed within inline tables

This adds an Extended() method, which adds some context about where the error occurred, similar to e.g. clang or the Rust compiler:

    toml: error: newlines not allowed within inline tables

    At line 1, column 18:

          1 | x = [{ key = 42 #
                               ^

And ExtendedWithUsage() also adds some usage guidance (not always present):

    toml: error: newlines not allowed within inline tables

    At line 1, column 16:

          1 | x = [{ key = 42
                             ^

    Error help:

        Inline tables must always be on a single line:

            table = {key = 42, second = 43}

        It is invalid to split them over multiple lines like so:

            # INVALID
            table = {
                key    = 42,
                second = 43
            }

        Use regular for this:

            [table]
            key    = 42
            second = 43

The line/column information should now also always be correct, and a number of error messages have been tweaked a bit.

Fixes #158
Fixes #201
Fixes #217
BurntSushi · Sep 23, 2021 · d6f7ab6 · d6f7ab6
1 parent e0af6a2
commit d6f7ab6
Show file tree

Hide file tree

Showing 6 changed files with 452 additions and 128 deletions.
diff --git a/decode_test.go b/decode_test.go
@@ -1,7 +1,6 @@
 package toml
 
 import (
-	"errors"
 	"fmt"
 	"io/ioutil"
 	"os"
@@ -710,39 +709,6 @@ func TestDecodeDatetime(t *testing.T) {
 	}
 }
 
-func TestParseError(t *testing.T) {
-	file :=
-		`a = "a"
-b = "b"
-c = 001  # invalid
-`
-
-	var s struct {
-		A, B string
-		C    int
-	}
-	_, err := Decode(file, &s)
-	if err == nil {
-		t.Fatal("err is nil")
-	}
-
-	var pErr ParseError
-	if !errors.As(err, &pErr) {
-		t.Fatalf("err is not a ParseError: %T %[1]v", err)
-	}
-
-	want := ParseError{
-		Line:    3,
-		LastKey: "c",
-		Message: `Invalid integer "001": cannot have leading zeroes`,
-	}
-	if !strings.Contains(pErr.Message, want.Message) ||
-		pErr.Line != want.Line ||
-		pErr.LastKey != want.LastKey {
-		t.Errorf("unexpected data\nhave: %#v\nwant: %#v", pErr, want)
-	}
-}
-
 // errorContains checks if the error message in have contains the text in
 // want.
 //

diff --git a/error.go b/error.go
@@ -0,0 +1,179 @@
+package toml
+
+import (
+	"fmt"
+	"strings"
+)
+
+// ParseError is used when there is an error decoding TOML data.
+//
+// For example invalid TOML syntax, duplicate keys, etc.
+type ParseError struct {
+	Message  string   // Short technical message.
+	Usage    string   // Longer message with usage guidance; may be blank.
+	Position Position // Position of the error.
+	LastKey  string   // Last parsed key, may be blank.
+
+	err   error  // Underlying error; its text is used when Message is blank.
+	input string // Source text that Extended() quotes lines from.
+}
+
+// Position of an error.
+type Position struct {
+	Line  int // Line number, starting at 1.
+	Start int // Start of error, as byte offset starting at 0.
+	Len   int // Length in bytes.
+}
+
+// Error returns the error as a single line: "toml: line N: msg", with
+// (last key "k") added after the line number when the last parsed key is
+// known.
+func (pe ParseError) Error() string {
+	msg := pe.Message
+	// A blank Message means the text lives in pe.err (error from errorf()).
+	// The nil check keeps a zero-value or hand-constructed ParseError from
+	// panicking on a nil error.
+	if msg == "" && pe.err != nil {
+		msg = pe.err.Error()
+	}
+
+	if pe.LastKey == "" {
+		return fmt.Sprintf("toml: line %d: %s", pe.Position.Line, msg)
+	}
+	return fmt.Sprintf("toml: line %d (last key %q): %s",
+		pe.Position.Line, pe.LastKey, msg)
+}
+
+// ExtendedWithUsage returns the same text as Extended(), followed by an
+// indented "Error help" section when the underlying error provides non-blank
+// usage guidance through a Usage() method.
+func (pe ParseError) ExtendedWithUsage() string {
+	out := pe.Extended()
+
+	helper, ok := pe.err.(interface{ Usage() string })
+	if !ok || helper.Usage() == "" {
+		return out
+	}
+
+	// Indent every line of the guidance by four spaces.
+	help := strings.TrimSpace(helper.Usage())
+	help = strings.ReplaceAll(help, "\n", "\n    ")
+	return out + "Error help:\n\n    " + help + "\n"
+}
+
+// Extended returns the error message followed by the line the error occurred
+// on (plus up to two preceding lines for context), with the offending bytes
+// marked with '^' on the line below.
+//
+// It falls back to Error() when the source text is not available, or when the
+// recorded line number lies outside of it.
+func (pe ParseError) Extended() string {
+	if pe.input == "" { // Should never happen, but just in case.
+		return pe.Error()
+	}
+
+	var (
+		lines = strings.Split(pe.input, "\n")
+		line  = pe.Position.Line
+		width = pe.Position.Len
+		col   = pe.column(lines)
+		b     = new(strings.Builder)
+	)
+	// lines is indexed with line-1 (and line-2, line-3) below; an out-of-range
+	// line number would panic.
+	if line < 1 || line > len(lines) {
+		return pe.Error()
+	}
+	if width < 0 { // strings.Repeat() panics on a negative count.
+		width = 0
+	}
+
+	msg := pe.Message
+	if msg == "" && pe.err != nil {
+		msg = pe.err.Error()
+	}
+
+	// TODO: don't show control characters as literals? This may not show up
+	// well everywhere.
+
+	// col is a 0-based byte offset within the line; columns are reported
+	// 1-based, and the end of a range is inclusive. A zero-length position is
+	// reported as a single column.
+	if width <= 1 {
+		fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d:\n\n",
+			msg, line, col+1)
+	} else {
+		fmt.Fprintf(b, "toml: error: %s\n\nAt line %d, column %d-%d:\n\n",
+			msg, line, col+1, col+width)
+	}
+	if line > 2 {
+		fmt.Fprintf(b, "% 7d | %s\n", line-2, lines[line-3])
+	}
+	if line > 1 {
+		fmt.Fprintf(b, "% 7d | %s\n", line-1, lines[line-2])
+	}
+	fmt.Fprintf(b, "% 7d | %s\n", line, lines[line-1])
+	// The 10 spaces match the width of the "% 7d | " gutter above.
+	fmt.Fprintf(b, "% 10s%s%s\n", "", strings.Repeat(" ", col), strings.Repeat("^", width))
+	return b.String()
+}
+
+// column converts the byte offset in pe.Position.Start to a 0-based byte
+// offset within its line, given the input split on newlines. It returns 0 if
+// the offset lies beyond the last line.
+func (pe ParseError) column(lines []string) int {
+	offset := 0 // Byte offset at which the current line starts.
+	for _, line := range lines {
+		next := offset + len(line) + 1 // +1 for the removed newline
+		if next < pe.Position.Start {
+			offset = next
+			continue
+		}
+
+		if c := pe.Position.Start - offset; c > 0 {
+			return c
+		}
+		return 0 // Should never be negative, but just in case.
+	}
+	return 0
+}
+
+// Typed errors, each carrying the offending value (if any) that its Error()
+// method formats into the message. The errLex prefix suggests they are
+// created by the lexer, which is not visible here.
+type (
+	errLexControl       struct{ r rune }
+	errLexEscape        struct{ r rune }
+	errLexUTF8          struct{ b byte }
+	errLexInvalidNum    struct{ v string }
+	errLexInvalidDate   struct{ v string }
+	errLexInlineTableNL struct{}
+	errLexStringNL      struct{}
+)
+
+// Error() gives the short message for each error type; Usage() gives the
+// longer guidance that ExtendedWithUsage() appends, or "" when there is none.
+
+func (e errLexControl) Error() string {
+	return fmt.Sprintf("TOML files cannot contain control characters: '0x%02x'", e.r)
+}
+func (e errLexControl) Usage() string { return "" }
+
+func (e errLexEscape) Error() string        { return fmt.Sprintf(`invalid escape in string '\%c'`, e.r) }
+func (e errLexEscape) Usage() string        { return usageEscape }
+func (e errLexUTF8) Error() string          { return fmt.Sprintf("invalid UTF-8 byte: 0x%02x", e.b) }
+func (e errLexUTF8) Usage() string          { return "" }
+func (e errLexInvalidNum) Error() string    { return fmt.Sprintf("invalid number: %q", e.v) }
+func (e errLexInvalidNum) Usage() string    { return "" }
+func (e errLexInvalidDate) Error() string   { return fmt.Sprintf("invalid date: %q", e.v) }
+func (e errLexInvalidDate) Usage() string   { return "" }
+func (e errLexInlineTableNL) Error() string { return "newlines not allowed within inline tables" }
+func (e errLexInlineTableNL) Usage() string { return usageInlineNewline }
+func (e errLexStringNL) Error() string      { return "strings cannot contain newlines" }
+func (e errLexStringNL) Usage() string      { return usageStringNewline }
+
+// usageEscape is the usage guidance returned by errLexEscape.Usage().
+const usageEscape = `
+A '\' inside a "-delimited string is interpreted as an escape character.
+
+The following escape sequences are supported:
+\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX
+
+To prevent a '\' from being recognized as an escape character, use:
+
+- a ' or '''-delimited string; escape characters aren't processed in them; or
+- two backslashes to get a single backslash: '\\'.
+
+If you're trying to add a Windows path (e.g. "C:\Users\martin") then using '/'
+instead of '\' will usually also work: "C:/Users/martin".
+`
+
+// usageInlineNewline is the usage guidance returned by
+// errLexInlineTableNL.Usage().
+const usageInlineNewline = `
+Inline tables must always be on a single line:
+
+    table = {key = 42, second = 43}
+
+It is invalid to split them over multiple lines like so:
+
+    # INVALID
+    table = {
+        key    = 42,
+        second = 43
+    }
+
+Use regular tables for this:
+
+    [table]
+    key    = 42
+    second = 43
+`
+
+// usageStringNewline is the usage guidance returned by
+// errLexStringNL.Usage().
+const usageStringNewline = `
+Strings must always be on a single line, and cannot span more than one line:
+
+    # INVALID
+    string = "Hello,
+    world!"
+
+Instead use """ or ''' to split strings over multiple lines:
+
+    string = """Hello,
+    world!"""
+`
diff --git a/error_test.go b/error_test.go
@@ -0,0 +1,117 @@
+//go:build go1.16
+// +build go1.16
+
+package toml_test
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"strings"
+	"testing"
+
+	"github.com/BurntSushi/toml"
+	tomltest "github.com/BurntSushi/toml/internal/toml-test"
+)
+
+// TestErrorPosition checks that ExtendedWithUsage() reports the expected
+// message, line, column, and source excerpt for a few invalid documents from
+// the embedded test suite.
+func TestErrorPosition(t *testing.T) {
+	// Note: take care to use leading spaces (not tabs).
+	tests := []struct {
+		test, err string
+	}{
+		{"array/missing-separator.toml", `
+toml: error: expected a comma (',') or array terminator (']'), but got '2'
+
+At line 1, column 13:
+
+      1 | wrong = [ 1 2 3 ]
+                      ^`},
+
+		{"array/no-close-2.toml", `
+toml: error: expected a comma (',') or array terminator (']'), but got end of file
+
+At line 1, column 10:
+
+      1 | x = [42 #
+                   ^`},
+	}
+
+	fsys := tomltest.EmbeddedTests()
+	for _, tt := range tests {
+		t.Run(tt.test, func(t *testing.T) {
+			input, err := fs.ReadFile(fsys, "invalid/"+tt.test)
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			var x interface{}
+			_, err = toml.Decode(string(input), &x)
+			if err == nil {
+				t.Fatal("err is nil")
+			}
+
+			var pErr toml.ParseError
+			if !errors.As(err, &pErr) {
+				// Fatal rather than Error: pErr is still the zero value here,
+				// so there is nothing meaningful left to compare.
+				t.Fatalf("err is not a ParseError: %T %[1]v", err)
+			}
+
+			want := strings.TrimPrefix(tt.err, "\n") // Drop the literal's leading newline.
+			have := pErr.ExtendedWithUsage()
+			if !strings.Contains(have, want) {
+				t.Fatalf("\nwant:\n%s\nhave:\n%s", want, have)
+			}
+		})
+	}
+}
+
+// TestParseError prints every error produced by the embedded test files, which
+// is useful to see if they look alright. It only fails when a file cannot be
+// read or a decode error is not a ParseError.
+func TestParseError(t *testing.T) {
+	fsys := tomltest.EmbeddedTests()
+	err := fs.WalkDir(fsys, ".", func(path string, f fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if !strings.HasSuffix(path, ".toml") {
+			return nil
+		}
+
+		if f.Name() == "string-multiline-escape-space.toml" || f.Name() == "bad-utf8-at-end.toml" {
+			return nil
+		}
+
+		input, err := fs.ReadFile(fsys, path)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		var x interface{}
+		_, err = toml.Decode(string(input), &x)
+		if err == nil {
+			return nil
+		}
+
+		var pErr toml.ParseError
+		if !errors.As(err, &pErr) {
+			t.Errorf("err is not a ParseError: %T %[1]v", err)
+			return nil
+		}
+
+		// Pad the header to a fixed width; clamp the padding so that a path
+		// longer than 65 bytes doesn't make strings.Repeat() panic on a
+		// negative count.
+		pad := 65 - len(path)
+		if pad < 0 {
+			pad = 0
+		}
+
+		fmt.Println()
+		fmt.Println("━━━", path, strings.Repeat("━", pad))
+		fmt.Print(pErr.Error())
+		fmt.Println()
+		fmt.Println(strings.Repeat("–", 70))
+		fmt.Print(pErr.Extended())
+		fmt.Println(strings.Repeat("–", 70))
+		fmt.Print(pErr.ExtendedWithUsage())
+		fmt.Println(strings.Repeat("━", 70))
+		return nil
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/go.sum b/go.sum