encoding/toml: add support for inline tables

Decoding a field is moved to a method, as it is now used for both top-level key-values and key-values inside an inline table. We will soon use this method for tables with headers too. While here, add more edge cases for duplicate keys, as well as test cases where the keys differ only in subtle ways. Also ensure that go-toml's Unmarshal errors in all the cases where our decoder errors, primarily to ensure that we are identical in terms of failing on duplicate keys. This test validation with go-toml might need to be tweaked if or when our decoder ever starts giving any CUE-specific errors. Signed-off-by: Daniel Martí <mvdan@mvdan.cc> Change-Id: Idfb92615c2bbdfd2cdc7fe299990438bdf4eb009 Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1194758 Reviewed-by: Roger Peppe <rogpeppe@gmail.com> Unity-Result: CUE porcuepine <cue.porcuepine@gmail.com> TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
cue-lang · May 16, 2024 · 27a0190 · 27a0190
1 parent 3588e1a
commit 27a0190
Show file tree

Hide file tree

Showing 2 changed files with 156 additions and 48 deletions.
diff --git a/encoding/toml/decode.go b/encoding/toml/decode.go
@@ -21,6 +21,7 @@ package toml
 import (
 	"fmt"
 	"io"
+	"strconv"
 
 	toml "github.com/pelletier/go-toml/v2/unstable"
 
@@ -118,43 +119,11 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
 	//       bar: baz: "value"
 	//   }
 	case toml.KeyValue:
-		keys := tnode.Key()
-		curName := string(keys.Node().Data)
-		curField := &ast.Field{
-			Label: &ast.Ident{
-				NamePos: token.NoPos.WithRel(token.Newline),
-				Name:    curName,
-			},
-		}
-
-		topField := curField
-		rootKey := quoteLabelIfNeeded(curName)
-
-		keys.Next() // TODO(mvdan): for some reason the first Next call doesn't count?
-		for keys.Next() {
-			nextName := string(keys.Node().Data)
-			nextField := &ast.Field{
-				Label: &ast.Ident{
-					NamePos: token.NoPos.WithRel(token.Blank),
-					Name:    nextName,
-				},
-			}
-
-			curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
-			curField = nextField
-			// TODO(mvdan): use an append-like API once we have benchmarks
-			rootKey += "." + quoteLabelIfNeeded(nextName)
-		}
-		if d.seenKeys[rootKey] {
-			return fmt.Errorf("duplicate key: %s", rootKey)
-		}
-		d.seenKeys[rootKey] = true
-		value, err := d.decodeExpr(tnode.Value())
+		field, err := d.decodeField("", tnode)
 		if err != nil {
 			return err
 		}
-		curField.Value = value
-		d.currentFields = append(d.currentFields, topField)
+		d.currentFields = append(d.currentFields, field)
 	// TODO(mvdan): tables
 	// TODO(mvdan): array tables
 	default:
@@ -171,7 +140,7 @@ func quoteLabelIfNeeded(name string) string {
 }
 
 // decodeExpr decodes a single TOML value node into a CUE expression; rootKey is the dotted key path of the value, used to detect duplicate keys.
-func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
+func (d *Decoder) decodeExpr(rootKey string, tnode *toml.Node) (ast.Expr, error) {
 	// TODO(mvdan): we currently assume that TOML basic literals (string, int, float)
 	// are also valid CUE literals; we should double check this, perhaps via fuzzing.
 	data := string(tnode.Data)
@@ -188,16 +157,82 @@ func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
 		list := &ast.ListLit{}
 		elems := tnode.Children()
 		for elems.Next() {
-			elem, err := d.decodeExpr(elems.Node())
+			// A path into an array element is like "arr.3",
+			// which looks very similar to a table's "tbl.key",
+			// particularly since a table key can be any string.
+			// However, we just need these keys to detect duplicates,
+			// and a path cannot be both an array and table, so it's OK.
+			rootKey := rootKey + "." + strconv.Itoa(len(list.Elts))
+			elem, err := d.decodeExpr(rootKey, elems.Node())
 			if err != nil {
 				return nil, err
 			}
 			list.Elts = append(list.Elts, elem)
 		}
 		return list, nil
+	case toml.InlineTable:
+		strct := &ast.StructLit{
+			// We want a single-line struct, just like TOML's inline tables are on a single line.
+			Lbrace: token.NoPos.WithRel(token.Blank),
+			Rbrace: token.NoPos.WithRel(token.Blank),
+		}
+		elems := tnode.Children()
+		for elems.Next() {
+			field, err := d.decodeField(rootKey, elems.Node())
+			if err != nil {
+				return nil, err
+			}
+			strct.Elts = append(strct.Elts, field)
+		}
+		return strct, nil
 	// TODO(mvdan): dates and times
-	// TODO(mvdan): inline tables
 	default:
 		return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v\n", tnode.Kind, tnode)
 	}
 }
+
+func (d *Decoder) decodeField(rootKey string, tnode *toml.Node) (*ast.Field, error) {
+	keys := tnode.Key()
+	curName := string(keys.Node().Data)
+
+	// If we are decoding a top-level field, it follows a newline.
+	// Otherwise, we are in an inline table, and it goes on the same line.
+	relPos := token.Newline
+	if rootKey != "" {
+		rootKey += "."
+		relPos = token.Blank
+	}
+	rootKey += quoteLabelIfNeeded(curName)
+	curField := &ast.Field{
+		Label: &ast.Ident{
+			NamePos: token.NoPos.WithRel(relPos),
+			Name:    curName,
+		},
+	}
+
+	topField := curField
+	keys.Next() // TODO(mvdan): for some reason the first Next call doesn't count?
+	for keys.Next() {
+		nextName := string(keys.Node().Data)
+		nextField := &ast.Field{
+			Label: &ast.Ident{
+				NamePos: token.NoPos.WithRel(token.Blank),
+				Name:    nextName,
+			},
+		}
+		curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
+		curField = nextField
+		// TODO(mvdan): use an append-like API once we have benchmarks
+		rootKey += "." + quoteLabelIfNeeded(nextName)
+	}
+	if d.seenKeys[rootKey] {
+		return nil, fmt.Errorf("duplicate key: %s", rootKey)
+	}
+	d.seenKeys[rootKey] = true
+	value, err := d.decodeExpr(rootKey, tnode.Value())
+	if err != nil {
+		return nil, err
+	}
+	curField.Value = value
+	return topField, nil
+}
diff --git a/encoding/toml/decode_test.go b/encoding/toml/decode_test.go
@@ -120,12 +120,43 @@ func TestDecoder(t *testing.T) {
 			site: "foo.com": title: "foo bar"
 		`,
 	}, {
-		name: "RootKeysDuplicate",
+		name: "KeysDuplicateSimple",
 		input: `
-			foo = "same value"
-			foo = "same value"
+			foo = "same key"
+			foo = "same key"
 		`,
 		wantErr: `duplicate key: foo`,
+	}, {
+		name: "KeysDuplicateQuoted",
+		input: `
+			"foo" = "same key"
+			foo = "same key"
+		`,
+		wantErr: `duplicate key: foo`,
+	}, {
+		name: "KeysDuplicateWhitespace",
+		input: `
+			foo . bar = "same key"
+			foo.bar = "same key"
+		`,
+		wantErr: `duplicate key: foo\.bar`,
+	}, {
+		name: "KeysDuplicateDots",
+		input: `
+			foo."bar.baz".zzz = "same key"
+			foo."bar.baz".zzz = "same key"
+		`,
+		wantErr: `duplicate key: foo\."bar\.baz"\.zzz`,
+	}, {
+		name: "KeysNotDuplicateDots",
+		input: `
+			foo."bar.baz" = "different key"
+			"foo.bar".baz = "different key"
+		`,
+		wantCUE: `
+			foo: "bar.baz": "different key"
+			"foo.bar": baz: "different key"
+		`,
 	}, {
 		name: "BasicStrings",
 		input: `
@@ -258,21 +289,59 @@ line two.\
 	}, {
 		name: "Arrays",
 		input: `
-			integers      = [ 1, 2, 3 ]
-			colors        = [ "red", "yellow", "green" ]
-			nested_ints   = [ [ 1, 2 ], [3, 4, 5] ]
-			nested_mixed  = [ [ 1, 2 ], ["a", "b", "c"] ]
-			strings       = [ "all", 'strings', """are the same""", '''type''' ]
-			mixed_numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
+			integers      = [1, 2, 3]
+			colors        = ["red", "yellow", "green"]
+			nested_ints   = [[1, 2], [3, 4, 5]]
+			nested_mixed  = [[1, 2], ["a", "b", "c"], {extra = "keys"}]
+			strings       = ["all", 'strings', """are the same""", '''type''']
+			mixed_numbers = [0.1, 0.2, 0.5, 1, 2, 5]
 		`,
 		wantCUE: `
 			integers:      [1, 2, 3]
 			colors:        ["red", "yellow", "green"]
 			nested_ints:   [[1, 2], [3, 4, 5]]
-			nested_mixed:  [[1, 2], ["a", "b", "c"]]
+			nested_mixed:  [[1, 2], ["a", "b", "c"], {extra: "keys"}]
 			strings:       ["all", "strings", "are the same", "type"]
 			mixed_numbers: [0.1, 0.2, 0.5, 1, 2, 5]
 		`,
+	}, {
+		name: "InlineTables",
+		input: `
+			point  = {x = 1, y = 2}
+			animal = {type.name = "pug"}
+			deep   = {l1 = {l2 = {l3 = "leaf"}}}
+		`,
+		wantCUE: `
+			point:  {x: 1, y: 2}
+			animal: {type: name: "pug"}
+			deep:   {l1: {l2: {l3: "leaf"}}}
+		`,
+	}, {
+		name: "InlineTablesDuplicate",
+		input: `
+			point = {x = "same key", x = "same key"}
+		`,
+		wantErr: `duplicate key: point\.x`,
+	}, {
+		name: "ArrayInlineTablesDuplicate",
+		input: `
+			point = [{}, {}, {x = "same key", x = "same key"}]
+		`,
+		wantErr: `duplicate key: point\.2\.x`,
+	}, {
+		name: "InlineTablesNotDuplicateScoping",
+		input: `
+			repeat = {repeat = {repeat = "leaf"}}
+			struct1 = {sibling = "leaf"}
+			struct2 = {sibling = "leaf"}
+			arrays = [{sibling = "leaf"}, {sibling = "leaf"}]
+		`,
+		wantCUE: `
+			repeat: {repeat: {repeat: "leaf"}}
+			struct1: {sibling: "leaf"}
+			struct2: {sibling: "leaf"}
+			arrays: [{sibling: "leaf"}, {sibling: "leaf"}]
+		`,
 	}}
 	for _, test := range tests {
 		test := test
@@ -287,6 +356,10 @@ line two.\
 				qt.Assert(t, qt.IsNil(node))
 				// We don't continue, so we can't expect any decoded CUE.
 				qt.Assert(t, qt.Equals(test.wantCUE, ""))
+
+				// Validate that go-toml's Unmarshal also rejects this input.
+				err = gotoml.Unmarshal([]byte(test.input), new(any))
+				qt.Assert(t, qt.IsNotNil(err))
 				return
 			}
 			qt.Assert(t, qt.IsNil(err))
@@ -311,7 +384,7 @@ line two.\
 			qt.Assert(t, qt.IsNil(val.Validate()))
 
 			// Validate that the decoded CUE value is equivalent
-			// to the Go value that a direct TOML unmarshal produces.
+			// to the Go value that go-toml's Unmarshal produces.
 			// We use JSON equality as some details such as which integer types are used
 			// are not actually relevant to an "equal data" check.
 			var unmarshalTOML any