Skip to content

Commit

Permalink
encoding/toml: add support for inline tables
Browse files Browse the repository at this point in the history
Decoding a field is moved to a method, as it is now used for both
top-level key-values and key-values inside an inline table.
We will soon use this method for tables with headers too.

While here, add more edge cases for duplicate keys as well as
test cases where the keys are different but in subtle ways.

And also ensure that go-toml's Unmarshal errors on all the cases
where our decoder errors, primarily to ensure that we are identical
in terms of failing on duplicate keys.
This test validation with go-toml might need to be tweaked
if or when our decoder ever starts giving any CUE-specific errors.

Signed-off-by: Daniel Martí <mvdan@mvdan.cc>
Change-Id: Idfb92615c2bbdfd2cdc7fe299990438bdf4eb009
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1194758
Reviewed-by: Roger Peppe <rogpeppe@gmail.com>
Unity-Result: CUE porcuepine <cue.porcuepine@gmail.com>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
  • Loading branch information
mvdan committed May 16, 2024
1 parent 3588e1a commit 27a0190
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 48 deletions.
109 changes: 72 additions & 37 deletions encoding/toml/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ package toml
import (
"fmt"
"io"
"strconv"

toml "github.com/pelletier/go-toml/v2/unstable"

Expand Down Expand Up @@ -118,43 +119,11 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
// bar: baz: "value"
// }
case toml.KeyValue:
keys := tnode.Key()
curName := string(keys.Node().Data)
curField := &ast.Field{
Label: &ast.Ident{
NamePos: token.NoPos.WithRel(token.Newline),
Name: curName,
},
}

topField := curField
rootKey := quoteLabelIfNeeded(curName)

keys.Next() // TODO(mvdan): for some reason the first Next call doesn't count?
for keys.Next() {
nextName := string(keys.Node().Data)
nextField := &ast.Field{
Label: &ast.Ident{
NamePos: token.NoPos.WithRel(token.Blank),
Name: nextName,
},
}

curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
curField = nextField
// TODO(mvdan): use an append-like API once we have benchmarks
rootKey += "." + quoteLabelIfNeeded(nextName)
}
if d.seenKeys[rootKey] {
return fmt.Errorf("duplicate key: %s", rootKey)
}
d.seenKeys[rootKey] = true
value, err := d.decodeExpr(tnode.Value())
field, err := d.decodeField("", tnode)
if err != nil {
return err
}
curField.Value = value
d.currentFields = append(d.currentFields, topField)
d.currentFields = append(d.currentFields, field)
// TODO(mvdan): tables
// TODO(mvdan): array tables
default:
Expand All @@ -171,7 +140,7 @@ func quoteLabelIfNeeded(name string) string {
}

// decodeExpr decodes a TOML value node into a CUE expression.
// rootKey is the dotted path of the key holding the value; it is only used to detect duplicate keys.
func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
func (d *Decoder) decodeExpr(rootKey string, tnode *toml.Node) (ast.Expr, error) {
// TODO(mvdan): we currently assume that TOML basic literals (string, int, float)
// are also valid CUE literals; we should double check this, perhaps via fuzzing.
data := string(tnode.Data)
Expand All @@ -188,16 +157,82 @@ func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
list := &ast.ListLit{}
elems := tnode.Children()
for elems.Next() {
elem, err := d.decodeExpr(elems.Node())
// A path into an array element is like "arr.3",
// which looks very similar to a table's "tbl.key",
// particularly since a table key can be any string.
// However, we just need these keys to detect duplicates,
// and a path cannot be both an array and table, so it's OK.
rootKey := rootKey + "." + strconv.Itoa(len(list.Elts))
elem, err := d.decodeExpr(rootKey, elems.Node())
if err != nil {
return nil, err
}
list.Elts = append(list.Elts, elem)
}
return list, nil
case toml.InlineTable:
strct := &ast.StructLit{
// We want a single-line struct, just like TOML's inline tables are on a single line.
Lbrace: token.NoPos.WithRel(token.Blank),
Rbrace: token.NoPos.WithRel(token.Blank),
}
elems := tnode.Children()
for elems.Next() {
field, err := d.decodeField(rootKey, elems.Node())
if err != nil {
return nil, err
}
strct.Elts = append(strct.Elts, field)
}
return strct, nil
// TODO(mvdan): dates and times
// TODO(mvdan): inline tables
default:
return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v\n", tnode.Kind, tnode)
}
}

// decodeField decodes a TOML key-value node into a CUE field.
//
// A dotted key such as a.b.c is turned into a chain of nested single-field
// structs, and the decoded value is attached to the innermost field.
// The outermost field of that chain is returned.
//
// rootKey is the key path of the enclosing value, or the empty string when
// decoding a top-level key-value. It is joined with the node's own key to form
// the full path recorded in d.seenKeys; an error is returned if that full path
// was already recorded.
func (d *Decoder) decodeField(rootKey string, tnode *toml.Node) (*ast.Field, error) {
	keys := tnode.Key()
	name := string(keys.Node().Data)

	// A top-level field starts on its own line; a field inside an inline
	// table stays on the same line as its siblings and is prefixed by the
	// path of the enclosing value.
	path := quoteLabelIfNeeded(name)
	rel := token.Newline
	if rootKey != "" {
		path = rootKey + "." + path
		rel = token.Blank
	}

	top := &ast.Field{
		Label: &ast.Ident{
			NamePos: token.NoPos.WithRel(rel),
			Name:    name,
		},
	}
	// leaf always points at the innermost field built so far.
	leaf := top

	// TODO(mvdan): the first Next call does not appear to advance the iterator; find out why.
	keys.Next()
	for keys.Next() {
		part := string(keys.Node().Data)
		child := &ast.Field{
			Label: &ast.Ident{
				NamePos: token.NoPos.WithRel(token.Blank),
				Name:    part,
			},
		}
		leaf.Value = &ast.StructLit{Elts: []ast.Decl{child}}
		leaf = child
		// TODO(mvdan): use an append-like API once we have benchmarks
		path += "." + quoteLabelIfNeeded(part)
	}

	// Record the full key path before decoding the value, so that keys nested
	// inside the value are checked against a set that already contains it.
	if d.seenKeys[path] {
		return nil, fmt.Errorf("duplicate key: %s", path)
	}
	d.seenKeys[path] = true

	value, err := d.decodeExpr(path, tnode.Value())
	if err != nil {
		return nil, err
	}
	leaf.Value = value
	return top, nil
}
95 changes: 84 additions & 11 deletions encoding/toml/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,43 @@ func TestDecoder(t *testing.T) {
site: "foo.com": title: "foo bar"
`,
}, {
name: "RootKeysDuplicate",
name: "KeysDuplicateSimple",
input: `
foo = "same value"
foo = "same value"
foo = "same key"
foo = "same key"
`,
wantErr: `duplicate key: foo`,
}, {
name: "KeysDuplicateQuoted",
input: `
"foo" = "same key"
foo = "same key"
`,
wantErr: `duplicate key: foo`,
}, {
name: "KeysDuplicateWhitespace",
input: `
foo . bar = "same key"
foo.bar = "same key"
`,
wantErr: `duplicate key: foo\.bar`,
}, {
name: "KeysDuplicateDots",
input: `
foo."bar.baz".zzz = "same key"
foo."bar.baz".zzz = "same key"
`,
wantErr: `duplicate key: foo\."bar\.baz"\.zzz`,
}, {
name: "KeysNotDuplicateDots",
input: `
foo."bar.baz" = "different key"
"foo.bar".baz = "different key"
`,
wantCUE: `
foo: "bar.baz": "different key"
"foo.bar": baz: "different key"
`,
}, {
name: "BasicStrings",
input: `
Expand Down Expand Up @@ -258,21 +289,59 @@ line two.\
}, {
name: "Arrays",
input: `
integers = [ 1, 2, 3 ]
colors = [ "red", "yellow", "green" ]
nested_ints = [ [ 1, 2 ], [3, 4, 5] ]
nested_mixed = [ [ 1, 2 ], ["a", "b", "c"] ]
strings = [ "all", 'strings', """are the same""", '''type''' ]
mixed_numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
integers = [1, 2, 3]
colors = ["red", "yellow", "green"]
nested_ints = [[1, 2], [3, 4, 5]]
nested_mixed = [[1, 2], ["a", "b", "c"], {extra = "keys"}]
strings = ["all", 'strings', """are the same""", '''type''']
mixed_numbers = [0.1, 0.2, 0.5, 1, 2, 5]
`,
wantCUE: `
integers: [1, 2, 3]
colors: ["red", "yellow", "green"]
nested_ints: [[1, 2], [3, 4, 5]]
nested_mixed: [[1, 2], ["a", "b", "c"]]
nested_mixed: [[1, 2], ["a", "b", "c"], {extra: "keys"}]
strings: ["all", "strings", "are the same", "type"]
mixed_numbers: [0.1, 0.2, 0.5, 1, 2, 5]
`,
}, {
name: "InlineTables",
input: `
point = {x = 1, y = 2}
animal = {type.name = "pug"}
deep = {l1 = {l2 = {l3 = "leaf"}}}
`,
wantCUE: `
point: {x: 1, y: 2}
animal: {type: name: "pug"}
deep: {l1: {l2: {l3: "leaf"}}}
`,
}, {
name: "InlineTablesDuplicate",
input: `
point = {x = "same key", x = "same key"}
`,
wantErr: `duplicate key: point\.x`,
}, {
name: "ArrayInlineTablesDuplicate",
input: `
point = [{}, {}, {x = "same key", x = "same key"}]
`,
wantErr: `duplicate key: point\.2\.x`,
}, {
name: "InlineTablesNotDuplicateScoping",
input: `
repeat = {repeat = {repeat = "leaf"}}
struct1 = {sibling = "leaf"}
struct2 = {sibling = "leaf"}
arrays = [{sibling = "leaf"}, {sibling = "leaf"}]
`,
wantCUE: `
repeat: {repeat: {repeat: "leaf"}}
struct1: {sibling: "leaf"}
struct2: {sibling: "leaf"}
arrays: [{sibling: "leaf"}, {sibling: "leaf"}]
`,
}}
for _, test := range tests {
test := test
Expand All @@ -287,6 +356,10 @@ line two.\
qt.Assert(t, qt.IsNil(node))
// We don't continue, so we can't expect any decoded CUE.
qt.Assert(t, qt.Equals(test.wantCUE, ""))

// Validate that go-toml's Unmarshal also rejects this input.
err = gotoml.Unmarshal([]byte(test.input), new(any))
qt.Assert(t, qt.IsNotNil(err))
return
}
qt.Assert(t, qt.IsNil(err))
Expand All @@ -311,7 +384,7 @@ line two.\
qt.Assert(t, qt.IsNil(val.Validate()))

// Validate that the decoded CUE value is equivalent
// to the Go value that a direct TOML unmarshal produces.
// to the Go value that go-toml's Unmarshal produces.
// We use JSON equality as some details such as which integer types are used
// are not actually relevant to an "equal data" check.
var unmarshalTOML any
Expand Down

0 comments on commit 27a0190

Please sign in to comment.