From 791a34779299dfe9a8985017f34c0147afc4bba2 Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Tue, 16 Nov 2021 12:41:26 +0100 Subject: [PATCH] Support encoding comments and specifying the encoding format This allows encoding comments and setting some flags to control the format. While toml.Marshaler added in #327 gives full control over how you want to format something, I don't think it's especially user-friendly to tell everyone to create a new type with all the appropriate formatting, escaping, etc. The vast majority of use cases probably just fall into a few simple categories such as "use `"""`" or "encode this number as hex". I grouped both features together as they're closely related. What I want is something that: 1. works well with other encoders/decoders; 2. allows setting attributes programmatically; 3. supports round-tripping by default on a standard struct; 4. has a reasonably uncumbersome API. Most options (custom types, struct tags) fail at least one of these. --- This adds two new methods to Encoder: the Comment() method just sets a comment for a key, and the FormatAs() method sets the format. A simple example: err := toml.NewEncoder(os.Stdout). FormatAs("multi_line", toml.AsMultilineString). FormatAs("single_line_raw", toml.AsLiteralString). Comment("comment_me", "Well, hello there!"). Encode(someStruct) This way, pretty much any flag can be added programmatically without getting in the way of JSON/YAML/whatnot encoding/decoding. The idea behind the naming here is that you can have one `As*` hint, with one or more `With*` hints. --- I don't especially care for how you need to pass the keys as strings, but there isn't really any good way to do it otherwise. I'm not necessarily opposed to also adding struct tags for most of these things, although I'm not a huge fan of them. Since struct tags can't be set programmatically, they're not really suitable for many use cases (e.g. 
setting comments dynamically, using multiline strings only if the string contains newlines, etc.). It's something that could maybe be added in a future PR, if a lot of people ask for it. Not entirely sold on the API; you have a fairly large list of variables you can add to FormatAs() and many are invalid (e.g. they apply only for some types). Maybe it's better to split it out to FormatString(), FormatNumber(), etc. and add `type formatString`, `type formatNumber`? Another thing not covered here is proper round-tripping; e.g. decoding a TOML file and encoding it again will make it lose the comments. We already have a `MetaData` object when decoding, so it's probably better to add all the information there, and allow passing that to the encoder (or something?). Need to think about comments especially, since not every comment is necessarily associated with a key. Fixes #64 Fixes #75 Fixes #160 Fixes #192 Fixes #213 Fixes #269 --- decode_meta.go | 10 +++--- encode.go | 93 +++++++++++++++++++++++++++++++++++++++++++------- encode_test.go | 24 +++++++++++++ type_check.go | 1 + 4 files changed, 111 insertions(+), 17 deletions(-) diff --git a/decode_meta.go b/decode_meta.go index ad8899c6..4c86789d 100644 --- a/decode_meta.go +++ b/decode_meta.go @@ -7,10 +7,10 @@ import "strings" // the TOML type of a key. type MetaData struct { mapping map[string]interface{} - types map[string]tomlType - keys []Key - decoded map[string]bool - context Key // Used only during decoding. + types map[string]tomlType // TOML types. + keys []Key // List of defined keys. + decoded map[string]bool // Decoded keys. + context Key // Used only during decoding. } // IsDefined reports if the key exists in the TOML data. 
@@ -78,7 +78,7 @@ func (k Key) maybeQuoted(i int) string { } } if quote { - return `"` + quotedReplacer.Replace(k[i]) + `"` + return `"` + dblQuotedReplacer.Replace(k[i]) + `"` } return k[i] } diff --git a/encode.go b/encode.go index da5b337d..26a0dd84 100644 --- a/encode.go +++ b/encode.go @@ -26,7 +26,39 @@ var ( errAnything = errors.New("") // used in testing ) -var quotedReplacer = strings.NewReplacer( +type formatAs uint + +const ( + _ formatAs = 1 << iota + // Strings + AsLiteralString // Format as a literal ('..') string. + WithMultilineString // Format as a multiline ("""..""" or '''..''') string. + + // Numbers + AsHexNumber // Hex number. + AsOctalNumber // Octal number. + AsBinaryNumber // Binary number + AsExpNumber // floats as exp notation. + WithSignedNumber // Add + or - to numbers. + WithGroupedNumber // Group with _ in groups of 3. + + // Dates and times. + AsDate // 1979-05-27 + AsTime // 07:32:00 + AsLocalDate // Don't add timezone + WithMS // .999999 (only for time, datetime) + + // Arrays + AsSinglelineArray // Array on single line ([1, 2]) + + // Tables + AsDottedTable // Use foo.bar for maps, rather than [foo] bar = + AsInlineTable // Format as an inline table. + + MergeMaps // Merge in to existing value(s). https://github.com/BurntSushi/toml/issues/192 +) + +var dblQuotedReplacer = strings.NewReplacer( "\"", "\\\"", "\\", "\\\\", "\x00", `\u0000`, @@ -99,9 +131,10 @@ type Encoder struct { // spaces. Indent string - // hasWritten is whether we have written any output to w yet. - hasWritten bool w *bufio.Writer + hasWritten bool // written any output to w yet? + comments map[string]string // Comments to add. + as map[string]formatAs // How to format keys. } // NewEncoder create a new Encoder. 
@@ -112,6 +145,22 @@ func NewEncoder(w io.Writer) *Encoder { } } +func (enc *Encoder) FormatAs(key string, as formatAs) *Encoder { + if enc.as == nil { + enc.as = make(map[string]formatAs) + } + enc.as[key] = as + return enc +} + +func (enc *Encoder) Comment(key, cmt string) *Encoder { + if enc.comments == nil { + enc.comments = make(map[string]string) + } + enc.comments[key] = cmt + return enc +} + // Encode writes a TOML representation of the Go value to the Encoder's writer. // // An error is returned if the value given cannot be encoded to a valid TOML @@ -139,6 +188,16 @@ func (enc *Encoder) safeEncode(key Key, rv reflect.Value) (err error) { } func (enc *Encoder) encode(key Key, rv reflect.Value) { + if enc.comments != nil { + c, ok := enc.comments[key.String()] + if ok { + enc.w.WriteByte('\n') + enc.w.WriteString("# ") + enc.w.WriteString(strings.ReplaceAll(c, "\n", "\n# ")) + enc.w.WriteByte('\n') + } + } + // Special case: time needs to be in ISO8601 format. // // Special case: if we can marshal the type to text, then we used that. This @@ -191,7 +250,7 @@ func (enc *Encoder) encode(key Key, rv reflect.Value) { } // eElement encodes any value that can be an array element. -func (enc *Encoder) eElement(rv reflect.Value) { +func (enc *Encoder) eElement(rv reflect.Value, as formatAs) { switch v := rv.Interface().(type) { case time.Time: // Using TextMarshaler adds extra quotes, which we don't want. 
format := time.RFC3339Nano @@ -215,20 +274,20 @@ func (enc *Encoder) eElement(rv reflect.Value) { if err != nil { encPanic(err) } - enc.writeQuoted(string(s)) + enc.writeQuoted(string(s), as) return case encoding.TextMarshaler: s, err := v.MarshalText() if err != nil { encPanic(err) } - enc.writeQuoted(string(s)) + enc.writeQuoted(string(s), as) return } switch rv.Kind() { case reflect.String: - enc.writeQuoted(rv.String()) + enc.writeQuoted(rv.String(), as) case reflect.Bool: enc.wf(strconv.FormatBool(rv.Bool())) case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: @@ -260,7 +319,7 @@ func (enc *Encoder) eElement(rv reflect.Value) { case reflect.Map: enc.eMap(nil, rv, true) case reflect.Interface: - enc.eElement(rv.Elem()) + enc.eElement(rv.Elem(), as) default: encPanic(fmt.Errorf("unexpected primitive type: %T", rv.Interface())) } @@ -275,8 +334,18 @@ func floatAddDecimal(fstr string) string { return fstr } -func (enc *Encoder) writeQuoted(s string) { - enc.wf("\"%s\"", quotedReplacer.Replace(s)) +func (enc *Encoder) writeQuoted(s string, as formatAs) { + switch { + case as&AsMultilineString != 0 && as&AsLiteralString != 0: + enc.wf("'''%s'''\n", s) + case as&AsMultilineString != 0: + enc.wf(`"""%s"""`+"\n", + strings.ReplaceAll(dblQuotedReplacer.Replace(s), "\\n", "\n")) + case as&AsLiteralString != 0: + enc.wf(`'%s'`, s) + default: + enc.wf(`"%s"`, dblQuotedReplacer.Replace(s)) + } } func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) { @@ -284,7 +353,7 @@ func (enc *Encoder) eArrayOrSliceElement(rv reflect.Value) { enc.wf("[") for i := 0; i < length; i++ { elem := rv.Index(i) - enc.eElement(elem) + enc.eElement(elem, 0) // TODO: add formatAs if i != length-1 { enc.wf(", ") } @@ -638,7 +707,7 @@ func (enc *Encoder) writeKeyValue(key Key, val reflect.Value, inline bool) { encPanic(errNoKey) } enc.wf("%s%s = ", enc.indentStr(key), key.maybeQuoted(len(key)-1)) - enc.eElement(val) + enc.eElement(val, enc.as[key.String()]) if !inline { 
enc.newline() } diff --git a/encode_test.go b/encode_test.go index 1a2ac830..bb45469c 100644 --- a/encode_test.go +++ b/encode_test.go @@ -473,6 +473,30 @@ func TestEncode32bit(t *testing.T) { nil) } +func TestEncodeHints(t *testing.T) { + foo := struct { + MultiLine string `toml:"multi_line"` + SingleLineRaw string `toml:"single_line_raw"` + CommentMe string `toml:"comment_me"` + }{ + "hello\nworld", + `hello\nworld`, + "A line of comment", + } + + buf := new(bytes.Buffer) + err := NewEncoder(buf). + FormatAs("multi_line", AsMultilineString). + FormatAs("single_line_raw", AsLiteralString). + Comment("comment_me", "Well, hello there!"). + Encode(foo) + if err != nil { + t.Fatal(err) + } + + fmt.Println(buf.String()) +} + func encodeExpected(t *testing.T, label string, val interface{}, want string, wantErr error) { t.Helper() diff --git a/type_check.go b/type_check.go index d56aa80f..85dcbbfb 100644 --- a/type_check.go +++ b/type_check.go @@ -1,6 +1,7 @@ package toml // tomlType represents any Go type that corresponds to a TOML type. +// // While the first draft of the TOML spec has a simplistic type system that // probably doesn't need this level of sophistication, we seem to be militating // toward adding real composite types.