From fafe98e1ea27f704efbd8ce38bfb9fcb9058a6ab Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 9 Nov 2021 14:37:53 +0100 Subject: [PATCH] structured logging: support key/value pairs with line breaks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial structured logging output (almost) always produced a single line per log message, with quoting of strings to represent line breaks as \n. This made the output hard to read (see https://github.com/kubernetes/kubernetes/issues/104868). It was still possible to get line breaks when formatting a value with `%+v` and that ended up emitting line breaks; this was probably not intended. Now string values are only quoted if they contain no line break. If they do, start/end markers delimit the text which appears on its own lines, with indention to ensure that those additional lines are not accidentally treated as a new log message when they happen to contain the klog header. It also makes the output more readable. Tabs are used as indention for the value because it makes it stand out visually and because it ensures that inline ">" characters are not mistaken for the end-of-value delimiter. The result of `fmt.Sprintf("%+v")` is printed verbatim without quoting when it contains no line break, otherwise as multi-line value with delimiter and indention. Traditional output: I1112 14:06:35.783354 328441 structured_logging.go:42] someData printed using InfoF: {hello world 0} I1112 14:06:35.783472 328441 structured_logging.go:43] longData printed using InfoF: {long Multiple lines with quite a bit of text. 0} I1112 14:06:35.783483 328441 structured_logging.go:44] stringData printed using InfoF, with the message across multiple lines: long: Multiple lines with quite a bit of text. I1112 14:06:35.898176 142908 structured_logging.go:54] logData printed using InfoF: {log output from some program I0000 12:00:00.000000 123456 main.go:42] Starting E0000 12:00:01.000000 123456 main.go:43] Failed for some reason 0} Old InfoS output before this commit: I1112 14:06:35.783512 328441 structured_logging.go:50] "using InfoS" someData={Name:hello Data:world internal:0} I1112 14:06:35.783529 328441 structured_logging.go:51] "using InfoS" longData={Name:long Data:Multiple lines with quite a bit of text. internal:0} I1112 14:06:35.783549 328441 structured_logging.go:52] "using InfoS with\nthe message across multiple lines" int=1 stringData="long: Multiple\nlines\nwith quite a bit\nof text." str="another value" I1112 14:06:35.783565 328441 structured_logging.go:61] "Did something" item="foobar" I1112 14:06:35.783576 328441 structured_logging.go:63] "This is a full sentence." item="foobar" I1112 14:06:35.898278 142908 structured_logging.go:65] "using InfoS" logData={Name:log output from some program Data:I0000 12:00:00.000000 123456 main.go:42] Starting E0000 12:00:01.000000 123456 main.go:43] Failed for some reason internal:0} New InfoS output: I1126 10:31:50.378182 121736 structured_logging.go:58] "using InfoS" someData={Name:hello Data:world internal:0} I1126 10:31:50.378204 121736 structured_logging.go:59] "using InfoS" longData=< {Name:long Data:Multiple lines with quite a bit of text. internal:0} > I1126 10:31:50.378228 121736 structured_logging.go:60] "using InfoS with\nthe message across multiple lines" int=1 stringData=< long: Multiple lines with quite a bit of text. > str="another value" I1126 10:31:50.378249 121736 structured_logging.go:65] "using InfoS" logData=< {Name:log output from some program Data:I0000 12:00:00.000000 123456 main.go:42] Starting E0000 12:00:01.000000 123456 main.go:43] Failed for some reason internal:0} > Performance is the same as before in most cases. Handling of a v1.Container struct with line breaks in the output gets faster, probably because printing each line individually is more efficient than quoting. $ $GOPATH/bin/benchstat /tmp/reorder /tmp/multi-line name old time/op new time/op delta Logging/container/structured-36 39.6µs ± 1% 21.5µs ± 0% -45.82% (p=0.008 n=5+5) Logging/error-value/structured-36 3.14µs ± 4% 3.11µs ± 2% ~ (p=0.548 n=5+5) Logging/error/structured-36 3.13µs ± 3% 3.13µs ± 3% ~ (p=1.000 n=5+5) Logging/simple/structured-36 2.91µs ± 2% 2.87µs ± 2% ~ (p=0.310 n=5+5) Logging/values/structured-36 4.77µs ± 5% 4.75µs ± 4% ~ (p=1.000 n=5+5) name old alloc/op new alloc/op delta Logging/container/structured-36 9.35kB ± 0% 9.59kB ± 0% +2.59% (p=0.016 n=4+5) Logging/error-value/structured-36 312B ± 0% 312B ± 0% ~ (all equal) Logging/error/structured-36 312B ± 0% 312B ± 0% ~ (all equal) Logging/simple/structured-36 288B ± 0% 288B ± 0% ~ (all equal) Logging/values/structured-36 464B ± 0% 464B ± 0% ~ (all equal) name old allocs/op new allocs/op delta Logging/container/structured-36 62.0 ± 0% 63.0 ± 0% +1.61% (p=0.008 n=5+5) Logging/error-value/structured-36 6.00 ± 0% 6.00 ± 0% ~ (all equal) Logging/error/structured-36 6.00 ± 0% 6.00 ± 0% ~ (all equal) Logging/simple/structured-36 5.00 ± 0% 5.00 ± 0% ~ (all equal) Logging/values/structured-36 11.0 ± 0% 11.0 ± 0% ~ (all equal) --- klog.go | 83 +++++++++++++++++++++++++++++++++++++++++++++------- klog_test.go | 46 +++++++++++++++++++++++++++-- 2 files changed, 116 insertions(+), 13 deletions(-) diff --git a/klog.go b/klog.go index 4cf0b5e5..4d174e8e 100644 --- a/klog.go +++ b/klog.go @@ -803,10 +803,12 @@ func (l *loggingT) infoS(logger *logr.Logger, filter LogFilter, depth int, msg s func (l *loggingT) printS(err error, s severity, depth int, msg string, keysAndValues ...interface{}) { // Only create a new buffer if we don't have one cached. b := l.getBuffer() + // The message is always quoted, even if it contains line breaks. + // If developers want multi-line output, they should use a small, fixed + // message and put the multi-line output into a value. b.WriteString(strconv.Quote(msg)) if err != nil { - b.WriteString(" err=") - b.WriteString(strconv.Quote(err.Error())) + kvListFormat(&b.Buffer, "err", err) } kvListFormat(&b.Buffer, keysAndValues...) l.printDepth(s, logging.logr, nil, depth+1, &b.Buffer) @@ -826,6 +828,10 @@ func kvListFormat(b *bytes.Buffer, keysAndValues ...interface{}) { v = missingValue } b.WriteByte(' ') + // Keys are assumed to be well-formed according to + // https://github.com/kubernetes/community/blob/master/contributors/devel/sig-instrumentation/migration-to-structured-logging.md#name-arguments + // for the sake of performance. Keys with spaces, + // special characters, etc. will break parsing. if k, ok := k.(string); ok { // Avoid one allocation when the key is a string, which // normally it should be. @@ -833,7 +839,6 @@ func kvListFormat(b *bytes.Buffer, keysAndValues ...interface{}) { } else { b.WriteString(fmt.Sprintf("%s", k)) } - b.WriteByte('=') // The type checks are sorted so that more frequently used ones // come first because that is then faster in the common @@ -842,19 +847,77 @@ func kvListFormat(b *bytes.Buffer, keysAndValues ...interface{}) { // (https://github.com/kubernetes/kubernetes/pull/106594#issuecomment-975526235). switch v := v.(type) { case fmt.Stringer: - b.WriteString(strconv.Quote(v.String())) + writeStringValue(b, true, v.String()) case string: - b.WriteString(strconv.Quote(v)) + writeStringValue(b, true, v) case error: - b.WriteString(strconv.Quote(v.Error())) + writeStringValue(b, true, v.Error()) case []byte: - // We cannot use the simpler strconv.Quote here - // because it does not escape unicode characters, which is - // expected by one test!? + // In https://github.com/kubernetes/klog/pull/237 it was decided + // to format byte slices with "%+q". The advantages of that are: + // - readable output if the bytes happen to be printable + // - non-printable bytes get represented as unicode escape + // sequences (\uxxxx) + // + // The downsides are that we cannot use the faster + // strconv.Quote here and that multi-line output is not + // supported. If developers know that a byte array is + // printable and they want multi-line output, they can + // convert the value to string before logging it. + b.WriteByte('=') b.WriteString(fmt.Sprintf("%+q", v)) default: - b.WriteString(fmt.Sprintf("%+v", v)) + writeStringValue(b, false, fmt.Sprintf("%+v", v)) + } + } +} + +func writeStringValue(b *bytes.Buffer, quote bool, v string) { + data := []byte(v) + index := bytes.IndexByte(data, '\n') + if index == -1 { + b.WriteByte('=') + if quote { + // Simple string, quote quotation marks and non-printable characters. + b.WriteString(strconv.Quote(v)) + return } + // Non-string with no line breaks. + b.WriteString(v) + return + } + + // Complex multi-line string, show as-is with indention like this: + // I... "hello world" key=< + // line 1 + // line 2 + // > + // + // Tabs indent the lines of the value while the end of string delimiter + // is indented with a space. That has two purposes: + // - visual difference between the two for a human reader because indention + // will be different + // - no ambiguity when some value line starts with the end delimiter + // + // One downside is that the output cannot distinguish between strings that + // end with a line break and those that don't because the end delimiter + // will always be on the next line. + b.WriteString("=<\n") + for index != -1 { + b.WriteByte('\t') + b.Write(data[0 : index+1]) + data = data[index+1:] + index = bytes.IndexByte(data, '\n') + } + if len(data) == 0 { + // String ended with line break, don't add another. + b.WriteString(" >") + } else { + // No line break at end of last line, write rest of string and + // add one. + b.WriteByte('\t') + b.Write(data) + b.WriteString("\n >") } } diff --git a/klog_test.go b/klog_test.go index 9a44c6be..2f65ef89 100644 --- a/klog_test.go +++ b/klog_test.go @@ -943,6 +943,12 @@ func TestVInfoS(t *testing.T) { return time.Date(2006, 1, 2, 15, 4, 5, .067890e9, time.Local) } pid = 1234 + myData := struct { + Data string + }{ + Data: `This is some long text +with a line break.`, + } var testDataInfo = []struct { msg string format string @@ -963,6 +969,26 @@ func TestVInfoS(t *testing.T) { format: "I0102 15:04:05.067890 1234 klog_test.go:%d] \"test\" err=\"test error\"\n", keysValues: []interface{}{"err", errors.New("test error")}, }, + { + msg: `first message line +second message line`, + format: `I0102 15:04:05.067890 1234 klog_test.go:%d] "first message line\nsecond message line" multiLine=< + first value line + second value line + > +`, + keysValues: []interface{}{"multiLine", `first value line +second value line`}, + }, + { + msg: `message`, + format: `I0102 15:04:05.067890 1234 klog_test.go:%d] "message" myData=< + {Data:This is some long text + with a line break.} + > +`, + keysValues: []interface{}{"myData", myData}, + }, } logging.verbosity.Set("2") @@ -987,7 +1013,7 @@ func TestVInfoS(t *testing.T) { want = "" } if contents(infoLog) != want { - t.Errorf("V(%d).InfoS has unexpected output: \n got:\t%s\nwant:\t%s", l, contents(infoLog), want) + t.Errorf("V(%d).InfoS has unexpected output:\ngot:\n%s\nwant:\n%s\n", l, contents(infoLog), want) } } } @@ -1031,7 +1057,7 @@ func TestErrorS(t *testing.T) { } want := fmt.Sprintf(e.format, line) if contents(errorLog) != want { - t.Errorf("ErrorS has wrong format: \n got:\t%s\nwant:\t%s", contents(errorLog), want) + t.Errorf("ErrorS has wrong format:\ngot:\n%s\nwant:\n%s\n", contents(errorLog), want) } } } @@ -1075,6 +1101,20 @@ func TestKvListFormat(t *testing.T) { keysValues: []interface{}{"pod", "kubedns", "bytes", []byte("��=� ⌘")}, want: " pod=\"kubedns\" bytes=\"\\ufffd\\ufffd=\\ufffd \\u2318\"", }, + { + keysValues: []interface{}{"multiLineString", `Hello world! + Starts with tab. + Starts with spaces. +No whitespace.`, + "pod", "kubedns", + }, + want: ` multiLineString=< + Hello world! + Starts with tab. + Starts with spaces. + No whitespace. + > pod="kubedns"`, + }, { keysValues: []interface{}{"pod", "kubedns", "maps", map[string]int{"three": 4}}, want: " pod=\"kubedns\" maps=map[three:4]", @@ -1113,7 +1153,7 @@ func TestKvListFormat(t *testing.T) { b := &bytes.Buffer{} kvListFormat(b, d.keysValues...) if b.String() != d.want { - t.Errorf("kvlist format error:\n got:\n\t%s\nwant:\t%s", b.String(), d.want) + t.Errorf("kvlist format error:\ngot:\n%s\nwant:\n%s\n", b.String(), d.want) } } }