From 56b8a4ea35ba0fc3be9fc82182e71c2f0116adc1 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 11 Mar 2022 17:35:28 +0100 Subject: [PATCH] structured logging: replacing Fatal/Exit/etc. without loss of flushing When structured logging called for functions like Fatal and Exit to be avoided in favor of klog.ErrorS + os.Exit, it was overlooked that this replacement skips flushing. We could ask developers to use klog.ErrorS + klog.Flush + os.Exit, but that is still not quite the same because klog.Flush could get stuck or take too long. Therefore klog now supports flushing + exiting with a dedicated function that can be combined with some other, arbitrary logging function. Allowing applications to change the default timeout and how specifically the application terminates may be useful, so both values are variables that are exported and users of klog are allowed to change. --- exit.go | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ exit_test.go | 47 +++++++++++++++++++++++++++++++++++ klog.go | 39 +++++++++-------------------- klog_test.go | 3 +++ 4 files changed, 130 insertions(+), 28 deletions(-) create mode 100644 exit.go create mode 100644 exit_test.go diff --git a/exit.go b/exit.go new file mode 100644 index 00000000..320a1477 --- /dev/null +++ b/exit.go @@ -0,0 +1,69 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// Copyright 2022 The Kubernetes Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package klog + +import ( + "fmt" + "os" + "time" +) + +var ( + + // ExitFlushTimeout is the timeout that klog has traditionally used during + // calls like Fatal or Exit when flushing log data right before exiting. + // Applications that replace those calls and do not have some specific + // requirements like "exit immediately" can use this value as parameter + // for FlushAndExit. + // + // Can be set for testing purpose or to change the application's + // default. + ExitFlushTimeout = 10 * time.Second + + // OsExit is the function called by FlushAndExit to terminate the program. + // + // Can be set for testing purpose or to change the application's + // default behavior. Note that the function should not simply return + // because callers of functions like Fatal will not expect that. + OsExit = os.Exit +) + +// FlushAndExit flushes log data for a certain amount of time and then calls +// os.Exit. Combined with some logging call it provides a replacement for +// traditional calls like Fatal or Exit. +func FlushAndExit(flushTimeout time.Duration, exitCode int) { + timeoutFlush(flushTimeout) + OsExit(exitCode) +} + +// timeoutFlush calls Flush and returns when it completes or after timeout +// elapses, whichever happens first. This is needed because the hooks invoked +// by Flush may deadlock when klog.Fatal is called from a hook that holds +// a lock. Flushing also might take too long. +func timeoutFlush(timeout time.Duration) { + done := make(chan bool, 1) + go func() { + Flush() // calls logging.lockAndFlushAll() + done <- true + }() + select { + case <-done: + case <-time.After(timeout): + fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout) + } +} diff --git a/exit_test.go b/exit_test.go new file mode 100644 index 00000000..d0123d97 --- /dev/null +++ b/exit_test.go @@ -0,0 +1,47 @@ +// Copyright 2022 The Kubernetes Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package klog_test + +import ( + "flag" + "fmt" + "os" + + "k8s.io/klog/v2" +) + +func ExampleFlushAndExit() { + // Set up klog so that we can test it below. + var fs flag.FlagSet + klog.InitFlags(&fs) + fs.Set("skip_headers", "true") + defer flag.Set("skip_headers", "false") + klog.SetOutput(os.Stdout) + defer klog.SetOutput(nil) + klog.OsExit = func(exitCode int) { + fmt.Printf("os.Exit(%d)\n", exitCode) + } + + // If we were to return or exit without flushing, this message would + // get lost because it is buffered in memory by klog when writing to + // files. Output to stderr is not buffered. + klog.InfoS("exiting...") + exitCode := 10 + klog.FlushAndExit(klog.ExitFlushTimeout, exitCode) + + // Output: + // "exiting..." + // os.Exit(10) +} diff --git a/klog.go b/klog.go index 0b235f42..95990fd9 100644 --- a/klog.go +++ b/klog.go @@ -847,8 +847,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf if atomic.LoadUint32(&fatalNoStacks) > 0 { l.mu.Unlock() isLocked = false - timeoutFlush(10 * time.Second) - os.Exit(1) + timeoutFlush(ExitFlushTimeout) + OsExit(1) } // Dump all goroutine stacks before exiting. trace := stacks(true) @@ -865,8 +865,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf } l.mu.Unlock() isLocked = false - timeoutFlush(10 * time.Second) - os.Exit(255) // C++ uses -1, which is silly because it's anded(&) with 255 anyway. + timeoutFlush(ExitFlushTimeout) + OsExit(255) // C++ uses -1, which is silly because it's anded with 255 anyway. } l.bufferCache.PutBuffer(buf) @@ -876,23 +876,6 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf } } -// timeoutFlush calls Flush and returns when it completes or after timeout -// elapses, whichever happens first. This is needed because the hooks invoked -// by Flush may deadlock when klog.Fatal is called from a hook that holds -// a lock. -func timeoutFlush(timeout time.Duration) { - done := make(chan bool, 1) - go func() { - Flush() // calls logging.lockAndFlushAll() - done <- true - }() - select { - case <-done: - case <-time.After(timeout): - fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout) - } -} - // stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines. func stacks(all bool) []byte { // We don't know how big the traces are, so grow a few times if they don't fit. Start large, though. @@ -929,7 +912,7 @@ func (l *loggingT) exit(err error) { return } l.flushAll() - os.Exit(2) + OsExit(2) } // syncBuffer joins a bufio.Writer to its underlying file, providing access to the @@ -1518,7 +1501,7 @@ func ErrorSDepth(depth int, err error, msg string, keysAndValues ...interface{}) } // Fatal logs to the FATAL, ERROR, WARNING, and INFO logs, -// including a stack trace of all running goroutines, then calls os.Exit(255). +// including a stack trace of all running goroutines, then calls OsExit(255). // Arguments are handled in the manner of fmt.Print; a newline is appended if missing. func Fatal(args ...interface{}) { logging.print(severity.FatalLog, logging.logr, logging.filter, args...) @@ -1531,7 +1514,7 @@ func FatalDepth(depth int, args ...interface{}) { } // Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs, -// including a stack trace of all running goroutines, then calls os.Exit(255). +// including a stack trace of all running goroutines, then calls OsExit(255). // Arguments are handled in the manner of fmt.Println; a newline is always appended. func Fatalln(args ...interface{}) { logging.println(severity.FatalLog, logging.logr, logging.filter, args...) @@ -1544,7 +1527,7 @@ func FatallnDepth(depth int, args ...interface{}) { } // Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs, -// including a stack trace of all running goroutines, then calls os.Exit(255). +// including a stack trace of all running goroutines, then calls OsExit(255). // Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. func Fatalf(format string, args ...interface{}) { logging.printf(severity.FatalLog, logging.logr, logging.filter, format, args...) @@ -1560,7 +1543,7 @@ func FatalfDepth(depth int, format string, args ...interface{}) { // It allows Exit and relatives to use the Fatal logs. var fatalNoStacks uint32 -// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1). // Arguments are handled in the manner of fmt.Print; a newline is appended if missing. func Exit(args ...interface{}) { atomic.StoreUint32(&fatalNoStacks, 1) @@ -1574,7 +1557,7 @@ func ExitDepth(depth int, args ...interface{}) { logging.printDepth(severity.FatalLog, logging.logr, logging.filter, depth, args...) } -// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1). func Exitln(args ...interface{}) { atomic.StoreUint32(&fatalNoStacks, 1) logging.println(severity.FatalLog, logging.logr, logging.filter, args...) @@ -1587,7 +1570,7 @@ func ExitlnDepth(depth int, args ...interface{}) { logging.printlnDepth(severity.FatalLog, logging.logr, logging.filter, depth, args...) } -// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1). +// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1). // Arguments are handled in the manner of fmt.Printf; a newline is appended if missing. func Exitf(format string, args ...interface{}) { atomic.StoreUint32(&fatalNoStacks, 1) diff --git a/klog_test.go b/klog_test.go index 95553d5f..ea9936f1 100644 --- a/klog_test.go +++ b/klog_test.go @@ -1506,6 +1506,7 @@ func TestCallDepthLogrInfoS(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() // Add wrapper to ensure callDepthTestLogr +2 offset is correct. logFunc := func() { @@ -1528,6 +1529,7 @@ func TestCallDepthLogrErrorS(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() // Add wrapper to ensure callDepthTestLogr +2 offset is correct. logFunc := func() { @@ -1550,6 +1552,7 @@ func TestCallDepthLogrGoLog(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() CopyStandardLogTo("INFO") // Add wrapper to ensure callDepthTestLogr +2 offset is correct.