From 89f5d2c35db49d1479ccc804d6f8bbf3e9a34912 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 11 Mar 2022 17:35:28 +0100 Subject: [PATCH] structured logging: replacing Fatal/Exit/etc. without loss of flushing When structured logging called for functions like Fatal and Exit to be avoided in favor of klog.ErrorS + os.Exit, it was overlooked that this replacement skips flushing. We could ask developers to use klog.ErrorS + klog.Flush + os.Exit, but that is still not quite the same because klog.Flush could get stuck or take too long. Therefore klog now supports flushing + exiting with a dedicated function that can be combined with some other, arbitrary logging function. --- exit.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ exit_test.go | 47 ++++++++++++++++++++++++++++++++++++++++ klog.go | 23 +++----------------- klog_test.go | 3 +++ 4 files changed, 114 insertions(+), 20 deletions(-) create mode 100644 exit.go create mode 100644 exit_test.go diff --git a/exit.go b/exit.go new file mode 100644 index 000000000..e5ded20ff --- /dev/null +++ b/exit.go @@ -0,0 +1,61 @@ +// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/ +// +// Copyright 2013 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package klog + +import ( + "fmt" + "os" + "time" +) + +var ( + + // ExitFlushTimeout is the timeout that klog has traditionally used during + // calls like Fatal or Exit when flushing log data right before exiting. + // Applications that replace those calls and do not have some specific + // requirements like "exit immediately" can use this value as parameter + // for FlushAndExit. + ExitFlushTimeout = 10 * time.Second + + // OsExit is the function called by FlushAndExit to terminate the program. + OsExit = os.Exit +) + +// FlushAndExit flushes log data for a certain amount of time and then calls +// os.Exit. Combined with some logging call it provides a replacement for +// traditional calls like Fatal or Exit. +func FlushAndExit(flushTimeout time.Duration, exitCode int) { + timeoutFlush(flushTimeout) + OsExit(exitCode) +} + +// timeoutFlush calls Flush and returns when it completes or after timeout +// elapses, whichever happens first. This is needed because the hooks invoked +// by Flush may deadlock when klog.Fatal is called from a hook that holds +// a lock. Flushing also might take too long. +func timeoutFlush(timeout time.Duration) { + done := make(chan bool, 1) + go func() { + Flush() // calls logging.lockAndFlushAll() + done <- true + }() + select { + case <-done: + case <-time.After(timeout): + fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout) + } +} diff --git a/exit_test.go b/exit_test.go new file mode 100644 index 000000000..1ee682c54 --- /dev/null +++ b/exit_test.go @@ -0,0 +1,47 @@ +// Copyright 2020 The Kubernetes Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package klog_test + +import ( + "flag" + "fmt" + "os" + + "k8s.io/klog/v2" +) + +func ExampleFlushAndExit() { + // Set up klog so that we can test it below. + var fs flag.FlagSet + klog.InitFlags(&fs) + fs.Set("skip_headers", "true") + defer flag.Set("skip_headers", "false") + klog.SetOutput(os.Stdout) + defer klog.SetOutput(nil) + klog.OsExit = func(exitCode int) { + fmt.Printf("os.Exit(%d)\n", exitCode) + } + + // If we were to return or exit without flushing, this message would + // get lost because it is buffered in memory by klog when writing to + // files. Output to stderr is not buffered. + klog.InfoS("exiting...") + exitCode := 10 + klog.FlushAndExit(klog.ExitFlushTimeout, exitCode) + + // Output: + // "exiting..." + // os.Exit(10) +} diff --git a/klog.go b/klog.go index 0b235f42f..07e6f6279 100644 --- a/klog.go +++ b/klog.go @@ -847,7 +847,7 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf if atomic.LoadUint32(&fatalNoStacks) > 0 { l.mu.Unlock() isLocked = false - timeoutFlush(10 * time.Second) + timeoutFlush(ExitFlushTimeout) os.Exit(1) } // Dump all goroutine stacks before exiting. @@ -865,8 +865,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf } l.mu.Unlock() isLocked = false - timeoutFlush(10 * time.Second) - os.Exit(255) // C++ uses -1, which is silly because it's anded(&) with 255 anyway. + timeoutFlush(ExitFlushTimeout) + os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway. } l.bufferCache.PutBuffer(buf) @@ -876,23 +876,6 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf } } -// timeoutFlush calls Flush and returns when it completes or after timeout -// elapses, whichever happens first. This is needed because the hooks invoked -// by Flush may deadlock when klog.Fatal is called from a hook that holds -// a lock. -func timeoutFlush(timeout time.Duration) { - done := make(chan bool, 1) - go func() { - Flush() // calls logging.lockAndFlushAll() - done <- true - }() - select { - case <-done: - case <-time.After(timeout): - fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout) - } -} - // stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines. func stacks(all bool) []byte { // We don't know how big the traces are, so grow a few times if they don't fit. Start large, though. diff --git a/klog_test.go b/klog_test.go index 95553d5f4..ea9936f14 100644 --- a/klog_test.go +++ b/klog_test.go @@ -1506,6 +1506,7 @@ func TestCallDepthLogrInfoS(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() // Add wrapper to ensure callDepthTestLogr +2 offset is correct. logFunc := func() { @@ -1528,6 +1529,7 @@ func TestCallDepthLogrErrorS(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() // Add wrapper to ensure callDepthTestLogr +2 offset is correct. logFunc := func() { @@ -1550,6 +1552,7 @@ func TestCallDepthLogrGoLog(t *testing.T) { logger.resetCallDepth() l := logr.New(logger) SetLogger(l) + defer ClearLogger() CopyStandardLogTo("INFO") // Add wrapper to ensure callDepthTestLogr +2 offset is correct.