Skip to content

Commit

Permalink
structured logging: replacing Fatal/Exit/etc. without loss of flushing
Browse files Browse the repository at this point in the history
When structured logging called for functions like Fatal and Exit to be avoided
in favor of klog.ErrorS + os.Exit, it was overlooked that this replacement
skips flushing.

We could ask developers to use klog.ErrorS + klog.Flush + os.Exit, but that is
still not quite the same because klog.Flush could get stuck or take too long.
Therefore klog now supports flushing + exiting with a dedicated function that
can be combined with some other, arbitrary logging function.

Allowing applications to change the default timeout and how specifically the
application terminates may be useful, so both values are variables that are
exported and users of klog are allowed to change.
  • Loading branch information
pohly committed Mar 14, 2022
1 parent 7ea6d6a commit 56b8a4e
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 28 deletions.
69 changes: 69 additions & 0 deletions exit.go
@@ -0,0 +1,69 @@
// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
//
// Copyright 2013 Google Inc. All Rights Reserved.
// Copyright 2022 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog

import (
"fmt"
"os"
"time"
)

var (

// ExitFlushTimeout is the timeout that klog has traditionally used during
// calls like Fatal or Exit when flushing log data right before exiting.
// Applications that replace those calls and do not have some specific
// requirements like "exit immediately" can use this value as parameter
// for FlushAndExit.
//
// Can be set for testing purpose or to change the application's
// default.
ExitFlushTimeout = 10 * time.Second

// OsExit is the function called by FlushAndExit to terminate the program.
//
// Can be set for testing purpose or to change the application's
// default behavior. Note that the function should not simply return
// because callers of functions like Fatal will not expect that.
OsExit = os.Exit
)

// FlushAndExit flushes log data for a certain amount of time and then calls
// os.Exit. Combined with some logging call it provides a replacement for
// traditional calls like Fatal or Exit.
func FlushAndExit(flushTimeout time.Duration, exitCode int) {
timeoutFlush(flushTimeout)
OsExit(exitCode)
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock. Flushing also might take too long.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}
47 changes: 47 additions & 0 deletions exit_test.go
@@ -0,0 +1,47 @@
// Copyright 2022 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog_test

import (
"flag"
"fmt"
"os"

"k8s.io/klog/v2"
)

func ExampleFlushAndExit() {
// Set up klog so that we can test it below.
var fs flag.FlagSet
klog.InitFlags(&fs)
fs.Set("skip_headers", "true")
defer flag.Set("skip_headers", "false")
klog.SetOutput(os.Stdout)
defer klog.SetOutput(nil)
klog.OsExit = func(exitCode int) {
fmt.Printf("os.Exit(%d)\n", exitCode)
}

// If we were to return or exit without flushing, this message would
// get lost because it is buffered in memory by klog when writing to
// files. Output to stderr is not buffered.
klog.InfoS("exiting...")
exitCode := 10
klog.FlushAndExit(klog.ExitFlushTimeout, exitCode)

// Output:
// "exiting..."
// os.Exit(10)
}
39 changes: 11 additions & 28 deletions klog.go
Expand Up @@ -847,8 +847,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
if atomic.LoadUint32(&fatalNoStacks) > 0 {
l.mu.Unlock()
isLocked = false
timeoutFlush(10 * time.Second)
os.Exit(1)
timeoutFlush(ExitFlushTimeout)
OsExit(1)
}
// Dump all goroutine stacks before exiting.
trace := stacks(true)
Expand All @@ -865,8 +865,8 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
}
l.mu.Unlock()
isLocked = false
timeoutFlush(10 * time.Second)
os.Exit(255) // C++ uses -1, which is silly because it's anded(&) with 255 anyway.
timeoutFlush(ExitFlushTimeout)
OsExit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
}
l.bufferCache.PutBuffer(buf)

Expand All @@ -876,23 +876,6 @@ func (l *loggingT) output(s severity.Severity, log *logr.Logger, buf *buffer.Buf
}
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}

// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines.
func stacks(all bool) []byte {
// We don't know how big the traces are, so grow a few times if they don't fit. Start large, though.
Expand Down Expand Up @@ -929,7 +912,7 @@ func (l *loggingT) exit(err error) {
return
}
l.flushAll()
os.Exit(2)
OsExit(2)
}

// syncBuffer joins a bufio.Writer to its underlying file, providing access to the
Expand Down Expand Up @@ -1518,7 +1501,7 @@ func ErrorSDepth(depth int, err error, msg string, keysAndValues ...interface{})
}

// Fatal logs to the FATAL, ERROR, WARNING, and INFO logs,
// including a stack trace of all running goroutines, then calls os.Exit(255).
// including a stack trace of all running goroutines, then calls OsExit(255).
// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
func Fatal(args ...interface{}) {
logging.print(severity.FatalLog, logging.logr, logging.filter, args...)
Expand All @@ -1531,7 +1514,7 @@ func FatalDepth(depth int, args ...interface{}) {
}

// Fatalln logs to the FATAL, ERROR, WARNING, and INFO logs,
// including a stack trace of all running goroutines, then calls os.Exit(255).
// including a stack trace of all running goroutines, then calls OsExit(255).
// Arguments are handled in the manner of fmt.Println; a newline is always appended.
func Fatalln(args ...interface{}) {
logging.println(severity.FatalLog, logging.logr, logging.filter, args...)
Expand All @@ -1544,7 +1527,7 @@ func FatallnDepth(depth int, args ...interface{}) {
}

// Fatalf logs to the FATAL, ERROR, WARNING, and INFO logs,
// including a stack trace of all running goroutines, then calls os.Exit(255).
// including a stack trace of all running goroutines, then calls OsExit(255).
// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
func Fatalf(format string, args ...interface{}) {
logging.printf(severity.FatalLog, logging.logr, logging.filter, format, args...)
Expand All @@ -1560,7 +1543,7 @@ func FatalfDepth(depth int, format string, args ...interface{}) {
// It allows Exit and relatives to use the Fatal logs.
var fatalNoStacks uint32

// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
// Exit logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1).
// Arguments are handled in the manner of fmt.Print; a newline is appended if missing.
func Exit(args ...interface{}) {
atomic.StoreUint32(&fatalNoStacks, 1)
Expand All @@ -1574,7 +1557,7 @@ func ExitDepth(depth int, args ...interface{}) {
logging.printDepth(severity.FatalLog, logging.logr, logging.filter, depth, args...)
}

// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
// Exitln logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1).
func Exitln(args ...interface{}) {
atomic.StoreUint32(&fatalNoStacks, 1)
logging.println(severity.FatalLog, logging.logr, logging.filter, args...)
Expand All @@ -1587,7 +1570,7 @@ func ExitlnDepth(depth int, args ...interface{}) {
logging.printlnDepth(severity.FatalLog, logging.logr, logging.filter, depth, args...)
}

// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls os.Exit(1).
// Exitf logs to the FATAL, ERROR, WARNING, and INFO logs, then calls OsExit(1).
// Arguments are handled in the manner of fmt.Printf; a newline is appended if missing.
func Exitf(format string, args ...interface{}) {
atomic.StoreUint32(&fatalNoStacks, 1)
Expand Down
3 changes: 3 additions & 0 deletions klog_test.go
Expand Up @@ -1506,6 +1506,7 @@ func TestCallDepthLogrInfoS(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
logFunc := func() {
Expand All @@ -1528,6 +1529,7 @@ func TestCallDepthLogrErrorS(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
logFunc := func() {
Expand All @@ -1550,6 +1552,7 @@ func TestCallDepthLogrGoLog(t *testing.T) {
logger.resetCallDepth()
l := logr.New(logger)
SetLogger(l)
defer ClearLogger()
CopyStandardLogTo("INFO")

// Add wrapper to ensure callDepthTestLogr +2 offset is correct.
Expand Down

0 comments on commit 56b8a4e

Please sign in to comment.