Skip to content

Commit

Permalink
structured logging: replacing Fatal/Exit/etc. without loss of flushing
Browse files Browse the repository at this point in the history
When structured logging called for functions like Fatal and Exit to be avoided
in favor of klog.ErrorS + os.Exit, it was overlooked that this replacement
skips flushing.

We could ask developers to use klog.ErrorS + klog.Flush + os.Exit, but that is
still not quite the same because klog.Flush could get stuck or take too long.

Therefore klog now supports flushing + exiting with a dedicated function that
can be combined with some other, arbitrary logging function.
  • Loading branch information
pohly committed Mar 11, 2022
1 parent 02ca14b commit 2ae354b
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 19 deletions.
61 changes: 61 additions & 0 deletions exit.go
@@ -0,0 +1,61 @@
// Go support for leveled logs, analogous to https://code.google.com/p/google-glog/
//
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog

import (
"fmt"
"os"
"time"
)

var (

// ExitFlushTimeout is the timeout that klog has traditionally used during
// calls like Fatal or Exit when flushing log data right before exiting.
// Applications that replace those calls and do not have some specific
// requirements like "exit immediately" can use this value as parameter
// for FlushAndExit.
ExitFlushTimeout = 10 * time.Second

// OsExit is the function called by FlushAndExit to terminate the program.
OsExit = os.Exit
)

// FlushAndExit flushes log data for a certain amount of time and then calls
// os.Exit. Combined with some logging call it provides a replacement for
// traditional calls like Fatal or Exit.
func FlushAndExit(flushTimeout time.Duration, exitCode int) {
timeoutFlush(flushTimeout)
OsExit(exitCode)
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock. Flushing also might take too long.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}
46 changes: 46 additions & 0 deletions exit_test.go
@@ -0,0 +1,46 @@
// Copyright 2020 The Kubernetes Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package klog_test

import (
"flag"
"fmt"
"os"

"k8s.io/klog/v2"
)

func ExampleFlushAndExit() {
// Set up klog so that we can test it below.
klog.InitFlags(nil)
flag.Set("skip_headers", "true")
flag.Set("alsologtostderr", "false")
flag.Set("logtostderr", "false")
klog.SetOutput(os.Stdout)
klog.OsExit = func(exitCode int) {
fmt.Printf("os.Exit(%d)\n", exitCode)
}

// If we were to return or exit without flushing, this message would
// get lost because it is buffered in memory by klog when writing to
// files. Output to stderr is not buffered.
klog.InfoS("exiting...")
exitCode := 10
klog.FlushAndExit(klog.ExitFlushTimeout, exitCode)

// Output:
// "exiting..."
// os.Exit(10)
}
21 changes: 2 additions & 19 deletions klog.go
Expand Up @@ -968,7 +968,7 @@ func (l *loggingT) output(s severity, log logr.Logger, buf *buffer, depth int, f
// If we got here via Exit rather than Fatal, print no stacks.
if atomic.LoadUint32(&fatalNoStacks) > 0 {
l.mu.Unlock()
timeoutFlush(10 * time.Second)
timeoutFlush(ExitFlushTimeout)
os.Exit(1)
}
// Dump all goroutine stacks before exiting.
Expand All @@ -985,7 +985,7 @@ func (l *loggingT) output(s severity, log logr.Logger, buf *buffer, depth int, f
}
}
l.mu.Unlock()
timeoutFlush(10 * time.Second)
timeoutFlush(ExitFlushTimeout)
os.Exit(255) // C++ uses -1, which is silly because it's anded with 255 anyway.
}
l.putBuffer(buf)
Expand All @@ -996,23 +996,6 @@ func (l *loggingT) output(s severity, log logr.Logger, buf *buffer, depth int, f
}
}

// timeoutFlush calls Flush and returns when it completes or after timeout
// elapses, whichever happens first. This is needed because the hooks invoked
// by Flush may deadlock when klog.Fatal is called from a hook that holds
// a lock.
func timeoutFlush(timeout time.Duration) {
done := make(chan bool, 1)
go func() {
Flush() // calls logging.lockAndFlushAll()
done <- true
}()
select {
case <-done:
case <-time.After(timeout):
fmt.Fprintln(os.Stderr, "klog: Flush took longer than", timeout)
}
}

// stacks is a wrapper for runtime.Stack that attempts to recover the data for all goroutines.
func stacks(all bool) []byte {
// We don't know how big the traces are, so grow a few times if they don't fit. Start large, though.
Expand Down

0 comments on commit 2ae354b

Please sign in to comment.