Optimize AddCaller+AddStacktrace by sharing stacks #1052

Merged
merged 7 commits on Jan 27, 2022
Changes from 5 commits
67 changes: 41 additions & 26 deletions logger.go
@@ -24,9 +24,9 @@ import (
"fmt"
"io/ioutil"
"os"
"runtime"
"strings"

"go.uber.org/zap/internal/bufferpool"
"go.uber.org/zap/zapcore"
)

@@ -259,8 +259,10 @@ func (log *Logger) clone() *Logger {
}

func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
// check must always be called directly by a method in the Logger interface
// (e.g., Check, Info, Fatal).
// Logger.check must always be called directly by a method in the
// Logger interface (e.g., Check, Info, Fatal).
// This skips Logger.check and the Info/Fatal/Check/etc. method that
// called it.
const callerSkipOffset = 2

// Check the level first to reduce the cost of disabled log calls.
@@ -307,42 +309,55 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {

// Thread the error output through to the CheckedEntry.
ce.ErrorOutput = log.errorOutput
if log.addCaller {
frame, defined := getCallerFrame(log.callerSkip + callerSkipOffset)
if !defined {

addStack := log.addStack.Enabled(ce.Level)
if !log.addCaller && !addStack {
return ce
}

// Adding the caller or stack trace requires capturing the callers of
// this function. We'll share information between these two.
stackDepth := stacktraceFirst
if addStack {
stackDepth = stacktraceFull
}
stack := captureStacktrace(log.callerSkip+callerSkipOffset, stackDepth)
defer stack.Free()

if stack.Count() == 0 {
if log.addCaller {
fmt.Fprintf(log.errorOutput, "%v Logger.check error: failed to get caller\n", ent.Time.UTC())
log.errorOutput.Sync()
}
return ce
}

frame, more := stack.Next()

if log.addCaller {
ce.Caller = zapcore.EntryCaller{
Defined: defined,
Defined: frame.PC != 0,
PC: frame.PC,
File: frame.File,
Line: frame.Line,
Function: frame.Function,
}
}
if log.addStack.Enabled(ce.Level) {
ce.Stack = StackSkip("", log.callerSkip+callerSkipOffset).String
}

return ce
}
if addStack {
buffer := bufferpool.Get()
defer buffer.Free()

// getCallerFrame gets caller frame. The argument skip is the number of stack
// frames to ascend, with 0 identifying the caller of getCallerFrame. The
// boolean ok is false if it was not possible to recover the information.
//
// Note: This implementation is similar to runtime.Caller, but it returns the whole frame.
func getCallerFrame(skip int) (frame runtime.Frame, ok bool) {
const skipOffset = 2 // skip getCallerFrame and Callers

pc := make([]uintptr, 1)
numFrames := runtime.Callers(skip+skipOffset, pc)
if numFrames < 1 {
return
stackfmt := newStackFormatter(buffer)

// We've already extracted the first frame, so format that
// separately and defer to stackfmt for the rest.
stackfmt.FormatFrame(frame)
if more {
stackfmt.FormatStack(stack)
}
ce.Stack = buffer.String()
}

frame, _ = runtime.CallersFrames(pc).Next()
return frame, frame.PC != 0
return ce
}
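
For context, the shared capture only matters when both options are active. A minimal configuration sketch, mirroring the benchmark added below (the encoder and writer choices here are illustrative, not part of this PR):

package main

import (
	"os"

	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
)

func main() {
	core := zapcore.NewCore(
		zapcore.NewJSONEncoder(zap.NewProductionEncoderConfig()),
		zapcore.AddSync(os.Stderr),
		zap.InfoLevel,
	)
	// With both options enabled, Logger.check walks the stack once and uses
	// the first frame for the caller and the full stack for the trace.
	logger := zap.New(core, zap.AddCaller(), zap.AddStacktrace(zap.WarnLevel))
	defer logger.Sync()

	logger.Warn("caller and stacktrace come from a single capture")
}
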
18 changes: 18 additions & 0 deletions logger_bench_test.go
@@ -179,6 +179,24 @@ func BenchmarkAddCallerHook(b *testing.B) {
})
}

func BenchmarkAddCallerAndStacktrace(b *testing.B) {
logger := New(
zapcore.NewCore(
zapcore.NewJSONEncoder(NewProductionConfig().EncoderConfig),
&ztest.Discarder{},
InfoLevel,
),
AddCaller(),
AddStacktrace(WarnLevel),
)
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
logger.Warn("Caller and stacktrace.")
}
})
}
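
The benchmark logs at Warn so that both AddCaller and the AddStacktrace(WarnLevel) threshold fire on every iteration, exercising the shared capture path.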

func Benchmark10Fields(b *testing.B) {
withBenchedLogger(b, func(log *Logger) {
log.Info("Ten fields, passed at the log site.",
175 changes: 133 additions & 42 deletions stacktrace.go
@@ -24,62 +24,153 @@ import (
"runtime"
"sync"

"go.uber.org/zap/buffer"
"go.uber.org/zap/internal/bufferpool"
)

var (
_stacktracePool = sync.Pool{
New: func() interface{} {
return newProgramCounters(64)
},
}
var _stacktracePool = sync.Pool{
New: func() interface{} {
return &stacktrace{
storage: make([]uintptr, 64),
}
},
}

type stacktrace struct {
pcs []uintptr // program counters; always a subslice of storage
frames *runtime.Frames

// The size of pcs varies depending on requirements:
// it will be one if only the first frame was requested,
// and otherwise it will reflect the depth of the call stack.
//
// storage decouples the slice we need (pcs) from the slice we pool.
// We will always allocate a reasonably large storage, but we'll use
// only as much of it as we need.
storage []uintptr
}

// stacktraceDepth specifies how deep of a stack trace should be captured.
type stacktraceDepth int

const (
// stacktraceFirst captures only the first frame.
stacktraceFirst stacktraceDepth = iota

// stacktraceFull captures the entire call stack, allocating more
// storage for it if needed.
stacktraceFull
)

func takeStacktrace(skip int) string {
buffer := bufferpool.Get()
defer buffer.Free()
programCounters := _stacktracePool.Get().(*programCounters)
defer _stacktracePool.Put(programCounters)

var numFrames int
for {
// Skip the call to runtime.Callers and takeStacktrace so that the
// program counters start at the caller of takeStacktrace.
numFrames = runtime.Callers(skip+2, programCounters.pcs)
if numFrames < len(programCounters.pcs) {
break
}
// Don't put the too-short counter slice back into the pool; this lets
// the pool adjust if we consistently take deep stacktraces.
programCounters = newProgramCounters(len(programCounters.pcs) * 2)
// captureStacktrace captures a stack trace of the specified depth, skipping
// the provided number of frames. skip=0 identifies the caller of
// captureStacktrace.
//
// The caller must call Free on the returned stacktrace after using it.
func captureStacktrace(skip int, depth stacktraceDepth) *stacktrace {
stack := _stacktracePool.Get().(*stacktrace)

switch depth {
case stacktraceFirst:
stack.pcs = stack.storage[:1]
case stacktraceFull:
stack.pcs = stack.storage
}

i := 0
frames := runtime.CallersFrames(programCounters.pcs[:numFrames])
// Unlike other "skip"-based APIs, skip=0 identifies runtime.Callers
// itself. +2 to skip captureStacktrace and runtime.Callers.
numFrames := runtime.Callers(
skip+2,
stack.pcs,
)

// Note: On the last iteration, frames.Next() returns false, with a valid
// frame, but we ignore this frame. The last frame is a runtime frame which
// adds noise, since it's only either runtime.main or runtime.goexit.
for frame, more := frames.Next(); more; frame, more = frames.Next() {
if i != 0 {
buffer.AppendByte('\n')
// runtime.Callers truncates the recorded stacktrace if there is no
// room in the provided slice. For the full stack trace, keep expanding
// storage until there are fewer frames than there is room.
if depth == stacktraceFull {
pcs := stack.pcs
for numFrames == len(pcs) {
pcs = make([]uintptr, len(pcs)*2)
numFrames = runtime.Callers(skip+2, pcs)
}
i++
buffer.AppendString(frame.Function)
buffer.AppendByte('\n')
buffer.AppendByte('\t')
buffer.AppendString(frame.File)
buffer.AppendByte(':')
buffer.AppendInt(int64(frame.Line))

// Discard old storage instead of returning it to the pool.
// This will adjust the pool size over time if stack traces are
// consistently very deep.
stack.storage = pcs
stack.pcs = pcs[:numFrames]
} else {
stack.pcs = stack.pcs[:numFrames]
Comment on lines +102 to +103

Collaborator: do we need to branch here in the stacktraceFirst case? The loop will just be a no-op, right? (Or is there a performance difference?)

Collaborator (Author): Responded offline: yes, we need the branch here for when numFrames=0.
}

stack.frames = runtime.CallersFrames(stack.pcs)
return stack
}
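
The doubling loop above relies on runtime.Callers filling at most len(pc) entries and returning how many it wrote, so a result equal to len(pc) means the stack may have been truncated. A standalone sketch of that strategy outside the pool (hypothetical helper, not part of the diff):

func fullStack(skip int) []uintptr {
	// +2 skips runtime.Callers and fullStack itself.
	pcs := make([]uintptr, 64)
	n := runtime.Callers(skip+2, pcs)
	for n == len(pcs) {
		// The slice filled up, so the stack may be cut off; double and retry.
		pcs = make([]uintptr, len(pcs)*2)
		n = runtime.Callers(skip+2, pcs)
	}
	return pcs[:n]
}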

// Free releases resources associated with this stacktrace
// and returns it back to the pool.
func (st *stacktrace) Free() {
st.frames = nil
st.pcs = nil
_stacktracePool.Put(st)
}

// Count reports the total number of frames in this stacktrace.
// Count DOES NOT change as Next is called.
func (st *stacktrace) Count() int {
return len(st.pcs)
}

// Next returns the next frame in the stack trace,
// and a boolean indicating whether there are more after it.
func (st *stacktrace) Next() (_ runtime.Frame, more bool) {
return st.frames.Next()
}
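
Putting the pooled API together, a minimal caller-only usage sketch (a hypothetical helper standing in for the removed getCallerFrame; not part of the diff):

func callerFrame(skip int) (runtime.Frame, bool) {
	// Capture just one frame, skipping callerFrame itself, and return the
	// pooled storage once the frame value has been copied out.
	stack := captureStacktrace(skip+1, stacktraceFirst)
	defer stack.Free()

	if stack.Count() == 0 {
		return runtime.Frame{}, false
	}
	frame, _ := stack.Next()
	return frame, frame.PC != 0
}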

func takeStacktrace(skip int) string {
stack := captureStacktrace(skip+1, stacktraceFull)
defer stack.Free()

buffer := bufferpool.Get()
defer buffer.Free()

stackfmt := newStackFormatter(buffer)
stackfmt.FormatStack(stack)
return buffer.String()
}

type programCounters struct {
pcs []uintptr
// stackFormatter formats a stack trace into a readable string representation.
type stackFormatter struct {
i int // number of frames already written
Contributor: It looks like this can be a bool, since we only use this for checking whether frames were already written in FormatFrame.

Collaborator (Author): Replacing with a bool.

b *buffer.Buffer
}

// newStackFormatter builds a new stackFormatter.
func newStackFormatter(b *buffer.Buffer) stackFormatter {
return stackFormatter{b: b}
}

// FormatStack formats all remaining frames in the provided stacktrace -- minus
// the final runtime.main/runtime.goexit frame.
func (sf *stackFormatter) FormatStack(stack *stacktrace) {
// Note: On the last iteration, frames.Next() returns false, with a valid
// frame, but we ignore this frame. The last frame is a runtime frame which
// adds noise, since it's only either runtime.main or runtime.goexit.
for frame, more := stack.Next(); more; frame, more = stack.Next() {
sf.FormatFrame(frame)
}
}

func newProgramCounters(size int) *programCounters {
return &programCounters{make([]uintptr, size)}
// FormatFrame formats the given frame.
func (sf *stackFormatter) FormatFrame(frame runtime.Frame) {
if sf.i != 0 {
sf.b.AppendByte('\n')
}
sf.i++
sf.b.AppendString(frame.Function)
sf.b.AppendByte('\n')
sf.b.AppendByte('\t')
sf.b.AppendString(frame.File)
sf.b.AppendByte(':')
sf.b.AppendInt(int64(frame.Line))
}
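
As a usage note, FormatFrame and FormatStack are split so that the shared path in logger.go can pull the first frame off with Next for the caller annotation, format it individually, and hand the remainder to FormatStack. A condensed sketch of that composition (hypothetical helper, not in the diff; assumes the stack is non-empty, as logger.go checks via Count):

func formatSharingFirstFrame(stack *stacktrace) string {
	buffer := bufferpool.Get()
	defer buffer.Free()

	// The first frame doubles as the caller; format it separately, then
	// let FormatStack handle everything after it.
	frame, more := stack.Next()

	stackfmt := newStackFormatter(buffer)
	stackfmt.FormatFrame(frame)
	if more {
		stackfmt.FormatStack(stack)
	}
	return buffer.String()
}
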
32 changes: 32 additions & 0 deletions stacktrace_test.go
@@ -21,6 +21,7 @@
package zap

import (
"bytes"
"strings"
"testing"

@@ -67,8 +68,39 @@ func TestTakeStacktraceWithSkipInnerFunc(t *testing.T) {
)
}

func TestTakeStacktraceDeepStack(t *testing.T) {
const (
N = 500
withStackDepthName = "go.uber.org/zap.withStackDepth"
)
withStackDepth(N, func() {
trace := takeStacktrace(0)
for found := 0; found < N; found++ {
i := strings.Index(trace, withStackDepthName)
if i < 0 {
t.Fatalf(`expected %v occurrences of %q, found %d`,
N, withStackDepthName, found)
}
trace = trace[i+len(withStackDepthName):]
}
})
}

func BenchmarkTakeStacktrace(b *testing.B) {
for i := 0; i < b.N; i++ {
takeStacktrace(0)
}
}

func withStackDepth(depth int, f func()) {
var recurse func(rune) rune
recurse = func(r rune) rune {
if r > 0 {
bytes.Map(recurse, []byte(string([]rune{r - 1})))
} else {
f()
}
return 0
}
recurse(rune(depth))
}
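
For reference on the test helper: withStackDepth deepens the call stack by recursing through bytes.Map, so each level adds a frame for the recurse closure (whose runtime name is prefixed with go.uber.org/zap.withStackDepth), which is exactly what TestTakeStacktraceDeepStack counts.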