uber-go · abhinav · Apr 21, 2020 · Mar 5, 2020 · Apr 4, 2020 · Apr 4, 2020
diff --git a/benchmarks/zap_test.go b/benchmarks/zap_test.go
@@ -116,7 +116,7 @@ func newZapLogger(lvl zapcore.Level) *zap.Logger {
 }
 
 func newSampledLogger(lvl zapcore.Level) *zap.Logger {
-	return zap.New(zapcore.NewSampler(
+	return zap.New(zapcore.NewSamplerWithOptions(
 		newZapLogger(zap.DebugLevel).Core(),
 		100*time.Millisecond,
 		10, // first

diff --git a/config.go b/config.go
@@ -32,10 +32,15 @@ import (
 // global CPU and I/O load that logging puts on your process while attempting
 // to preserve a representative subset of your logs.
 //
-// Values configured here are per-second. See zapcore.NewSampler for details.
+// If specified, the Sampler will invoke the Hook after each decision.
+//
+// Values configured here are per-second. See zapcore.NewSamplerWithOptions for
+// details.
 type SamplingConfig struct {
 	Initial    int `json:"initial" yaml:"initial"`
 	Thereafter int `json:"thereafter" yaml:"thereafter"`
+	// Hook is tagged "-" for both JSON and YAML, so it is excluded from
+	// (un)marshaling of the configuration.
+	Hook func(zapcore.Entry, zapcore.SamplingDecision) `json:"-" yaml:"-"`
 }
 
 // Config offers a declarative way to construct a logger. It doesn't do
@@ -208,9 +213,19 @@ func (cfg Config) buildOptions(errSink zapcore.WriteSyncer) []Option {
 		opts = append(opts, AddStacktrace(stackLevel))
 	}
 
-	if cfg.Sampling != nil {
+	if scfg := cfg.Sampling; scfg != nil {
 		opts = append(opts, WrapCore(func(core zapcore.Core) zapcore.Core {
-			return zapcore.NewSampler(core, time.Second, int(cfg.Sampling.Initial), int(cfg.Sampling.Thereafter))
+			var samplerOpts []zapcore.SamplerOption
+			if scfg.Hook != nil {
+				samplerOpts = append(samplerOpts, zapcore.SamplerHook(scfg.Hook))
+			}
+			return zapcore.NewSamplerWithOptions(
+				core,
+				time.Second,
+				cfg.Sampling.Initial,
+				cfg.Sampling.Thereafter,
+				samplerOpts...,
+			)
 		}))
 	}
 

diff --git a/config_test.go b/config_test.go
@@ -27,6 +27,7 @@ import (
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"go.uber.org/atomic"
 	"go.uber.org/zap/zapcore"
 )
 
@@ -144,3 +145,72 @@ func TestConfigWithMissingAttributes(t *testing.T) {
 		})
 	}
 }
+
+// makeSamplerCountingHook returns a sampler hook together with the two
+// counters it maintains: the first counts LogDropped decisions, the second
+// counts LogSampled decisions.
+func makeSamplerCountingHook() (func(zapcore.Entry, zapcore.SamplingDecision),
+	*atomic.Int64, *atomic.Int64) {
+	droppedCount := new(atomic.Int64)
+	sampledCount := new(atomic.Int64)
+	h := func(_ zapcore.Entry, dec zapcore.SamplingDecision) {
+		switch dec {
+		case zapcore.LogDropped:
+			droppedCount.Inc()
+		case zapcore.LogSampled:
+			sampledCount.Inc()
+		}
+	}
+	return h, droppedCount, sampledCount
+}
+
+// TestConfigWithSamplingHook builds a logger from a Config whose
+// SamplingConfig carries a Hook and checks both the number of entries that
+// reach the logger hook and the dropped/sampled decisions the sampler reports.
+func TestConfigWithSamplingHook(t *testing.T) {
+	shook, dcount, scount := makeSamplerCountingHook()
+	cfg := Config{
+		Level:       NewAtomicLevelAt(InfoLevel),
+		Development: false,
+		Sampling: &SamplingConfig{
+			Initial:    100,
+			Thereafter: 100,
+			Hook:       shook,
+		},
+		Encoding:         "json",
+		EncoderConfig:    NewProductionEncoderConfig(),
+		OutputPaths:      []string{"stderr"},
+		ErrorOutputPaths: []string{"stderr"},
+	}
+	// 2 from initial logs, 100 initial sampled logs, and 1 more because the
+	// 200th "sampling" entry is the first "thereafter" entry that is kept.
+	expectN := 2 + 100 + 1
+	expectRe := `{"level":"info","caller":"zap/config_test.go:\d+","msg":"info","k":"v","z":"zz"}` + "\n" +
+		`{"level":"warn","caller":"zap/config_test.go:\d+","msg":"warn","k":"v","z":"zz"}` + "\n"
+	expectDropped := 99  // 200 - 100 initial - 1 thereafter
+	expectSampled := 103 // 2 from initial + 100 + 1 thereafter
+
+	temp, err := ioutil.TempFile("", "zap-prod-config-test")
+	require.NoError(t, err, "Failed to create temp file.")
+	defer func() {
+		// Best-effort cleanup: a leftover temp file must not fail the test.
+		_ = temp.Close()
+		_ = os.Remove(temp.Name())
+	}()
+
+	cfg.OutputPaths = []string{temp.Name()}
+	cfg.EncoderConfig.TimeKey = "" // no timestamps in tests
+	cfg.InitialFields = map[string]interface{}{"z": "zz", "k": "v"}
+
+	hook, count := makeCountingHook()
+	logger, err := cfg.Build(Hooks(hook))
+	require.NoError(t, err, "Unexpected error constructing logger.")
+
+	logger.Debug("debug")
+	logger.Info("info")
+	logger.Warn("warn")
+
+	// Read the file before the sampling loop so that only the two entries
+	// above are matched against expectRe.
+	byteContents, err := ioutil.ReadAll(temp)
+	require.NoError(t, err, "Couldn't read log contents from temp file.")
+	logs := string(byteContents)
+	assert.Regexp(t, expectRe, logs, "Unexpected log output.")
+
+	for i := 0; i < 200; i++ {
+		logger.Info("sampling")
+	}
+	assert.Equal(t, int64(expectN), count.Load(), "Hook called an unexpected number of times.")
+	assert.Equal(t, int64(expectDropped), dcount.Load())
+	assert.Equal(t, int64(expectSampled), scount.Load())
+}
diff --git a/zapcore/sampler.go b/zapcore/sampler.go
@@ -81,17 +81,87 @@ func (c *counter) IncCheckReset(t time.Time, tick time.Duration) uint64 {
 	return 1
 }
 
+// SamplingDecision is a decision made by sampler.
+type SamplingDecision uint8
+
+const (
+	// LogDropped indicates that the Sampler dropped a log entry.
+	LogDropped SamplingDecision = iota
+	// LogSampled indicates that the Sampler sampled a log entry.
+	LogSampled
+)
+
+// optionFunc wraps a func so it satisfies the SamplerOption interface.
+type optionFunc func(*sampler)
+
+// apply calls f with the sampler being configured.
+func (f optionFunc) apply(s *sampler) {
+	f(s)
+}
+
+// SamplerOption configures a Sampler.
+type SamplerOption interface {
+	apply(*sampler)
+}
+
+// NopSamplingHook is the default hook used by sampler. It does nothing.
+func NopSamplingHook(Entry, SamplingDecision) {}
+
+// SamplerHook registers a function which will be called when Sampler makes a
+// decision.
+//
+// This hook may be used to get visibility into the performance of the sampler.
+// For example, use it to track metrics of dropped versus sampled logs.
+//
+// A nil hook is ignored and leaves the sampler's current hook in place: the
+// sampler calls its hook unconditionally on every decision, so storing nil
+// would make Check panic.
+func SamplerHook(hook func(entry Entry, dec SamplingDecision)) SamplerOption {
+	return optionFunc(func(s *sampler) {
+		if hook == nil {
+			return
+		}
+		s.hook = hook
+	})
+}
+
+// NewSamplerWithOptions creates a Core that samples incoming entries, which
+// caps the CPU and I/O load of logging while attempting to preserve a
+// representative subset of your logs.
+//
+// Zap samples by logging the first N entries with a given level and message
+// each tick. If more Entries with the same level and message are seen during
+// the same interval, every Mth message is logged and the rest are dropped.
+//
+// Sampler can be configured to report sampling decisions with the SamplerHook
+// option.
+//
+// Keep in mind that zap's sampling implementation is optimized for speed over
+// absolute precision; under load, each tick may be slightly over- or
+// under-sampled.
+//
+// thereafter must be non-zero: once more than first entries have been seen in
+// a tick, Check computes (n-first)%thereafter, and an integer modulo by zero
+// panics.
+func NewSamplerWithOptions(core Core, tick time.Duration, first, thereafter int, opts ...SamplerOption) Core {
+	s := &sampler{
+		Core:       core,
+		tick:       tick,
+		counts:     newCounters(),
+		first:      uint64(first),
+		thereafter: uint64(thereafter),
+		hook:       NopSamplingHook,
+	}
+	// Options run after the defaults are set, so they may override hook.
+	for _, opt := range opts {
+		opt.apply(s)
+	}
+
+	return s
+}
+
+// sampler wraps a Core and decides in Check whether an entry is forwarded to
+// it. counts holds the per-level, per-message counters, tick is the counting
+// interval, first and thereafter are the sampling thresholds, and hook is
+// called with every sampling decision.
 type sampler struct {
 	Core
 
 	counts            *counters
 	tick              time.Duration
 	first, thereafter uint64
+	hook              func(Entry, SamplingDecision)
 }
 
-// NewSampler creates a Core that samples incoming entries, which caps the CPU
-// and I/O load of logging while attempting to preserve a representative subset
-// of your logs.
+// NewSampler creates a Core that samples incoming entries, which
+// caps the CPU and I/O load of logging while attempting to preserve a
+// representative subset of your logs.
 //
 // Zap samples by logging the first N entries with a given level and message
 // each tick. If more Entries with the same level and message are seen during
@@ -100,13 +170,16 @@ type sampler struct {
 // Keep in mind that zap's sampling implementation is optimized for speed over
 // absolute precision; under load, each tick may be slightly over- or
 // under-sampled.
+//
+// Deprecated: use NewSamplerWithOptions.
 func NewSampler(core Core, tick time.Duration, first, thereafter int) Core {
-	return &sampler{
-		Core:       core,
-		tick:       tick,
-		counts:     newCounters(),
-		first:      uint64(first),
-		thereafter: uint64(thereafter),
-	}
+	// Delegate so both constructors share one initialization path, including
+	// the default no-op hook.
+	return NewSamplerWithOptions(core, tick, first, thereafter)
 }
 
@@ -117,6 +190,7 @@ func (s *sampler) With(fields []Field) Core {
 		counts:     s.counts,
 		first:      s.first,
 		thereafter: s.thereafter,
+		hook:       s.hook,
 	}
 }
 
@@ -128,7 +202,9 @@ func (s *sampler) Check(ent Entry, ce *CheckedEntry) *CheckedEntry {
 	counter := s.counts.get(ent.Level, ent.Message)
 	n := counter.IncCheckReset(ent.Time, s.tick)
 	if n > s.first && (n-s.first)%s.thereafter != 0 {
+		s.hook(ent, LogDropped)
 		return ce
 	}
+	s.hook(ent, LogSampled)
 	return s.Core.Check(ent, ce)
 }
diff --git a/zapcore/sampler_bench_test.go b/zapcore/sampler_bench_test.go
@@ -25,6 +25,7 @@ import (
 	"testing"
 	"time"
 
+	"go.uber.org/atomic"
 	"go.uber.org/zap/internal/ztest"
 	. "go.uber.org/zap/zapcore"
 )
@@ -203,7 +204,7 @@ var counterTestCases = [][]string{
 func BenchmarkSampler_Check(b *testing.B) {
 	for _, keys := range counterTestCases {
 		b.Run(fmt.Sprintf("%v keys", len(keys)), func(b *testing.B) {
-			fac := NewSampler(
+			fac := NewSamplerWithOptions(
 				NewCore(
 					NewJSONEncoder(testEncoderConfig()),
 					&ztest.Discarder{},
@@ -228,3 +229,50 @@ func BenchmarkSampler_Check(b *testing.B) {
 		})
 	}
 }
+
+// makeSamplerCountingHook builds a sampling hook that tallies decisions. It
+// returns the hook, then the counter of dropped entries, then the counter of
+// sampled entries.
+func makeSamplerCountingHook() (func(_ Entry, dec SamplingDecision), *atomic.Int64, *atomic.Int64) {
+	var (
+		dropped = new(atomic.Int64)
+		sampled = new(atomic.Int64)
+	)
+	countDecision := func(_ Entry, decision SamplingDecision) {
+		switch decision {
+		case LogDropped:
+			dropped.Inc()
+		case LogSampled:
+			sampled.Inc()
+		}
+	}
+	return countDecision, dropped, sampled
+}
+
+// BenchmarkSampler_CheckWithHook measures Check on a sampler that has a
+// counting hook installed, with one sub-benchmark per entry of
+// counterTestCases.
+func BenchmarkSampler_CheckWithHook(b *testing.B) {
+	// The counters are discarded; only the cost of invoking the hook matters.
+	hook, _, _ := makeSamplerCountingHook()
+	for _, keys := range counterTestCases {
+		b.Run(fmt.Sprintf("%v keys", len(keys)), func(b *testing.B) {
+			// tick = 1ms, first = 1, thereafter = 1000.
+			fac := NewSamplerWithOptions(
+				NewCore(
+					NewJSONEncoder(testEncoderConfig()),
+					&ztest.Discarder{},
+					DebugLevel,
+				),
+				time.Millisecond,
+				1,
+				1000,
+				SamplerHook(hook),
+			)
+			// Exclude the setup above from the measurement.
+			b.ResetTimer()
+			b.RunParallel(func(pb *testing.PB) {
+				i := 0
+				for pb.Next() {
+					ent := Entry{
+						Level:   DebugLevel + Level(i%4),
+						Message: keys[i],
+					}
+					_ = fac.Check(ent, nil)
+					i++
+					// Wrap around so i always stays a valid index into keys.
+					if n := len(keys); i >= n {
+						i -= n
+					}
+				}
+			})
+		})
+	}
+}
diff --git a/zapcore/sampler_test.go b/zapcore/sampler_test.go
@@ -37,6 +37,7 @@ import (
 
 func fakeSampler(lvl LevelEnabler, tick time.Duration, first, thereafter int) (Core, *observer.ObservedLogs) {
 	core, logs := observer.New(lvl)
+	// Keep using the deprecated NewSampler constructor here, presumably so it
+	// stays exercised by tests ("cc" in the original note likely meant code
+	// coverage; TODO confirm).
 	core = NewSampler(core, tick, first, thereafter)
 	return core, logs
 }
@@ -162,7 +163,7 @@ func TestSamplerConcurrent(t *testing.T) {
 
 	tick := ztest.Timeout(10 * time.Millisecond)
 	cc := &countingCore{}
-	sampler := NewSampler(cc, tick, logsPerTick, 100000)
+	sampler := NewSamplerWithOptions(cc, tick, logsPerTick, 100000)
 
 	var (
 		done atomic.Bool