From f0f2f309cfe30f6b5fae25f847df77bc21090809 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 10 Sep 2021 11:43:20 -0700 Subject: [PATCH] zapcore: Address TestSamplerConcurrent flakiness The TestSamplerConcurrent test frequently fails with the following error in CI: --- FAIL: TestSamplerConcurrent (0.25s) sampler_test.go:198: Error Trace: sampler_test.go:198 Error: Max difference between 1250 and 1004 allowed is 125, but difference was 246 Test: TestSamplerConcurrent Messages: Unexpected number of logs FAIL The test is intended to verify that despite an onsalught of messages from multiple goroutines, we only allow at most `logsPerTick` messages per `tick`. This was accompilshed by spin-looping 10 goroutines for `numTicks`, each logging one of `numMessages` different messages, and then verifying the final log count. The source of flakiness here was the non-determinism in how far a goroutine would get in logging enough messages such that the sampler would be engaged. In #948, we added a `zapcore.Clock` interface with a ticker and a mock implementation. Move that to `ztest` for use here. To unflake the test, use the mock clock to control time and for each goroutine, log `logsPerTick*2` messages `numTicks` times. This gives us, for numGoroutines (10) for numTicks (25) log logsPerTick * 2 (50) messages We end up attempting to log a total of, (numGoroutines * numTicks * logsPerTick * 2) messages = (10 * 25 * 50) messages = 12500 messages Of these, the following should be sampled: numMessages * numTicks * logsPerTick = 5 * 10 * 25 = 1250 Everything else should be dropped. For extra confidence, use a SamplerHook (added in #813) to verify that the number of sampled and dropped messages meet expectations. --- zapcore/sampler_test.go | 77 +++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 23 deletions(-) diff --git a/zapcore/sampler_test.go b/zapcore/sampler_test.go index 5bcc37a34..fbcdd46cc 100644 --- a/zapcore/sampler_test.go +++ b/zapcore/sampler_test.go @@ -22,6 +22,7 @@ package zapcore_test import ( "fmt" + "runtime" "sync" "testing" "time" @@ -158,50 +159,80 @@ func TestSamplerConcurrent(t *testing.T) { numMessages = 5 numTicks = 25 numGoroutines = 10 + tick = 10 * time.Millisecond + + // We'll make a total of, + // (numGoroutines * numTicks * logsPerTick * 2) log attempts + // with numMessages unique messages. + numLogAttempts = numGoroutines * logsPerTick * numTicks * 2 + // Of those, we'll accept (logsPerTick * numTicks) entries + // for each unique message. expectedCount = numMessages * logsPerTick * numTicks + // The rest will be dropped. + expectedDropped = numLogAttempts - expectedCount ) - tick := ztest.Timeout(10 * time.Millisecond) + clock := ztest.NewMockClock() + cc := &countingCore{} - sampler := NewSamplerWithOptions(cc, tick, logsPerTick, 100000) - var ( - done atomic.Bool - wg sync.WaitGroup - ) + hook, dropped, sampled := makeSamplerCountingHook() + sampler := NewSamplerWithOptions(cc, tick, logsPerTick, 100000, SamplerHook(hook)) + + stop := make(chan struct{}) + var wg sync.WaitGroup for i := 0; i < numGoroutines; i++ { wg.Add(1) - go func(i int) { + go func(i int, ticker *time.Ticker) { defer wg.Done() + defer ticker.Stop() for { - if done.Load() { + select { + case <-stop: return - } - msg := fmt.Sprintf("msg%v", i%numMessages) - ent := Entry{Level: DebugLevel, Message: msg, Time: time.Now()} - if ce := sampler.Check(ent, nil); ce != nil { - ce.Write() - } - // Give a chance for other goroutines to run. - time.Sleep(time.Microsecond) + case <-ticker.C: + for j := 0; j < logsPerTick*2; j++ { + msg := fmt.Sprintf("msg%v", i%numMessages) + ent := Entry{ + Level: DebugLevel, + Message: msg, + Time: clock.Now(), + } + if ce := sampler.Check(ent, nil); ce != nil { + ce.Write() + } + + // Give a chance for other goroutines to run. + runtime.Gosched() + } + } } - }(i) + }(i, clock.NewTicker(tick)) } - time.AfterFunc(numTicks*tick, func() { - done.Store(true) - }) + clock.Add(tick * numTicks) + close(stop) wg.Wait() - assert.InDelta( + assert.Equal( t, expectedCount, - cc.logs.Load(), - expectedCount/10, + int(cc.logs.Load()), "Unexpected number of logs", ) + assert.Equal(t, + expectedCount, + int(sampled.Load()), + "Unexpected number of logs sampled", + ) + assert.Equal(t, + expectedDropped, + int(dropped.Load()), + "Unexpected number of logs dropped", + ) + } func TestSamplerRaces(t *testing.T) {