From f0f2f309cfe30f6b5fae25f847df77bc21090809 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta <abg@uber.com>
Date: Fri, 10 Sep 2021 11:43:20 -0700
Subject: [PATCH] zapcore: Address TestSamplerConcurrent flakiness

The TestSamplerConcurrent test frequently fails with the following error
in CI:

    --- FAIL: TestSamplerConcurrent (0.25s)
        sampler_test.go:198:
        	    Error Trace:	sampler_test.go:198
        	    Error:      	Max difference between 1250 and 1004 allowed is 125, but difference was 246
        	    Test:       	TestSamplerConcurrent
        	    Messages:   	Unexpected number of logs
    FAIL

The test is intended to verify that
despite an onsalught of messages from multiple goroutines,
we only allow at most `logsPerTick` messages per `tick`.

This was accompilshed by spin-looping 10 goroutines for `numTicks`,
each logging one of `numMessages` different messages,
and then verifying the final log count.

The source of flakiness here was the non-determinism in
how far a goroutine would get in logging enough messages such that
the sampler would be engaged.

In #948, we added a `zapcore.Clock` interface with a ticker and
a mock implementation.
Move that to `ztest` for use here.

To unflake the test, use the mock clock to control time and
for each goroutine, log `logsPerTick*2` messages `numTicks` times.
This gives us,

    for numGoroutines (10)
        for numTicks (25)
            log logsPerTick * 2 (50) messages

We end up attempting to log a total of,

    (numGoroutines * numTicks * logsPerTick * 2) messages
    = (10 * 25 * 50) messages
    = 12500 messages

Of these, the following should be sampled:

    numMessages * numTicks * logsPerTick
    = 5 * 10 * 25
    = 1250

Everything else should be dropped.

For extra confidence, use a SamplerHook (added in #813) to verify that
the number of sampled and dropped messages meet expectations.
---
 zapcore/sampler_test.go | 77 +++++++++++++++++++++++++++++------------
 1 file changed, 54 insertions(+), 23 deletions(-)

diff --git a/zapcore/sampler_test.go b/zapcore/sampler_test.go
index 5bcc37a34..fbcdd46cc 100644
--- a/zapcore/sampler_test.go
+++ b/zapcore/sampler_test.go
@@ -22,6 +22,7 @@ package zapcore_test
 
 import (
 	"fmt"
+	"runtime"
 	"sync"
 	"testing"
 	"time"
@@ -158,50 +159,80 @@ func TestSamplerConcurrent(t *testing.T) {
 		numMessages   = 5
 		numTicks      = 25
 		numGoroutines = 10
+		tick          = 10 * time.Millisecond
+
+		// We'll make a total of,
+		// (numGoroutines * numTicks * logsPerTick * 2) log attempts
+		// with numMessages unique messages.
+		numLogAttempts = numGoroutines * logsPerTick * numTicks * 2
+		// Of those, we'll accept (logsPerTick * numTicks) entries
+		// for each unique message.
 		expectedCount = numMessages * logsPerTick * numTicks
+		// The rest will be dropped.
+		expectedDropped = numLogAttempts - expectedCount
 	)
 
-	tick := ztest.Timeout(10 * time.Millisecond)
+	clock := ztest.NewMockClock()
+
 	cc := &countingCore{}
-	sampler := NewSamplerWithOptions(cc, tick, logsPerTick, 100000)
 
-	var (
-		done atomic.Bool
-		wg   sync.WaitGroup
-	)
+	hook, dropped, sampled := makeSamplerCountingHook()
+	sampler := NewSamplerWithOptions(cc, tick, logsPerTick, 100000, SamplerHook(hook))
+
+	stop := make(chan struct{})
+	var wg sync.WaitGroup
 	for i := 0; i < numGoroutines; i++ {
 		wg.Add(1)
-		go func(i int) {
+		go func(i int, ticker *time.Ticker) {
 			defer wg.Done()
+			defer ticker.Stop()
 
 			for {
-				if done.Load() {
+				select {
+				case <-stop:
 					return
-				}
-				msg := fmt.Sprintf("msg%v", i%numMessages)
-				ent := Entry{Level: DebugLevel, Message: msg, Time: time.Now()}
-				if ce := sampler.Check(ent, nil); ce != nil {
-					ce.Write()
-				}
 
-				// Give a chance for other goroutines to run.
-				time.Sleep(time.Microsecond)
+				case <-ticker.C:
+					for j := 0; j < logsPerTick*2; j++ {
+						msg := fmt.Sprintf("msg%v", i%numMessages)
+						ent := Entry{
+							Level:   DebugLevel,
+							Message: msg,
+							Time:    clock.Now(),
+						}
+						if ce := sampler.Check(ent, nil); ce != nil {
+							ce.Write()
+						}
+
+						// Give a chance for other goroutines to run.
+						runtime.Gosched()
+					}
+				}
 			}
-		}(i)
+		}(i, clock.NewTicker(tick))
 	}
 
-	time.AfterFunc(numTicks*tick, func() {
-		done.Store(true)
-	})
+	clock.Add(tick * numTicks)
+	close(stop)
 	wg.Wait()
 
-	assert.InDelta(
+	assert.Equal(
 		t,
 		expectedCount,
-		cc.logs.Load(),
-		expectedCount/10,
+		int(cc.logs.Load()),
 		"Unexpected number of logs",
 	)
+	assert.Equal(t,
+		expectedCount,
+		int(sampled.Load()),
+		"Unexpected number of logs sampled",
+	)
+	assert.Equal(t,
+		expectedDropped,
+		int(dropped.Load()),
+		"Unexpected number of logs dropped",
+	)
+
 }
 
 func TestSamplerRaces(t *testing.T) {