Skip to content

Commit

Permalink
profiler: fix TestStopLatency (take 2)
Browse files Browse the repository at this point in the history
PR #1297 attempted to fix the flakiness noted in issue #1294 by creating
two seperate tests: one which runs a long profile to test the latency of
stopping the profile, and another which runs short profiles and makes
uploading hang indefinitely. However, the upload test had such a short
profiling period that the inner select statement in (*profiler).collect
could take several iterations to actually cancel due to the Go runtime
randomly choosing select cases when multiple cases are ready.

In addition, the "stop-profiler" case didn't actually test what it was
intended to test since profiling doesn't actually run until one full
profiling period has elapsed. Since the period was set to an hour, Stop
was called withouth profiling actually started.

Merge the two tests back into one. This brings us full-circle back to
the original test, but with a more generous window on how long stopping
should take and without relying on modifying internal functions to make
the test work.
  • Loading branch information
nsrip-dd committed May 25, 2022
1 parent f6c2e59 commit 3bcd3ff
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 49 deletions.
83 changes: 34 additions & 49 deletions profiler/profiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,58 +184,43 @@ func TestStartStopIdempotency(t *testing.T) {
// TestStopLatency tries to make sure that calling Stop() doesn't hang, i.e.
// that ongoing profiling or upload operations are immediately canceled.
func TestStopLatency(t *testing.T) {
t.Skip("broken test, see issue #1294")

t.Run("stop-profiles", func(t *testing.T) {
Start(
WithPeriod(time.Hour),
CPUDuration(time.Hour),
)

// give profiling time to start
time.Sleep(50 * time.Millisecond)
start := time.Now()
Stop()
elapsed := time.Since(start)
// CPU profiling polls in 100 millisecond intervals and this can't be
// interrupted by pprof.StopCPUProfile, so we can't guarantee profiling
// will stop faster than 200 milliseconds (one interval to read
// remaining samples, second interval to detect profiling has stopped).
// Set a conservative upper bound on that stop time plus profile
// serialization.
if elapsed > 500*time.Millisecond {
t.Errorf("profiler took %v to stop", elapsed)
received := make(chan struct{})
stop := make(chan struct{})
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
select {
case received <- struct{}{}:
default:
}
})
<-stop
}))
defer server.Close()
defer close(stop)

t.Run("stop-upload", func(t *testing.T) {
received := make(chan struct{})
stop := make(chan struct{})
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
select {
case received <- struct{}{}:
default:
}
<-stop
}))
defer server.Close()
defer close(stop)

Start(
WithAgentAddr(server.Listener.Addr().String()),
WithPeriod(10*time.Millisecond),
CPUDuration(10*time.Millisecond),
WithUploadTimeout(time.Hour),
)
Start(
WithAgentAddr(server.Listener.Addr().String()),
WithPeriod(time.Second),
CPUDuration(time.Second),
WithUploadTimeout(time.Hour),
)

<-received
start := time.Now()
Stop()
elapsed := time.Since(start)
if elapsed > 500*time.Millisecond {
t.Errorf("profiler took %v to stop", elapsed)
}
})
<-received
// received indicates that an upload has started, and due to waiting on
// stop the upload won't actually complete. So we know calling Stop()
// should interrupt the upload. Ideally profiling is also currently
// running, though that is harder to detect and guarantee.
start := time.Now()
Stop()

// CPU profiling can take up to 200ms to stop. This is because the inner
// loop of CPU profiling has an uninterruptible 100ms sleep. Each
// iteration reads available profile samples, so it can take two
// iterations to stop: one to read the remaining samples, and a second
// to detect that there are no more samples and exit. We give extra time
// on top of that to account for CI slowness.
elapsed := time.Since(start)
if elapsed > 500*time.Millisecond {
t.Errorf("profiler took %v to stop", elapsed)
}
}

func TestProfilerInternal(t *testing.T) {
Expand Down
3 changes: 3 additions & 0 deletions profiler/upload.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ func (p *profiler) doRequest(bat batch) error {
cancel()
}()
req, err := http.NewRequestWithContext(ctx, "POST", p.cfg.targetURL, body)
if err != nil {
return err
}
if p.cfg.apiKey != "" {
req.Header.Set("DD-API-KEY", p.cfg.apiKey)
}
Expand Down

0 comments on commit 3bcd3ff

Please sign in to comment.