Skip to content

Commit

Permalink
Resolved race condition in collector when calling Shutdown (#4878)
Browse files Browse the repository at this point in the history
* Refactored collector shutdown pattern to remove race condition

Signed-off-by: Corbin Phelps <corbin.phelps@bluemedora.com>

* Updated changelog

Signed-off-by: Corbin Phelps <corbin.phelps@bluemedora.com>

* Remove Created state from collector to make shutdown behave more in line with language standards

Signed-off-by: Corbin Phelps <corbin.phelps@bluemedora.com>

* Removed log from shutdown recover that caused race condition

Signed-off-by: Corbin Phelps <corbin.phelps@bluemedora.com>

* Added lint ignore for shutdown recover

Signed-off-by: Corbin Phelps <corbin.phelps@bluemedora.com>
  • Loading branch information
Corbin Phelps committed Mar 2, 2022
1 parent 5f2d874 commit 02d0fe5
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
that calls shutdown to terminate it; this is done per memory limiter instance.
Added a memory limiter factory that caches the initialized object so it can be reused by similar configs. This guarantees a single
running `checkMemLimits` per config (#4886)
- Resolved race condition in collector when calling `Shutdown` (#4878)

## v0.45.0 Beta

Expand Down
17 changes: 10 additions & 7 deletions service/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ func New(set CollectorSettings) (*Collector, error) {

set: set,
state: state,

shutdownChan: make(chan struct{}),
}, nil

}
Expand All @@ -113,12 +115,14 @@ func (col *Collector) GetLogger() *zap.Logger {

// Shutdown shuts down the collector server.
func (col *Collector) Shutdown() {
defer func() {
if r := recover(); r != nil {
col.logger.Info("shutdownChan already closed")
}
}()
close(col.shutdownChan)
// Only shutdown if we're in a Running or Starting State else noop
state := col.GetState()
if state == Running || state == Starting {
defer func() {
recover() // nolint:errcheck
}()
close(col.shutdownChan)
}
}

// runAndWaitForShutdownEvent waits for one of the shutdown events that can happen.
Expand All @@ -131,7 +135,6 @@ func (col *Collector) runAndWaitForShutdownEvent(ctx context.Context) error {
signal.Notify(col.signalsChannel, os.Interrupt, syscall.SIGTERM)
}

col.shutdownChan = make(chan struct{})
col.setCollectorState(Running)
LOOP:
for {
Expand Down
36 changes: 36 additions & 0 deletions service/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,42 @@ func TestCollector_ShutdownNoop(t *testing.T) {
require.NotPanics(t, func() { col.Shutdown() })
}

// TestCollector_ShutdownBeforeRun calls Shutdown once before Run has been
// started and once more after Run has been launched in a goroutine, then waits
// for Run to return and for the collector to report the Closed state.
func TestCollector_ShutdownBeforeRun(t *testing.T) {
// Swap in a fresh colTelemetry for the duration of the test and restore the
// previous package-level value when the test exits.
preservedAppTelemetry := collectorTelemetry
collectorTelemetry = &colTelemetry{}
defer func() { collectorTelemetry = preservedAppTelemetry }()

factories, err := testcomponents.NewDefaultFactories()
require.NoError(t, err)

set := CollectorSettings{
BuildInfo: component.NewDefaultBuildInfo(),
Factories: factories,
ConfigProvider: MustNewDefaultConfigProvider([]string{filepath.Join("testdata", "otelcol-config.yaml")}, nil),
}
col, err := New(set)
require.NoError(t, err)

// Calling shutdown before collector is running should cause it to return quickly
col.Shutdown()

// Run executes in the background; colDone is closed once Run has returned.
colDone := make(chan struct{})
go func() {
defer close(colDone)
colErr := col.Run(context.Background())
if colErr != nil {
// NOTE(review): err is assigned here but is not checked anywhere after
// the goroutine finishes in this test.
err = colErr
}
}()

// Second Shutdown call, issued after the Run goroutine has been launched
// (Run may or may not have begun executing at this point).
col.Shutdown()
// Block until Run has returned.
<-colDone
// Poll every 200ms, for up to 2s, until the collector reports the Closed state.
assert.Eventually(t, func() bool {
return Closed == col.GetState()
}, time.Second*2, time.Millisecond*200)
}

type mockColTelemetry struct{}

func (tel *mockColTelemetry) init(*Collector) error {
Expand Down

0 comments on commit 02d0fe5

Please sign in to comment.