diff --git a/pkg/manager/internal.go b/pkg/manager/internal.go index d253b9a6ad..fd92ea1cb4 100644 --- a/pkg/manager/internal.go +++ b/pkg/manager/internal.go @@ -117,6 +117,10 @@ type controllerManager struct { // it must be deferred until after gracefulShutdown is done. leaderElectionCancel context.CancelFunc + // leaderElectionStopped is an internal channel used to signal the stopping procedure that the + // LeaderElection.Run(...) function has returned and the shutdown can proceed. + leaderElectionStopped chan struct{} + // stop procedure engaged. In other words, we should not add anything else to the manager stopProcedureEngaged bool @@ -545,11 +549,16 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e // waitForRunnableToEnd blocks until all runnables ended or the // tearDownTimeout was reached. In the latter case, an error is returned. -func (cm *controllerManager) waitForRunnableToEnd(shutdownCancel context.CancelFunc) error { +func (cm *controllerManager) waitForRunnableToEnd(shutdownCancel context.CancelFunc) (retErr error) { // Cancel leader election only after we waited. It will os.Exit() the app for safety. defer func() { - if cm.leaderElectionCancel != nil { + if retErr == nil && cm.leaderElectionCancel != nil { + // After asking the context to be cancelled, make sure + // we wait for the leader stopped channel to be closed, otherwise + // we might encounter race conditions between this code + // and the event recorder, which is used within leader election code. cm.leaderElectionCancel() + <-cm.leaderElectionStopped } }() @@ -652,7 +661,11 @@ func (cm *controllerManager) startLeaderElection() (err error) { } // Start the leader elector process - go l.Run(ctx) + go func() { + l.Run(ctx) + <-ctx.Done() + close(cm.leaderElectionStopped) + }() return nil } diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 2f676ad7a0..3fd790d34e 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -350,6 +350,7 @@ func New(config *rest.Config, options Options) (Manager, error) { livenessEndpointName: options.LivenessEndpointName, gracefulShutdownTimeout: *options.GracefulShutdownTimeout, internalProceduresStop: make(chan struct{}), + leaderElectionStopped: make(chan struct{}), }, nil }