From 418c141e649aeec2c31d964e0cf7cd7f7735d767 Mon Sep 17 00:00:00 2001 From: Cory Snider Date: Mon, 22 Aug 2022 22:34:51 -0400 Subject: [PATCH] [20.10 backport] daemon: kill exec process on ctx cancel Terminating the exec process when the context is canceled has been broken since Docker v17.11 so nobody has been able to depend upon that behaviour in five years of releases. We are thus free from backwards- compatibility constraints. conflicts: - minor conflict in daemon/exec.go, as 2ec2b65e45ca6ae28480c6da49aaf06fda1a091f is not in the 20.10 branch, so had to cast the signal to an int. - minor conflict in daemon/health.go, where a comment was updated, which was added in bdc6473d2de1eca6fe5d0c2e59c9411a463281cc, which is not in the 20.10 branch - remove the skip.If() from TestHealthCheckProcessKilled, as the 20.10 branch is not testing on Windows with containerd (and the RuntimeIsWindowsContainerd does not exist), but kept a "FIXME" comment. Co-authored-by: Nicolas De Loof Co-authored-by: Sebastiaan van Stijn Signed-off-by: Nicolas De Loof Signed-off-by: Cory Snider Signed-off-by: Sebastiaan van Stijn (cherry picked from commit 4b84a3321723a849295d5cbf7342ec36077f9179) Signed-off-by: Sebastiaan van Stijn --- daemon/exec.go | 29 ++++++++++------------ integration/container/health_test.go | 37 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 16 deletions(-) diff --git a/daemon/exec.go b/daemon/exec.go index 78a593a6e2184..7ad5c4a17efbc 100644 --- a/daemon/exec.go +++ b/daemon/exec.go @@ -22,9 +22,6 @@ import ( "github.com/sirupsen/logrus" ) -// Seconds to wait after sending TERM before trying KILL -const termProcessTimeout = 10 * time.Second - func (daemon *Daemon) registerExecCommand(container *container.Container, config *exec.Config) { // Storing execs in container in order to kill them gracefully whenever the container is stopped or removed. container.ExecCommands.Add(config.ID, config) @@ -255,7 +252,10 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, stdin CloseStdin: true, } ec.StreamConfig.AttachStreams(&attachConfig) - attachErr := ec.StreamConfig.CopyStreams(ctx, &attachConfig) + // using context.Background() so that attachErr does not race ctx.Done(). + copyCtx, cancel := context.WithCancel(context.Background()) + defer cancel() + attachErr := ec.StreamConfig.CopyStreams(copyCtx, &attachConfig) // Synchronize with libcontainerd event loop ec.Lock() @@ -275,18 +275,15 @@ func (daemon *Daemon) ContainerExecStart(ctx context.Context, name string, stdin select { case <-ctx.Done(): - logrus.Debugf("Sending TERM signal to process %v in container %v", name, c.ID) - daemon.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["TERM"])) - - timeout := time.NewTimer(termProcessTimeout) - defer timeout.Stop() - - select { - case <-timeout.C: - logrus.Infof("Container %v, process %v failed to exit within %v of signal TERM - using the force", c.ID, name, termProcessTimeout) - daemon.containerd.SignalProcess(ctx, c.ID, name, int(signal.SignalMap["KILL"])) - case <-attachErr: - // TERM signal worked + log := logrus. + WithField("container", c.ID). + WithField("exec", name) + log.Debug("Sending KILL signal to container process") + sigCtx, cancelFunc := context.WithTimeout(context.Background(), 30*time.Second) + defer cancelFunc() + err := daemon.containerd.SignalProcess(sigCtx, c.ID, name, int(signal.SignalMap["KILL"])) + if err != nil { + log.WithError(err).Error("Could not send KILL signal to container process") } return ctx.Err() case err := <-attachErr: diff --git a/integration/container/health_test.go b/integration/container/health_test.go index ad0245c6a80d4..d3851edbb7703 100644 --- a/integration/container/health_test.go +++ b/integration/container/health_test.go @@ -2,6 +2,7 @@ package container // import "github.com/docker/docker/integration/container" import ( "context" + "fmt" "testing" "time" @@ -93,6 +94,42 @@ while true; do sleep 1; done poll.WaitOn(t, pollForHealthStatus(ctxPoll, client, id, "healthy"), poll.WithDelay(100*time.Millisecond)) } +// TestHealthCheckProcessKilled verifies that health-checks exec get killed on time-out. +func TestHealthCheckProcessKilled(t *testing.T) { + // FIXME: Broken on Windows + containerd combination + defer setupTest(t)() + ctx := context.Background() + apiClient := testEnv.APIClient() + + cID := container.Run(ctx, t, apiClient, func(c *container.TestContainerConfig) { + c.Config.Healthcheck = &containertypes.HealthConfig{ + Test: []string{"CMD", "sh", "-c", "sleep 60"}, + Interval: 100 * time.Millisecond, + Timeout: 50 * time.Millisecond, + Retries: 1, + } + }) + poll.WaitOn(t, pollForHealthCheckLog(ctx, apiClient, cID, "Health check exceeded timeout (50ms)")) +} + +func pollForHealthCheckLog(ctx context.Context, client client.APIClient, containerID string, expected string) func(log poll.LogT) poll.Result { + return func(log poll.LogT) poll.Result { + inspect, err := client.ContainerInspect(ctx, containerID) + if err != nil { + return poll.Error(err) + } + healthChecksTotal := len(inspect.State.Health.Log) + if healthChecksTotal > 0 { + output := inspect.State.Health.Log[healthChecksTotal-1].Output + if output == expected { + return poll.Success() + } + return poll.Error(fmt.Errorf("expected %q, got %q", expected, output)) + } + return poll.Continue("waiting for container healthcheck logs") + } +} + func pollForHealthStatus(ctx context.Context, client client.APIClient, containerID string, healthStatus string) func(log poll.LogT) poll.Result { return func(log poll.LogT) poll.Result { inspect, err := client.ContainerInspect(ctx, containerID)