From 3889a6cac891854d5a4e8d3b9a034178fe981279 Mon Sep 17 00:00:00 2001 From: SataQiu Date: Wed, 29 Jun 2022 17:54:56 +0800 Subject: [PATCH] kubeadm: support retry mechanism for removing container in reset phase --- cmd/kubeadm/app/constants/constants.go | 2 ++ cmd/kubeadm/app/util/runtime/runtime.go | 23 +++++++++++++++----- cmd/kubeadm/app/util/runtime/runtime_test.go | 11 ++++------ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/cmd/kubeadm/app/constants/constants.go b/cmd/kubeadm/app/constants/constants.go index 124295d05127..ceed2ac2bb5f 100644 --- a/cmd/kubeadm/app/constants/constants.go +++ b/cmd/kubeadm/app/constants/constants.go @@ -218,6 +218,8 @@ const ( APICallWithReadTimeout = 15 * time.Second // PullImageRetry specifies how many times ContainerRuntime retries when pulling image failed PullImageRetry = 5 + // RemoveContainerRetry specifies how many times ContainerRuntime retries when removing container failed + RemoveContainerRetry = 5 // DefaultControlPlaneTimeout specifies the default control plane (actually API Server) timeout for use by kubeadm DefaultControlPlaneTimeout = 4 * time.Minute diff --git a/cmd/kubeadm/app/util/runtime/runtime.go b/cmd/kubeadm/app/util/runtime/runtime.go index e76f4836cb96..0d0bf976eaac 100644 --- a/cmd/kubeadm/app/util/runtime/runtime.go +++ b/cmd/kubeadm/app/util/runtime/runtime.go @@ -23,6 +23,7 @@ import ( "github.com/pkg/errors" errorsutil "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/klog/v2" utilsexec "k8s.io/utils/exec" "k8s.io/kubernetes/cmd/kubeadm/app/constants" @@ -97,15 +98,25 @@ func (runtime *CRIRuntime) ListKubeContainers() ([]string, error) { func (runtime *CRIRuntime) RemoveContainers(containers []string) error { errs := []error{} for _, container := range containers { - out, err := runtime.crictl("stopp", container).CombinedOutput() - if err != nil { - // don't stop on errors, try to remove as many containers as possible - errs = append(errs, errors.Wrapf(err, "failed to stop running pod %s: output: %s, error", container, string(out))) - } else { + var lastErr error + for i := 0; i < constants.RemoveContainerRetry; i++ { + klog.V(5).Infof("Attempting to remove container %v", container) + out, err := runtime.crictl("stopp", container).CombinedOutput() + if err != nil { + lastErr = errors.Wrapf(err, "failed to stop running pod %s: output: %s", container, string(out)) + continue + } out, err = runtime.crictl("rmp", container).CombinedOutput() if err != nil { - errs = append(errs, errors.Wrapf(err, "failed to remove running container %s: output: %s, error", container, string(out))) + lastErr = errors.Wrapf(err, "failed to remove running container %s: output: %s", container, string(out)) + continue } + lastErr = nil + break + } + + if lastErr != nil { + errs = append(errs, lastErr) } } return errorsutil.NewAggregate(errs) diff --git a/cmd/kubeadm/app/util/runtime/runtime_test.go b/cmd/kubeadm/app/util/runtime/runtime_test.go index 31040481623e..3e394d980d70 100644 --- a/cmd/kubeadm/app/util/runtime/runtime_test.go +++ b/cmd/kubeadm/app/util/runtime/runtime_test.go @@ -167,11 +167,8 @@ func TestRemoveContainers(t *testing.T) { fcmd := fakeexec.FakeCmd{ CombinedOutputScript: []fakeexec.FakeAction{ fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, // Test case 1 - fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeOK, - fakeErr, fakeOK, fakeOK, fakeErr, fakeOK, - fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, fakeOK, - fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeOK, - fakeErr, fakeOK, fakeOK, fakeErr, fakeOK, + fakeOK, fakeOK, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeErr, fakeOK, fakeOK, // Test case 2 + fakeErr, fakeErr, fakeErr, fakeErr, fakeErr, fakeOK, fakeOK, fakeOK, fakeOK, // Test case 3 }, } execer := fakeexec.FakeExec{ @@ -186,8 +183,8 @@ func TestRemoveContainers(t *testing.T) { isError bool }{ {"valid: remove containers using CRI", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, false}, // Test case 1 - {"invalid: CRI rmp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, - {"invalid: CRI stopp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, + {"invalid: CRI rmp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, // Test case 2 + {"invalid: CRI stopp failure", "unix:///var/run/crio/crio.sock", []string{"k8s_p1", "k8s_p2", "k8s_p3"}, true}, // Test case 3 } for _, tc := range cases {