From b4932cd60bd32acfdf2d07eb7d902fa5a07b20ce Mon Sep 17 00:00:00 2001 From: Qiutong Song Date: Mon, 3 Oct 2022 12:12:40 +0000 Subject: [PATCH 1/3] Persist container and sandbox if resource cleanup fails, like teardownPodNetwork Signed-off-by: Qiutong Song --- .../container_update_resources_other.go | 24 ++- .../container_update_resources_windows.go | 24 ++- pkg/cri/server/sandbox_run.go | 164 +++++++++++------- pkg/cri/server/sandbox_run_linux.go | 9 + pkg/cri/server/sandbox_run_other.go | 3 + pkg/cri/server/sandbox_run_windows.go | 4 + pkg/cri/store/sandbox/status.go | 36 ++-- 7 files changed, 184 insertions(+), 80 deletions(-) diff --git a/pkg/cri/server/container_update_resources_other.go b/pkg/cri/server/container_update_resources_other.go index 38dc89dc7001..2eb5b81f7d67 100644 --- a/pkg/cri/server/container_update_resources_other.go +++ b/pkg/cri/server/container_update_resources_other.go @@ -20,8 +20,13 @@ package server import ( + "context" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" + "github.com/containerd/typeurl" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" - "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" containerstore "github.com/containerd/containerd/pkg/cri/store/container" @@ -43,3 +48,20 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up } return &runtime.UpdateContainerResourcesResponse{}, nil } + +// updateContainerSpec updates container spec. +// Copied from container_update_resources_linux.go because it only builds on Linux +// updateContainerSpec updates container spec. +func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { + any, err := typeurl.MarshalAny(spec) + if err != nil { + return errors.Wrapf(err, "failed to marshal spec %+v", spec) + } + if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error { + c.Spec = any + return nil + }); err != nil { + return errors.Wrap(err, "failed to update container spec") + } + return nil +} diff --git a/pkg/cri/server/container_update_resources_windows.go b/pkg/cri/server/container_update_resources_windows.go index a7cb2489b3ab..747af0084649 100644 --- a/pkg/cri/server/container_update_resources_windows.go +++ b/pkg/cri/server/container_update_resources_windows.go @@ -20,8 +20,14 @@ package server import ( + "context" + + "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" "github.com/containerd/containerd/errdefs" - "golang.org/x/net/context" + "github.com/containerd/typeurl" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) @@ -30,3 +36,19 @@ import ( func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) { return nil, errdefs.ErrNotImplemented } + +// updateContainerSpec updates container spec. +// Copied from container_update_resources_linux.go because it only builds on Linux +func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { + any, err := typeurl.MarshalAny(spec) + if err != nil { + return errors.Wrapf(err, "failed to marshal spec %+v", spec) + } + if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error { + c.Spec = any + return nil + }); err != nil { + return errors.Wrap(err, "failed to update container spec") + } + return nil +} diff --git a/pkg/cri/server/sandbox_run.go b/pkg/cri/server/sandbox_run.go index 642cf4082483..56777db9d4a3 100644 --- a/pkg/cri/server/sandbox_run.go +++ b/pkg/cri/server/sandbox_run.go @@ -67,15 +67,23 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, errors.New("sandbox config must include metadata") } name := makeSandboxName(metadata) - log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name) + log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name) + + // cleanupErr records the last error returned by the critical cleanup operations in deferred functions, + // like CNI teardown and stopping the running sandbox task. + // If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox + // in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow. + var cleanupErr error + // Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the // same sandbox. if err := c.sandboxNameIndex.Reserve(name, id); err != nil { return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name) } defer func() { - // Release the name if the function returns with an error. - if retErr != nil { + // Release the name if the function returns with an error and all the resource cleanup is done. + // When cleanupErr != nil, the name will be cleaned in sandbox_remove. + if retErr != nil && cleanupErr == nil { c.sandboxNameIndex.ReleaseByName(name) } }() @@ -109,61 +117,12 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox } log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id) - podNetwork := true - // Pod network is always needed on windows. - if goruntime.GOOS != "windows" && - config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { - // Pod network is not needed on linux with host network. - podNetwork = false - } - if podNetwork { - // If it is not in host network namespace then create a namespace and set the sandbox - // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network - // namespaces. If the pod is in host network namespace then both are empty and should not - // be used. - var netnsMountDir = "/var/run/netns" - if c.config.NetNSMountsUnderStateDir { - netnsMountDir = filepath.Join(c.config.StateDir, "netns") - } - sandbox.NetNS, err = netns.NewNetNS(netnsMountDir) - if err != nil { - return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id) - } - sandbox.NetNSPath = sandbox.NetNS.GetPath() - defer func() { - if retErr != nil { - deferCtx, deferCancel := ctrdutil.DeferContext() - defer deferCancel() - // Teardown network if an error is returned. - if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id) - } - - if err := sandbox.NetNS.Remove(); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) - } - sandbox.NetNSPath = "" - } - }() - - // Setup network for sandbox. - // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524) - // rely on the assumption that CRI shim will not be querying the network namespace to check the - // network states such as IP. - // In future runtime implementation should avoid relying on CRI shim implementation details. - // In this case however caching the IP will add a subtle performance enhancement by avoiding - // calls to network namespace of the pod to query the IP of the veth interface on every - // SandboxStatus request. - if err := c.setupPodNetwork(ctx, &sandbox); err != nil { - return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id) - } - } - // Create sandbox container. // NOTE: sandboxContainerSpec SHOULD NOT have side // effect, e.g. accessing/creating files, so that we can test // it safely. - spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations) + // NOTE: the network namespace path will be created later and update through updateNetNamespacePath function + spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations) if err != nil { return nil, errors.Wrap(err, "failed to generate sandbox container spec") } @@ -212,12 +171,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox if err != nil { return nil, errors.Wrap(err, "failed to create containerd container") } + + // Add container into sandbox store in INIT state. + sandbox.Container = container + defer func() { - if retErr != nil { + // Put the sandbox into sandbox store when the some resource fails to be cleaned. + if retErr != nil && cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id) + if err := c.sandboxStore.Add(sandbox); err != nil { + log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox) + } + } + }() + + defer func() { + // Delete container only if all the resource cleanup is done. + if retErr != nil && cleanupErr == nil { deferCtx, deferCancel := ctrdutil.DeferContext() defer deferCancel() - if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil { - log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id) + if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id) } } }() @@ -271,6 +245,78 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, errors.Wrap(err, "failed to get sandbox container info") } + podNetwork := true + + if goruntime.GOOS != "windows" && + config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE { + // Pod network is not needed on linux with host network. + podNetwork = false + } + + if podNetwork { + // If it is not in host network namespace then create a namespace and set the sandbox + // handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network + // namespaces. If the pod is in host network namespace then both are empty and should not + // be used. + var netnsMountDir = "/var/run/netns" + if c.config.NetNSMountsUnderStateDir { + netnsMountDir = filepath.Join(c.config.StateDir, "netns") + } + sandbox.NetNS, err = netns.NewNetNS(netnsMountDir) + if err != nil { + return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id) + } + sandbox.NetNSPath = sandbox.NetNS.GetPath() + + defer func() { + // Remove the network namespace only if all the resource cleanup is done. + if retErr != nil && cleanupErr == nil { + if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id) + return + } + sandbox.NetNSPath = "" + } + }() + + // Update network namespace in the container's spec + c.updateNetNamespacePath(spec, sandbox.NetNSPath) + + if err := container.Update(ctx, + // Update spec of the container + containerd.UpdateContainerOpts(containerd.WithSpec(spec)), + // Update sandbox metadata to include NetNS info + containerd.UpdateContainerOpts(containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata))); err != nil { + return nil, errors.Wrapf(err, "failed to update the network namespace for the sandbox container %q", id) + } + + // Define this defer to teardownPodNetwork prior to the setupPodNetwork function call. + // This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions. + defer func() { + // Teardown the network only if all the resource cleanup is done. + if retErr != nil && cleanupErr == nil { + deferCtx, deferCancel := ctrdutil.DeferContext() + defer deferCancel() + // Teardown network if an error is returned. + if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id) + } + } + }() + + // Setup network for sandbox. + // Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524) + // rely on the assumption that CRI shim will not be querying the network namespace to check the + // network states such as IP. + // In future runtime implementation should avoid relying on CRI shim implementation details. + // In this case however caching the IP will add a subtle performance enhancement by avoiding + // calls to network namespace of the pod to query the IP of the veth interface on every + // SandboxStatus request. + if err := c.setupPodNetwork(ctx, &sandbox); err != nil { + return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id) + } + } + // Create sandbox task in containerd. log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).", id, name) @@ -288,6 +334,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox // Cleanup the sandbox container if an error is returned. if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) { log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id) + cleanupErr = err } } }() @@ -326,9 +373,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox return nil, errors.Wrap(err, "failed to update sandbox status") } - // Add sandbox into sandbox store in INIT state. - sandbox.Container = container - if err := c.sandboxStore.Add(sandbox); err != nil { return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox) } diff --git a/pkg/cri/server/sandbox_run_linux.go b/pkg/cri/server/sandbox_run_linux.go index 85cfeb37574a..634dc6e27e81 100644 --- a/pkg/cri/server/sandbox_run_linux.go +++ b/pkg/cri/server/sandbox_run_linux.go @@ -320,3 +320,12 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { return taskOpts } + +func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { + for i := range spec.Linux.Namespaces { + if spec.Linux.Namespaces[i].Type == runtimespec.NetworkNamespace { + spec.Linux.Namespaces[i].Path = nsPath + break + } + } +} diff --git a/pkg/cri/server/sandbox_run_other.go b/pkg/cri/server/sandbox_run_other.go index e4e876c012d6..59ed5ef22e82 100644 --- a/pkg/cri/server/sandbox_run_other.go +++ b/pkg/cri/server/sandbox_run_other.go @@ -54,3 +54,6 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { return []containerd.NewTaskOpts{} } + +func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { +} diff --git a/pkg/cri/server/sandbox_run_windows.go b/pkg/cri/server/sandbox_run_windows.go index c165344b392f..06709ff5c697 100644 --- a/pkg/cri/server/sandbox_run_windows.go +++ b/pkg/cri/server/sandbox_run_windows.go @@ -92,3 +92,7 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts { return nil } + +func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) { + spec.Windows.Network.NetworkNamespace = nsPath +} diff --git a/pkg/cri/store/sandbox/status.go b/pkg/cri/store/sandbox/status.go index e9198eb976a7..787d31968ba0 100644 --- a/pkg/cri/store/sandbox/status.go +++ b/pkg/cri/store/sandbox/status.go @@ -29,24 +29,24 @@ import ( // | | // | Create(Run) | Load // | | -// Start | | -// (failed) | | -// +------------------+ +-----------+ -// | | | | -// | | | | -// | | | | -// | | Start(Run) | | -// | | | | -// | PortForward +----v----+ | | -// | +------+ | | | -// | | | READY <---------+ | -// | +------> | | | -// | +----+----+ | | -// | | | | -// | | Stop/Exit | | -// | | | | -// | +----v----+ | | -// | | <---------+ +----v----+ +// | | +// | | Start +// | |(failed and not cleaned) +// Start |--------------|--------------+ +//(failed but cleaned)| | | +// +------------------+ |-----------+ | +// | | Start(Run) | | | +// | | | | | +// | PortForward +----v----+ | | | +// | +------+ | | | | +// | | | READY <---------+ | | +// | +------> | | | | +// | +----+----+ | | | +// | | | | | +// | | Stop/Exit | | | +// | | | | | +// | +----v----+ | | | +// | | <---------+ +----v--v-+ // | | NOTREADY| | | // | | <----------------+ UNKNOWN | // | +----+----+ Stop | | From b40d1ef9f6b49978260bc97c81de52bd05ddf42c Mon Sep 17 00:00:00 2001 From: Qiutong Song Date: Tue, 27 Sep 2022 14:39:07 +0000 Subject: [PATCH 2/3] Add integration tests with failpoint Signed-off-by: Qiutong Song (cherry picked from commit 4196fad0278d6001666ba210168e873555c31735) Signed-off-by: Qiutong Song (cherry picked from commit b9a35c6af9519630179b745e48bd29fd4d067c83) Signed-off-by: Qiutong Song --- integration/main_test.go | 10 ++ .../sandbox_run_rollback_linux_test.go | 139 +++++++++++++++++- 2 files changed, 144 insertions(+), 5 deletions(-) diff --git a/integration/main_test.go b/integration/main_test.go index 0c8a3dcbfe71..7f6a838cb0ad 100644 --- a/integration/main_test.go +++ b/integration/main_test.go @@ -164,6 +164,15 @@ func WithPodHostname(hostname string) PodSandboxOpts { } } +// Add pod labels. +func WithPodLabels(kvs map[string]string) PodSandboxOpts { + return func(p *runtime.PodSandboxConfig) { + for k, v := range kvs { + p.Labels[k] = v + } + } +} + // PodSandboxConfig generates a pod sandbox config for test. func PodSandboxConfig(name, ns string, opts ...PodSandboxOpts) *runtime.PodSandboxConfig { config := &runtime.PodSandboxConfig{ @@ -176,6 +185,7 @@ func PodSandboxConfig(name, ns string, opts ...PodSandboxOpts) *runtime.PodSandb }, Linux: &runtime.LinuxPodSandboxConfig{}, Annotations: make(map[string]string), + Labels: make(map[string]string), } for _, opt := range opts { opt(config) diff --git a/integration/sandbox_run_rollback_linux_test.go b/integration/sandbox_run_rollback_linux_test.go index e46a475b1392..c42d4895ef01 100644 --- a/integration/sandbox_run_rollback_linux_test.go +++ b/integration/sandbox_run_rollback_linux_test.go @@ -23,6 +23,7 @@ import ( "io/ioutil" "os" "path/filepath" + "runtime" "strings" "testing" "time" @@ -32,7 +33,7 @@ import ( "github.com/containerd/go-cni" "github.com/pkg/errors" "github.com/stretchr/testify/require" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" + criapiv1alpha2 "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) const ( @@ -84,6 +85,134 @@ func TestRunPodSandboxWithShimStartFailure(t *testing.T) { require.Equal(t, true, strings.Contains(err.Error(), "no hard feelings")) } +// TestRunPodSandboxWithShimDeleteFailure should keep the sandbox record if +// failed to rollback shim by shim.Delete API. +func TestRunPodSandboxWithShimDeleteFailure(t *testing.T) { + testCase := func(restart bool) func(*testing.T) { + return func(t *testing.T) { + t.Log("Init PodSandboxConfig with specific label") + labels := map[string]string{ + t.Name(): "true", + } + sbConfig := PodSandboxConfig(t.Name(), "failpoint", WithPodLabels(labels)) + + t.Log("Inject Shim failpoint") + injectShimFailpoint(t, sbConfig, map[string]string{ + "Start": "1*error(failed to start shim)", + "Delete": "1*error(please retry)", // inject failpoint during rollback shim + }) + + t.Log("Create a sandbox") + _, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to start shim") + + t.Log("ListPodSandbox with the specific label") + l, err := runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{LabelSelector: labels}) + require.NoError(t, err) + require.Len(t, l, 1) + + sb := l[0] + require.Equal(t, sb.State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + require.Equal(t, sb.Metadata.Name, sbConfig.Metadata.Name) + require.Equal(t, sb.Metadata.Namespace, sbConfig.Metadata.Namespace) + require.Equal(t, sb.Metadata.Uid, sbConfig.Metadata.Uid) + require.Equal(t, sb.Metadata.Attempt, sbConfig.Metadata.Attempt) + + t.Log("Check PodSandboxStatus") + sbStatus, err := runtimeService.PodSandboxStatus(sb.Id) + require.NoError(t, err) + require.Equal(t, sbStatus.State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + require.Greater(t, len(sbStatus.Network.Ip), 0) + + if restart { + t.Log("Restart containerd") + RestartContainerd(t) + + t.Log("ListPodSandbox with the specific label") + l, err = runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{Id: sb.Id}) + require.NoError(t, err) + require.Len(t, l, 1) + require.Equal(t, l[0].State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + + t.Log("Check PodSandboxStatus") + sbStatus, err := runtimeService.PodSandboxStatus(sb.Id) + require.NoError(t, err) + t.Log(sbStatus.Network) + require.Equal(t, sbStatus.State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + } + + t.Log("Cleanup leaky sandbox") + err = runtimeService.RemovePodSandbox(sb.Id) + require.NoError(t, err) + } + } + + t.Run("CleanupAfterRestart", testCase(true)) + t.Run("JustCleanup", testCase(false)) +} + +// TestRunPodSandboxWithShimStartAndTeardownCNIFailure should keep the sandbox +// record if failed to rollback CNI API. +func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) { + testCase := func(restart bool) func(*testing.T) { + return func(t *testing.T) { + defer prepareFailpointCNI(t)() + + t.Log("Init PodSandboxConfig with specific key") + labels := map[string]string{ + t.Name(): "true", + } + sbConfig := PodSandboxConfig(t.Name(), "failpoint", WithPodLabels(labels)) + + t.Log("Inject Shim failpoint") + injectShimFailpoint(t, sbConfig, map[string]string{ + "Start": "1*error(failed to start shim)", + }) + + t.Log("Inject CNI failpoint") + conf := &failpointConf{ + Del: "1*error(please retry)", + } + injectCNIFailpoint(t, sbConfig, conf) + + t.Log("Create a sandbox") + _, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler) + require.Error(t, err) + require.Contains(t, err.Error(), "failed to start shim") + + t.Log("ListPodSandbox with the specific label") + l, err := runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{LabelSelector: labels}) + require.NoError(t, err) + require.Len(t, l, 1) + + sb := l[0] + require.Equal(t, sb.State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + require.Equal(t, sb.Metadata.Name, sbConfig.Metadata.Name) + require.Equal(t, sb.Metadata.Namespace, sbConfig.Metadata.Namespace) + require.Equal(t, sb.Metadata.Uid, sbConfig.Metadata.Uid) + require.Equal(t, sb.Metadata.Attempt, sbConfig.Metadata.Attempt) + + if restart { + t.Log("Restart containerd") + RestartContainerd(t) + + t.Log("ListPodSandbox with the specific label") + l, err = runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{Id: sb.Id}) + require.NoError(t, err) + require.Len(t, l, 1) + require.Equal(t, l[0].State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + } + + t.Log("Cleanup leaky sandbox") + err = runtimeService.RemovePodSandbox(sb.Id) + require.NoError(t, err) + } + } + t.Run("CleanupAfterRestart", testCase(true)) + t.Run("JustCleanup", testCase(false)) +} + // failpointConf is used to describe cmdAdd/cmdDel/cmdCheck command's failpoint. type failpointConf struct { Add string `json:"cmdAdd"` @@ -91,12 +220,12 @@ type failpointConf struct { Check string `json:"cmdCheck"` } -func injectCNIFailpoint(t *testing.T, sbConfig *runtime.PodSandboxConfig, conf *failpointConf) { +func injectCNIFailpoint(t *testing.T, sbConfig *criapiv1alpha2.PodSandboxConfig, conf *failpointConf) { stateDir := t.TempDir() metadata := sbConfig.Metadata fpFilename := filepath.Join(stateDir, - fmt.Sprintf("%s-%s.json", metadata.Namespace, metadata.Name)) + fmt.Sprintf("%s-%s.json", metadata.Namespace, strings.Replace(metadata.Name, "/", "-", -1))) data, err := json.Marshal(conf) require.NoError(t, err) @@ -107,7 +236,7 @@ func injectCNIFailpoint(t *testing.T, sbConfig *runtime.PodSandboxConfig, conf * sbConfig.Annotations[failpointCNIConfPathKey] = fpFilename } -func injectShimFailpoint(t *testing.T, sbConfig *runtime.PodSandboxConfig, methodFps map[string]string) { +func injectShimFailpoint(t *testing.T, sbConfig *criapiv1alpha2.PodSandboxConfig, methodFps map[string]string) { for method, fp := range methodFps { _, err := failpoint.NewFailpoint(method, fp) require.NoError(t, err, "check failpoint %s for shim method %s", fp, method) @@ -210,7 +339,7 @@ func getCNIConfig() (*cni.ConfigResult, error) { return nil, errors.Wrap(err, "failed to get raw runtime client") } - resp, err := client.Status(context.Background(), &runtime.StatusRequest{Verbose: true}) + resp, err := client.Status(context.Background(), &criapiv1alpha2.StatusRequest{Verbose: true}) if err != nil { return nil, errors.Wrap(err, "failed to get status") } From 0db865d8c94e16f69f2b0a631b31184707ccc69e Mon Sep 17 00:00:00 2001 From: Qiutong Song Date: Wed, 5 Oct 2022 14:48:23 +0000 Subject: [PATCH 3/3] Update container with sandbox metadata after NetNS is created Signed-off-by: Qiutong Song (cherry picked from commit b41d6f40bba0042a1db75f57e6f6b315bfb7f101) Signed-off-by: Qiutong Song (cherry picked from commit f2376e659ffa55e4ff2578baf4e4c7aab54042e4) Signed-off-by: Qiutong Song --- integration/main_test.go | 93 +++++++++++- integration/restart_test.go | 3 +- .../sandbox_run_rollback_linux_test.go | 137 +++++++++++++++++- .../container_update_resources_other.go | 24 +-- .../container_update_resources_windows.go | 21 --- script/test/utils.sh | 3 + 6 files changed, 229 insertions(+), 52 deletions(-) diff --git a/integration/main_test.go b/integration/main_test.go index 7f6a838cb0ad..37dc71e30f41 100644 --- a/integration/main_test.go +++ b/integration/main_test.go @@ -20,18 +20,23 @@ package integration import ( + "bytes" "context" "encoding/json" "flag" "fmt" + "io" "os" "os/exec" + "path/filepath" "strconv" "strings" + "syscall" "testing" "time" "github.com/containerd/containerd" + "github.com/containerd/containerd/containers" "github.com/pkg/errors" "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" @@ -328,8 +333,10 @@ func Randomize(str string) string { } // KillProcess kills the process by name. pkill is used. -func KillProcess(name string) error { - output, err := exec.Command("pkill", "-x", fmt.Sprintf("^%s$", name)).CombinedOutput() +func KillProcess(name string, signal syscall.Signal) error { + command := []string{"pkill", "-" + strconv.Itoa(int(signal)), "-x", fmt.Sprintf("^%s$", name)} + + output, err := exec.Command(command[0], command[1:]...).CombinedOutput() if err != nil { return errors.Errorf("failed to kill %q - error: %v, output: %q", name, err, output) } @@ -358,6 +365,80 @@ func PidOf(name string) (int, error) { return strconv.Atoi(output) } +// PidsOf returns pid(s) of a process by name +func PidsOf(name string) ([]int, error) { + if len(name) == 0 { + return []int{}, fmt.Errorf("name is required") + } + + procDirFD, err := os.Open("/proc") + if err != nil { + return nil, fmt.Errorf("failed to open /proc: %w", err) + } + defer procDirFD.Close() + + res := []int{} + for { + fileInfos, err := procDirFD.Readdir(100) + if err != nil { + if err == io.EOF { + break + } + return nil, fmt.Errorf("failed to readdir: %w", err) + } + + for _, fileInfo := range fileInfos { + if !fileInfo.IsDir() { + continue + } + + pid, err := strconv.Atoi(fileInfo.Name()) + if err != nil { + continue + } + + exePath, err := os.Readlink(filepath.Join("/proc", fileInfo.Name(), "exe")) + if err != nil { + continue + } + + if strings.HasSuffix(exePath, name) { + res = append(res, pid) + } + } + } + return res, nil +} + +// PidEnvs returns the environ of pid in key-value pairs. +func PidEnvs(pid int) (map[string]string, error) { + envPath := filepath.Join("/proc", strconv.Itoa(pid), "environ") + + b, err := os.ReadFile(envPath) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", envPath, err) + } + + values := bytes.Split(b, []byte{0}) + if len(values) == 0 { + return nil, nil + } + + res := make(map[string]string) + for _, value := range values { + value := strings.TrimSpace(string(value)) + if len(value) == 0 { + continue + } + + parts := strings.SplitN(value, "=", 2) + if len(parts) == 2 { + res[parts[0]] = parts[1] + } + } + return res, nil +} + // RawRuntimeClient returns a raw grpc runtime service client. func RawRuntimeClient() (runtime.RuntimeServiceClient, error) { addr, dialer, err := dialer.GetAddressAndDialer(*criEndpoint) @@ -411,8 +492,8 @@ func SandboxInfo(id string) (*runtime.PodSandboxStatus, *server.SandboxInfo, err return status, &info, nil } -func RestartContainerd(t *testing.T) { - require.NoError(t, KillProcess(*containerdBin)) +func RestartContainerd(t *testing.T, signal syscall.Signal) { + require.NoError(t, KillProcess(*containerdBin, signal)) // Use assert so that the 3rd wait always runs, this makes sure // containerd is running before this function returns. @@ -428,3 +509,7 @@ func RestartContainerd(t *testing.T) { return ConnectDaemons() == nil, nil }, time.Second, 30*time.Second), "wait for containerd to be restarted") } + +func GetContainer(id string) (containers.Container, error) { + return containerdClient.ContainerService().Get(context.Background(), id) +} diff --git a/integration/restart_test.go b/integration/restart_test.go index 644207a218d5..2cd980e8b9dc 100644 --- a/integration/restart_test.go +++ b/integration/restart_test.go @@ -21,6 +21,7 @@ package integration import ( "sort" + "syscall" "testing" "github.com/containerd/containerd" @@ -146,7 +147,7 @@ func TestContainerdRestart(t *testing.T) { assert.NoError(t, err) t.Logf("Restart containerd") - RestartContainerd(t) + RestartContainerd(t, syscall.SIGTERM) t.Logf("Check sandbox and container state after restart") loadedSandboxes, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{}) diff --git a/integration/sandbox_run_rollback_linux_test.go b/integration/sandbox_run_rollback_linux_test.go index c42d4895ef01..728bc801c100 100644 --- a/integration/sandbox_run_rollback_linux_test.go +++ b/integration/sandbox_run_rollback_linux_test.go @@ -23,21 +23,27 @@ import ( "io/ioutil" "os" "path/filepath" - "runtime" "strings" + "sync" + "syscall" "testing" "time" + "github.com/containerd/containerd/pkg/cri/store/sandbox" "github.com/containerd/containerd/pkg/failpoint" "github.com/containerd/continuity" "github.com/containerd/go-cni" + "github.com/containerd/typeurl" + runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" criapiv1alpha2 "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) const ( failpointRuntimeHandler = "runc-fp" + failpointCNIBinary = "cni-bridge-fp" failpointShimPrefixKey = "io.containerd.runtime.v2.shim.failpoint." @@ -127,7 +133,7 @@ func TestRunPodSandboxWithShimDeleteFailure(t *testing.T) { if restart { t.Log("Restart containerd") - RestartContainerd(t) + RestartContainerd(t, syscall.SIGTERM) t.Log("ListPodSandbox with the specific label") l, err = runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{Id: sb.Id}) @@ -143,6 +149,8 @@ func TestRunPodSandboxWithShimDeleteFailure(t *testing.T) { } t.Log("Cleanup leaky sandbox") + err = runtimeService.StopPodSandbox(sb.Id) + require.NoError(t, err) err = runtimeService.RemovePodSandbox(sb.Id) require.NoError(t, err) } @@ -195,7 +203,7 @@ func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) { if restart { t.Log("Restart containerd") - RestartContainerd(t) + RestartContainerd(t, syscall.SIGTERM) t.Log("ListPodSandbox with the specific label") l, err = runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{Id: sb.Id}) @@ -205,6 +213,8 @@ func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) { } t.Log("Cleanup leaky sandbox") + err = runtimeService.StopPodSandbox(sb.Id) + require.NoError(t, err) err = runtimeService.RemovePodSandbox(sb.Id) require.NoError(t, err) } @@ -213,6 +223,127 @@ func TestRunPodSandboxWithShimStartAndTeardownCNIFailure(t *testing.T) { t.Run("JustCleanup", testCase(false)) } +// TestRunPodSandboxWithShimStartAndTeardownCNISlow should keep the sandbox +// record if failed to rollback CNI API. +func TestRunPodSandboxAndTeardownCNISlow(t *testing.T) { + defer prepareFailpointCNI(t)() + + t.Log("Init PodSandboxConfig with specific key") + sbName := t.Name() + labels := map[string]string{ + sbName: "true", + } + sbConfig := PodSandboxConfig(sbName, "failpoint", WithPodLabels(labels)) + + t.Log("Inject CNI failpoint") + conf := &failpointConf{ + // Delay 1 day + Add: "1*delay(86400000)", + } + injectCNIFailpoint(t, sbConfig, conf) + + var wg sync.WaitGroup + wg.Add(1) + + go func() { + defer wg.Done() + t.Log("Create a sandbox") + _, err := runtimeService.RunPodSandbox(sbConfig, failpointRuntimeHandler) + require.Error(t, err) + require.Contains(t, err.Error(), "transport is closing") + }() + + assert.NoError(t, ensureCNIAddRunning(t, sbName), "check that failpoint CNI.Add is running") + + // Use SIGKILL to prevent containerd server gracefulshutdown which may cause indeterministic invocation of defer functions + t.Log("Restart containerd") + RestartContainerd(t, syscall.SIGKILL) + + wg.Wait() + + t.Log("ListPodSandbox with the specific label") + l, err := runtimeService.ListPodSandbox(&criapiv1alpha2.PodSandboxFilter{LabelSelector: labels}) + require.NoError(t, err) + require.Len(t, l, 1) + + sb := l[0] + + defer func() { + t.Log("Cleanup leaky sandbox") + err := runtimeService.StopPodSandbox(sb.Id) + assert.NoError(t, err) + err = runtimeService.RemovePodSandbox(sb.Id) + require.NoError(t, err) + }() + + assert.Equal(t, sb.State, criapiv1alpha2.PodSandboxState_SANDBOX_NOTREADY) + assert.Equal(t, sb.Metadata.Name, sbConfig.Metadata.Name) + assert.Equal(t, sb.Metadata.Namespace, sbConfig.Metadata.Namespace) + assert.Equal(t, sb.Metadata.Uid, sbConfig.Metadata.Uid) + assert.Equal(t, sb.Metadata.Attempt, sbConfig.Metadata.Attempt) + + t.Log("Get sandbox info") + _, info, err := SandboxInfo(sb.Id) + require.NoError(t, err) + require.False(t, info.NetNSClosed) + + var netNS string + for _, n := range info.RuntimeSpec.Linux.Namespaces { + if n.Type == runtimespec.NetworkNamespace { + netNS = n.Path + } + } + assert.NotEmpty(t, netNS, "network namespace should be set") + + t.Log("Get sandbox container") + c, err := GetContainer(sb.Id) + require.NoError(t, err) + any, ok := c.Extensions["io.cri-containerd.sandbox.metadata"] + require.True(t, ok, "sandbox metadata should exist in extension") + i, err := typeurl.UnmarshalAny(&any) + require.NoError(t, err) + require.IsType(t, &sandbox.Metadata{}, i) + metadata, ok := i.(*sandbox.Metadata) + require.True(t, ok) + assert.NotEmpty(t, metadata.NetNSPath) + assert.Equal(t, netNS, metadata.NetNSPath, "network namespace path should be the same in runtime spec and sandbox metadata") +} + +func ensureCNIAddRunning(t *testing.T, sbName string) error { + return Eventually(func() (bool, error) { + pids, err := PidsOf(failpointCNIBinary) + if err != nil || len(pids) == 0 { + return false, err + } + + for _, pid := range pids { + envs, err := PidEnvs(pid) + if err != nil { + t.Logf("failed to read environ of pid %v: %v: skip it", pid, err) + continue + } + + args, ok := envs["CNI_ARGS"] + if !ok { + t.Logf("expected CNI_ARGS env but got nothing, skip pid=%v", pid) + continue + } + + for _, arg := range strings.Split(args, ";") { + kv := strings.SplitN(arg, "=", 2) + if len(kv) != 2 { + continue + } + + if kv[0] == "K8S_POD_NAME" && kv[1] == sbName { + return true, nil + } + } + } + return false, nil + }, time.Second, 30*time.Second) +} + // failpointConf is used to describe cmdAdd/cmdDel/cmdCheck command's failpoint. type failpointConf struct { Add string `json:"cmdAdd"` diff --git a/pkg/cri/server/container_update_resources_other.go b/pkg/cri/server/container_update_resources_other.go index 2eb5b81f7d67..38dc89dc7001 100644 --- a/pkg/cri/server/container_update_resources_other.go +++ b/pkg/cri/server/container_update_resources_other.go @@ -20,13 +20,8 @@ package server import ( - "context" - - "github.com/containerd/containerd" - "github.com/containerd/containerd/containers" - "github.com/containerd/typeurl" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" + "golang.org/x/net/context" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" containerstore "github.com/containerd/containerd/pkg/cri/store/container" @@ -48,20 +43,3 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up } return &runtime.UpdateContainerResourcesResponse{}, nil } - -// updateContainerSpec updates container spec. -// Copied from container_update_resources_linux.go because it only builds on Linux -// updateContainerSpec updates container spec. -func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { - any, err := typeurl.MarshalAny(spec) - if err != nil { - return errors.Wrapf(err, "failed to marshal spec %+v", spec) - } - if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error { - c.Spec = any - return nil - }); err != nil { - return errors.Wrap(err, "failed to update container spec") - } - return nil -} diff --git a/pkg/cri/server/container_update_resources_windows.go b/pkg/cri/server/container_update_resources_windows.go index 747af0084649..2eda746b8829 100644 --- a/pkg/cri/server/container_update_resources_windows.go +++ b/pkg/cri/server/container_update_resources_windows.go @@ -22,12 +22,7 @@ package server import ( "context" - "github.com/containerd/containerd" - "github.com/containerd/containerd/containers" "github.com/containerd/containerd/errdefs" - "github.com/containerd/typeurl" - runtimespec "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2" ) @@ -36,19 +31,3 @@ import ( func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) { return nil, errdefs.ErrNotImplemented } - -// updateContainerSpec updates container spec. -// Copied from container_update_resources_linux.go because it only builds on Linux -func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error { - any, err := typeurl.MarshalAny(spec) - if err != nil { - return errors.Wrapf(err, "failed to marshal spec %+v", spec) - } - if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error { - c.Spec = any - return nil - }); err != nil { - return errors.Wrap(err, "failed to update container spec") - } - return nil -} diff --git a/script/test/utils.sh b/script/test/utils.sh index 844072095b0e..9befe826a4ee 100755 --- a/script/test/utils.sh +++ b/script/test/utils.sh @@ -113,6 +113,9 @@ test_teardown() { # keepalive runs a command and keeps it alive. # keepalive process is eventually killed in test_teardown. keepalive() { + # The command may return non-zero and we want to continue this script. + # e.g. containerd receives SIGKILL + set +e local command=$1 echo ${command} local wait_period=$2