Skip to content

Commit

Permalink
Persist container and sandbox if resource cleanup fails, like teardow…
Browse files Browse the repository at this point in the history
…nPodNetwork

Signed-off-by: Qiutong Song <songqt01@gmail.com>
  • Loading branch information
qiutongs committed Oct 3, 2022
1 parent b5f1e2e commit 2316027
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 80 deletions.
24 changes: 23 additions & 1 deletion pkg/cri/server/container_update_resources_other.go
Expand Up @@ -20,8 +20,13 @@
package server

import (
"context"

"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/typeurl"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"

containerstore "github.com/containerd/containerd/pkg/cri/store/container"
Expand All @@ -43,3 +48,20 @@ func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.Up
}
return &runtime.UpdateContainerResourcesResponse{}, nil
}

// updateContainerSpec updates container spec.
// Copied from container_update_resources_linux.go because it only builds on Linux
// updateContainerSpec updates container spec.
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
any, err := typeurl.MarshalAny(spec)
if err != nil {
return errors.Wrapf(err, "failed to marshal spec %+v", spec)
}
if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
c.Spec = any
return nil
}); err != nil {
return errors.Wrap(err, "failed to update container spec")
}
return nil
}
24 changes: 23 additions & 1 deletion pkg/cri/server/container_update_resources_windows.go
Expand Up @@ -20,8 +20,14 @@
package server

import (
"context"

"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/errdefs"
"golang.org/x/net/context"
"github.com/containerd/typeurl"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)

Expand All @@ -30,3 +36,19 @@ import (
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) {
return nil, errdefs.ErrNotImplemented
}

// updateContainerSpec updates container spec.
// Copied from container_update_resources_linux.go because it only builds on Linux
func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
any, err := typeurl.MarshalAny(spec)
if err != nil {
return errors.Wrapf(err, "failed to marshal spec %+v", spec)
}
if err := cntr.Update(ctx, func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
c.Spec = any
return nil
}); err != nil {
return errors.Wrap(err, "failed to update container spec")
}
return nil
}
160 changes: 100 additions & 60 deletions pkg/cri/server/sandbox_run.go
Expand Up @@ -18,6 +18,7 @@ package server

import (
"encoding/json"
"fmt"
"math"
"path/filepath"
goruntime "runtime"
Expand Down Expand Up @@ -67,15 +68,23 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, errors.New("sandbox config must include metadata")
}
name := makeSandboxName(metadata)
log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name)
log.G(ctx).WithField("podsandboxid", id).Debugf("generated id for sandbox name %q", name)

// cleanupErr records the last error returned by the critical cleanup operations in deferred functions,
// like CNI teardown and stopping the running sandbox task.
// If cleanup is not completed for some reason, the CRI-plugin will leave the sandbox
// in a not-ready state, which can later be cleaned up by the next execution of the kubelet's syncPod workflow.
var cleanupErr error

// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
// same sandbox.
if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name)
}
defer func() {
// Release the name if the function returns with an error.
if retErr != nil {
// Release the name if the function returns with an error and all the resource cleanup is done.
// When cleanupErr != nil, the name will be cleaned in sandbox_remove.
if retErr != nil && cleanupErr == nil {
c.sandboxNameIndex.ReleaseByName(name)
}
}()
Expand Down Expand Up @@ -109,61 +118,12 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id)

podNetwork := true
// Pod network is always needed on windows.
if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Pod network is not needed on linux with host network.
podNetwork = false
}
if podNetwork {
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()
defer func() {
if retErr != nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if err := c.teardownPodNetwork(deferCtx, sandbox); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
}

if err := sandbox.NetNS.Remove(); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
}
sandbox.NetNSPath = ""
}
}()

// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
}
}

// Create sandbox container.
// NOTE: sandboxContainerSpec SHOULD NOT have side
// effect, e.g. accessing/creating files, so that we can test
// it safely.
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
// NOTE: the network namespace path will be created later and update through updateNetNamespacePath function
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, "", ociRuntime.PodAnnotations)
if err != nil {
return nil, errors.Wrap(err, "failed to generate sandbox container spec")
}
Expand Down Expand Up @@ -212,12 +172,27 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd container")
}

// Add container into sandbox store in INIT state.
sandbox.Container = container

defer func() {
if retErr != nil {
// Put the sandbox into sandbox store when the some resource fails to be cleaned.
if retErr != nil && cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("encountered an error cleaning up failed sandbox %q, marking sandbox state as SANDBOX_UNKNOWN", id)
if err := c.sandboxStore.Add(sandbox); err != nil {
log.G(ctx).WithError(err).Errorf("failed to add sandbox %+v into store", sandbox)
}
}
}()

defer func() {
// Delete container only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
if cleanupErr = container.Delete(deferCtx, containerd.WithSnapshotCleanup); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to delete containerd container %q", id)
}
}
}()
Expand Down Expand Up @@ -271,6 +246,73 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, errors.Wrap(err, "failed to get sandbox container info")
}

podNetwork := true

if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Pod network is not needed on linux with host network.
podNetwork = false
}

if podNetwork {
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
// be used.
var netnsMountDir = "/var/run/netns"
if c.config.NetNSMountsUnderStateDir {
netnsMountDir = filepath.Join(c.config.StateDir, "netns")
}
sandbox.NetNS, err = netns.NewNetNS(netnsMountDir)
if err != nil {
return nil, fmt.Errorf("failed to create network namespace for sandbox %q: %w", id, err)
}
sandbox.NetNSPath = sandbox.NetNS.GetPath()

defer func() {
// Remove the network namespace only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
if cleanupErr = sandbox.NetNS.Remove(); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
return
}
sandbox.NetNSPath = ""
}
}()

// Update network namespace in the container's spec
c.updateNetNamespacePath(spec, sandbox.NetNSPath)
if err := updateContainerSpec(ctx, container, spec); err != nil {
return nil, fmt.Errorf("failed to update the network namespace for the sandbox container %q: %w", id, err)
}

// Define this defer to teardownPodNetwork prior to the setupPodNetwork function call.
// This is because in setupPodNetwork the resource is allocated even if it returns error, unlike other resource creation functions.
defer func() {
// Teardown the network only if all the resource cleanup is done.
if retErr != nil && cleanupErr == nil {
deferCtx, deferCancel := ctrdutil.DeferContext()
defer deferCancel()
// Teardown network if an error is returned.
if cleanupErr = c.teardownPodNetwork(deferCtx, sandbox); cleanupErr != nil {
log.G(ctx).WithError(cleanupErr).Errorf("Failed to destroy network for sandbox %q", id)
}
}
}()

// Setup network for sandbox.
// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
// rely on the assumption that CRI shim will not be querying the network namespace to check the
// network states such as IP.
// In future runtime implementation should avoid relying on CRI shim implementation details.
// In this case however caching the IP will add a subtle performance enhancement by avoiding
// calls to network namespace of the pod to query the IP of the veth interface on every
// SandboxStatus request.
if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
return nil, fmt.Errorf("failed to setup network for sandbox %q: %w", id, err)
}
}

// Create sandbox task in containerd.
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
Expand All @@ -288,6 +330,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
// Cleanup the sandbox container if an error is returned.
if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
cleanupErr = err
}
}
}()
Expand Down Expand Up @@ -326,9 +369,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return nil, errors.Wrap(err, "failed to update sandbox status")
}

// Add sandbox into sandbox store in INIT state.
sandbox.Container = container

if err := c.sandboxStore.Add(sandbox); err != nil {
return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox)
}
Expand Down
9 changes: 9 additions & 0 deletions pkg/cri/server/sandbox_run_linux.go
Expand Up @@ -320,3 +320,12 @@ func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {

return taskOpts
}

func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
for i := range spec.Linux.Namespaces {
if spec.Linux.Namespaces[i].Type == runtimespec.NetworkNamespace {
spec.Linux.Namespaces[i].Path = nsPath
break
}
}
}
3 changes: 3 additions & 0 deletions pkg/cri/server/sandbox_run_other.go
Expand Up @@ -54,3 +54,6 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return []containerd.NewTaskOpts{}
}

func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
}
4 changes: 4 additions & 0 deletions pkg/cri/server/sandbox_run_windows.go
Expand Up @@ -92,3 +92,7 @@ func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxCo
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return nil
}

func (c *criService) updateNetNamespacePath(spec *runtimespec.Spec, nsPath string) {
spec.Windows.Network.NetworkNamespace = nsPath
}
36 changes: 18 additions & 18 deletions pkg/cri/store/sandbox/status.go
Expand Up @@ -29,24 +29,24 @@ import (
// | |
// | Create(Run) | Load
// | |
// Start | |
// (failed) | |
// +------------------+ +-----------+
// | | | |
// | | | |
// | | | |
// | | Start(Run) | |
// | | | |
// | PortForward +----v----+ | |
// | +------+ | | |
// | | | READY <---------+ |
// | +------> | | |
// | +----+----+ | |
// | | | |
// | | Stop/Exit | |
// | | | |
// | +----v----+ | |
// | | <---------+ +----v----+
// | |
// | | Start
// | |(failed and not cleaned)
// Start |--------------|--------------+
//(failed but cleaned)| | |
// +------------------+ |-----------+ |
// | | Start(Run) | | |
// | | | | |
// | PortForward +----v----+ | | |
// | +------+ | | | |
// | | | READY <---------+ | |
// | +------> | | | |
// | +----+----+ | | |
// | | | | |
// | | Stop/Exit | | |
// | | | | |
// | +----v----+ | | |
// | | <---------+ +----v--v-+
// | | NOTREADY| | |
// | | <----------------+ UNKNOWN |
// | +----+----+ Stop | |
Expand Down

0 comments on commit 2316027

Please sign in to comment.