From 0064010cddfa009fe16ae23fcd0c57f4f15d227c Mon Sep 17 00:00:00 2001 From: jinxu Date: Thu, 28 Jul 2022 08:03:20 -0700 Subject: [PATCH] Promote Local storage capacity isolation feature to GA This change is to promote local storage capacity isolation feature to GA At the same time, to allow rootless system disable this feature due to unable to get root fs, this change introduced a new kubelet config "localStorageCapacityIsolation". By default it is set to true. For rootless systems, they can set this configuration to false to disable the feature. Once it is set, user cannot set ephemeral-storage request/limit because capacity and allocatable will not be set. Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a --- cmd/kubelet/app/options/options.go | 1 + cmd/kubelet/app/server.go | 2 +- hack/local-up-cluster.sh | 2 + pkg/api/pod/util.go | 23 ---- pkg/api/pod/util_test.go | 111 ------------------ pkg/api/v1/resource/helpers.go | 7 -- pkg/features/kube_features.go | 5 +- pkg/generated/openapi/zz_generated.openapi.go | 7 ++ pkg/kubelet/apis/config/fuzzer/fuzzer.go | 3 +- pkg/kubelet/apis/config/helpers_test.go | 1 + .../KubeletConfiguration/after/v1beta1.yaml | 1 + .../roundtrip/default/v1beta1.yaml | 1 + pkg/kubelet/apis/config/types.go | 10 ++ pkg/kubelet/apis/config/v1beta1/defaults.go | 3 + .../apis/config/v1beta1/defaults_test.go | 64 +++++----- .../config/v1beta1/zz_generated.conversion.go | 6 + pkg/kubelet/cadvisor/cadvisor_linux.go | 4 +- pkg/kubelet/cadvisor/cadvisor_unsupported.go | 2 +- pkg/kubelet/cadvisor/cadvisor_windows.go | 2 +- pkg/kubelet/cm/container_manager.go | 4 +- pkg/kubelet/cm/container_manager_linux.go | 9 +- .../cm/container_manager_linux_test.go | 42 +++---- pkg/kubelet/cm/container_manager_stub.go | 7 +- .../cm/container_manager_unsupported.go | 2 +- pkg/kubelet/cm/container_manager_windows.go | 9 +- pkg/kubelet/cm/fake_container_manager.go | 7 +- pkg/kubelet/eviction/eviction_manager.go | 36 +++--- pkg/kubelet/eviction/helpers_test.go | 73 +----------- pkg/kubelet/kubelet.go | 9 +- pkg/kubelet/kubelet_node_status.go | 5 +- pkg/kubelet/kubelet_node_status_test.go | 9 +- pkg/kubelet/kubelet_pods_linux_test.go | 2 +- pkg/kubelet/kubelet_test.go | 10 +- pkg/kubelet/nodestatus/setters.go | 23 ++-- pkg/kubelet/nodestatus/setters_test.go | 95 +++++++++++---- pkg/kubelet/runonce_test.go | 2 +- pkg/kubemark/hollow_kubelet.go | 1 + .../plugins/noderesources/fit_test.go | 16 +-- pkg/scheduler/framework/types.go | 11 +- pkg/scheduler/util/pod_resources.go | 7 -- .../k8s.io/kubelet/config/v1beta1/types.go | 11 ++ .../config/v1beta1/zz_generated.deepcopy.go | 5 + 42 files changed, 267 insertions(+), 383 deletions(-) diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go index aecfd5fbd8cc..a2353c4af3a7 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go @@ -492,6 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. 
If -1 is specified, no cap will be applied.") fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.") + fs.BoolVar(&c.LocalStorageCapacityIsolation, "local-storage-capacity-isolation", c.LocalStorageCapacityIsolation, "If true, local ephemeral storage isolation is enabled. Otherwise, local storage isolation feature will be disabled") // Flags intended for testing, not recommended used in production environments. fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.") diff --git a/cmd/kubelet/app/server.go b/cmd/kubelet/app/server.go index f1f81730853e..c8a67942b17a 100644 --- a/cmd/kubelet/app/server.go +++ b/cmd/kubelet/app/server.go @@ -646,7 +646,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend if kubeDeps.CAdvisorInterface == nil { imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.RemoteRuntimeEndpoint) - kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint)) + kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint), s.LocalStorageCapacityIsolation) if err != nil { return err } diff --git a/hack/local-up-cluster.sh b/hack/local-up-cluster.sh index c04f1e15e61d..0a49b29697a4 100755 --- a/hack/local-up-cluster.sh +++ b/hack/local-up-cluster.sh @@ -237,6 +237,7 @@ CPU_CFS_QUOTA=${CPU_CFS_QUOTA:-true} ENABLE_HOSTPATH_PROVISIONER=${ENABLE_HOSTPATH_PROVISIONER:-"false"} CLAIM_BINDER_SYNC_PERIOD=${CLAIM_BINDER_SYNC_PERIOD:-"15s"} # current k8s default ENABLE_CONTROLLER_ATTACH_DETACH=${ENABLE_CONTROLLER_ATTACH_DETACH:-"true"} # current default +LOCAL_STORAGE_CAPACITY_ISOLATION=${LOCAL_STORAGE_CAPACITY_ISOLATION:-"true"} # current default # This is the default dir and filename where the apiserver will generate a self-signed cert # which should be able to be used as the CA to verify itself CERT_DIR=${CERT_DIR:-"/var/run/kubernetes"} @@ -754,6 +755,7 @@ cgroupRoot: "${CGROUP_ROOT}" cgroupsPerQOS: ${CGROUPS_PER_QOS} cpuCFSQuota: ${CPU_CFS_QUOTA} enableControllerAttachDetach: ${ENABLE_CONTROLLER_ATTACH_DETACH} +localStorageCapacityIsolation: ${LOCAL_STORAGE_CAPACITY_ISOLATION} evictionPressureTransitionPeriod: "${EVICTION_PRESSURE_TRANSITION_PERIOD}" failSwapOn: ${FAIL_SWAP_ON} port: ${KUBELET_PORT} diff --git a/pkg/api/pod/util.go b/pkg/api/pod/util.go index a0af5b15de9a..4d62a5128701 100644 --- a/pkg/api/pod/util.go +++ b/pkg/api/pod/util.go @@ -517,14 +517,6 @@ func dropDisabledFields( } } - if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) && !emptyDirSizeLimitInUse(oldPodSpec) { - for i := range podSpec.Volumes { - if podSpec.Volumes[i].EmptyDir != nil { - podSpec.Volumes[i].EmptyDir.SizeLimit = nil - } - } - } - if !utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) && !probeGracePeriodInUse(oldPodSpec) { // Set pod-level terminationGracePeriodSeconds to nil if the feature is disabled and it is not used VisitContainers(podSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool { @@ -703,21 +695,6 @@ func appArmorInUse(podAnnotations map[string]string) bool { return false } -// emptyDirSizeLimitInUse returns true if 
any pod's EmptyDir volumes use SizeLimit. -func emptyDirSizeLimitInUse(podSpec *api.PodSpec) bool { - if podSpec == nil { - return false - } - for i := range podSpec.Volumes { - if podSpec.Volumes[i].EmptyDir != nil { - if podSpec.Volumes[i].EmptyDir.SizeLimit != nil { - return true - } - } - } - return false -} - // probeGracePeriodInUse returns true if the pod spec is non-nil and has a probe that makes use // of the probe-level terminationGracePeriodSeconds feature func probeGracePeriodInUse(podSpec *api.PodSpec) bool { diff --git a/pkg/api/pod/util_test.go b/pkg/api/pod/util_test.go index c536cc006fff..17c7ef0f2dad 100644 --- a/pkg/api/pod/util_test.go +++ b/pkg/api/pod/util_test.go @@ -25,7 +25,6 @@ import ( "github.com/google/go-cmp/cmp" v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/validation/field" @@ -703,116 +702,6 @@ func TestDropProcMount(t *testing.T) { } } -func TestDropEmptyDirSizeLimit(t *testing.T) { - sizeLimit := resource.MustParse("1Gi") - podWithEmptyDirSizeLimit := func() *api.Pod { - return &api.Pod{ - Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyNever, - Volumes: []api.Volume{ - { - Name: "a", - VolumeSource: api.VolumeSource{ - EmptyDir: &api.EmptyDirVolumeSource{ - Medium: "memory", - SizeLimit: &sizeLimit, - }, - }, - }, - }, - }, - } - } - podWithoutEmptyDirSizeLimit := func() *api.Pod { - return &api.Pod{ - Spec: api.PodSpec{ - RestartPolicy: api.RestartPolicyNever, - Volumes: []api.Volume{ - { - Name: "a", - VolumeSource: api.VolumeSource{ - EmptyDir: &api.EmptyDirVolumeSource{ - Medium: "memory", - }, - }, - }, - }, - }, - } - } - - podInfo := []struct { - description string - hasEmptyDirSizeLimit bool - pod func() *api.Pod - }{ - { - description: "has EmptyDir Size Limit", - hasEmptyDirSizeLimit: true, - pod: podWithEmptyDirSizeLimit, - }, - { - description: "does not have EmptyDir Size Limit", - hasEmptyDirSizeLimit: false, - pod: podWithoutEmptyDirSizeLimit, - }, - { - description: "is nil", - hasEmptyDirSizeLimit: false, - pod: func() *api.Pod { return nil }, - }, - } - - for _, enabled := range []bool{true, false} { - for _, oldPodInfo := range podInfo { - for _, newPodInfo := range podInfo { - oldPodHasEmptyDirSizeLimit, oldPod := oldPodInfo.hasEmptyDirSizeLimit, oldPodInfo.pod() - newPodHasEmptyDirSizeLimit, newPod := newPodInfo.hasEmptyDirSizeLimit, newPodInfo.pod() - if newPod == nil { - continue - } - - t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, enabled)() - - var oldPodSpec *api.PodSpec - if oldPod != nil { - oldPodSpec = &oldPod.Spec - } - dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil) - - // old pod should never be changed - if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) { - t.Errorf("old pod changed: %v", cmp.Diff(oldPod, oldPodInfo.pod())) - } - - switch { - case enabled || oldPodHasEmptyDirSizeLimit: - // new pod should not be changed if the feature is enabled, or if the old pod had EmptyDir SizeLimit - if !reflect.DeepEqual(newPod, newPodInfo.pod()) { - t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod())) - } - case newPodHasEmptyDirSizeLimit: - // new pod should be changed - if reflect.DeepEqual(newPod, newPodInfo.pod()) { - t.Errorf("new pod was not changed") 
- } - // new pod should not have EmptyDir SizeLimit - if !reflect.DeepEqual(newPod, podWithoutEmptyDirSizeLimit()) { - t.Errorf("new pod had EmptyDir SizeLimit: %v", cmp.Diff(newPod, podWithoutEmptyDirSizeLimit())) - } - default: - // new pod should not need to be changed - if !reflect.DeepEqual(newPod, newPodInfo.pod()) { - t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod())) - } - } - }) - } - } - } -} - func TestDropAppArmor(t *testing.T) { podWithAppArmor := func() *api.Pod { return &api.Pod{ diff --git a/pkg/api/v1/resource/helpers.go b/pkg/api/v1/resource/helpers.go index 707cfc2242a2..904eb1441662 100644 --- a/pkg/api/v1/resource/helpers.go +++ b/pkg/api/v1/resource/helpers.go @@ -24,8 +24,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - utilfeature "k8s.io/apiserver/pkg/util/feature" - "k8s.io/kubernetes/pkg/features" ) // PodRequestsAndLimits returns a dictionary of all defined resources summed up for all @@ -131,11 +129,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou requestQuantity = resource.Quantity{Format: resource.DecimalSI} } - if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { - // if the local storage capacity isolation feature gate is disabled, pods request 0 disk - return requestQuantity - } - for _, container := range pod.Spec.Containers { if rQuantity, ok := container.Resources.Requests[resourceName]; ok { requestQuantity.Add(rQuantity) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 22708622451b..7a24fee3765a 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -515,8 +515,9 @@ const ( // owner: @jinxu // beta: v1.10 + // stable: v1.25 // - // New local storage types to support local storage capacity isolation + // Support local ephemeral storage types for local storage capacity isolation feature. LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation" // owner: @RobertKrawitz @@ -988,7 +989,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta}, - LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta}, + LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27 LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: true, PreRelease: featuregate.Beta}, diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 4097987d0f1b..f12f383b2b01 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -54589,6 +54589,13 @@ func schema_k8sio_kubelet_config_v1beta1_KubeletConfiguration(ref common.Referen Ref: ref("k8s.io/component-base/tracing/api/v1.TracingConfiguration"), }, }, + "localStorageCapacityIsolation": { + SchemaProps: spec.SchemaProps{ + Description: "LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true. This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk usage from the volume exceeds the limit. This feature depends on the capability of detecting correct root file system disk usage. 
For certain systems, such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir. Default: true", + Type: []string{"boolean"}, + Format: "", + }, + }, }, }, }, diff --git a/pkg/kubelet/apis/config/fuzzer/fuzzer.go b/pkg/kubelet/apis/config/fuzzer/fuzzer.go index 5d7ebb9cd585..b1ad59b0ebc0 100644 --- a/pkg/kubelet/apis/config/fuzzer/fuzzer.go +++ b/pkg/kubelet/apis/config/fuzzer/fuzzer.go @@ -20,7 +20,7 @@ import ( "math/rand" "time" - "github.com/google/gofuzz" + fuzz "github.com/google/gofuzz" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer" @@ -110,6 +110,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} { } obj.EnableSystemLogHandler = true obj.MemoryThrottlingFactor = utilpointer.Float64Ptr(rand.Float64()) + obj.LocalStorageCapacityIsolation = true }, } } diff --git a/pkg/kubelet/apis/config/helpers_test.go b/pkg/kubelet/apis/config/helpers_test.go index 6934f8193d92..029eb119a4ef 100644 --- a/pkg/kubelet/apis/config/helpers_test.go +++ b/pkg/kubelet/apis/config/helpers_test.go @@ -282,5 +282,6 @@ var ( "MemoryThrottlingFactor", "Tracing.Endpoint", "Tracing.SamplingRatePerMillion", + "LocalStorageCapacityIsolation", ) ) diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml index 3cb76b89923c..a1a6cd6367ef 100644 --- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/after/v1beta1.yaml @@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14 kind: KubeletConfiguration kubeAPIBurst: 10 kubeAPIQPS: 5 +localStorageCapacityIsolation: true logging: flushFrequency: 5000000000 format: text diff --git a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml index 3cb76b89923c..a1a6cd6367ef 100644 --- a/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml +++ b/pkg/kubelet/apis/config/scheme/testdata/KubeletConfiguration/roundtrip/default/v1beta1.yaml @@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14 kind: KubeletConfiguration kubeAPIBurst: 10 kubeAPIQPS: 5 +localStorageCapacityIsolation: true logging: flushFrequency: 5000000000 format: text diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index bcf25b977eef..0ec23416fabb 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -450,6 +450,16 @@ type KubeletConfiguration struct { // +featureGate=KubeletTracing // +optional Tracing *tracingapi.TracingConfiguration + + // LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true. + // This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way + // as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk + // usage from the volume exceeds the limit. + // This feature depends on the capability of detecting correct root file system disk usage. 
For certain systems, + // such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be + // disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir. + // +optional + LocalStorageCapacityIsolation bool } // KubeletAuthorizationMode denotes the authorization mode for the kubelet diff --git a/pkg/kubelet/apis/config/v1beta1/defaults.go b/pkg/kubelet/apis/config/v1beta1/defaults.go index ca2d147eda61..823154030819 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults.go @@ -264,4 +264,7 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura if obj.RegisterNode == nil { obj.RegisterNode = utilpointer.BoolPtr(true) } + if obj.LocalStorageCapacityIsolation == nil { + obj.LocalStorageCapacityIsolation = utilpointer.BoolPtr(true) + } } diff --git a/pkg/kubelet/apis/config/v1beta1/defaults_test.go b/pkg/kubelet/apis/config/v1beta1/defaults_test.go index 59d2e35fd1fe..e16b36986245 100644 --- a/pkg/kubelet/apis/config/v1beta1/defaults_test.go +++ b/pkg/kubelet/apis/config/v1beta1/defaults_test.go @@ -115,12 +115,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { Format: "text", FlushFrequency: 5 * time.Second, }, - EnableSystemLogHandler: utilpointer.BoolPtr(true), - EnableProfilingHandler: utilpointer.BoolPtr(true), - EnableDebugFlagsHandler: utilpointer.BoolPtr(true), - SeccompDefault: utilpointer.BoolPtr(false), - MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor), - RegisterNode: utilpointer.BoolPtr(true), + EnableSystemLogHandler: utilpointer.BoolPtr(true), + EnableProfilingHandler: utilpointer.BoolPtr(true), + EnableDebugFlagsHandler: utilpointer.BoolPtr(true), + SeccompDefault: utilpointer.BoolPtr(false), + MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor), + RegisterNode: utilpointer.BoolPtr(true), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(true), }, }, { @@ -245,6 +246,7 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { SeccompDefault: utilpointer.Bool(false), MemoryThrottlingFactor: utilpointer.Float64(0), RegisterNode: utilpointer.BoolPtr(false), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(false), }, &v1beta1.KubeletConfiguration{ EnableServer: utilpointer.BoolPtr(false), @@ -333,13 +335,14 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { Format: "text", FlushFrequency: 5 * time.Second, }, - EnableSystemLogHandler: utilpointer.Bool(false), - ReservedMemory: []v1beta1.MemoryReservation{}, - EnableProfilingHandler: utilpointer.Bool(false), - EnableDebugFlagsHandler: utilpointer.Bool(false), - SeccompDefault: utilpointer.Bool(false), - MemoryThrottlingFactor: utilpointer.Float64(0), - RegisterNode: utilpointer.BoolPtr(false), + EnableSystemLogHandler: utilpointer.Bool(false), + ReservedMemory: []v1beta1.MemoryReservation{}, + EnableProfilingHandler: utilpointer.Bool(false), + EnableDebugFlagsHandler: utilpointer.Bool(false), + SeccompDefault: utilpointer.Bool(false), + MemoryThrottlingFactor: utilpointer.Float64(0), + RegisterNode: utilpointer.BoolPtr(false), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(false), }, }, { @@ -481,11 +484,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")}, }, }, - EnableProfilingHandler: utilpointer.Bool(true), - EnableDebugFlagsHandler: utilpointer.Bool(true), - 
SeccompDefault: utilpointer.Bool(true), - MemoryThrottlingFactor: utilpointer.Float64(1), - RegisterNode: utilpointer.BoolPtr(true), + EnableProfilingHandler: utilpointer.Bool(true), + EnableDebugFlagsHandler: utilpointer.Bool(true), + SeccompDefault: utilpointer.Bool(true), + MemoryThrottlingFactor: utilpointer.Float64(1), + RegisterNode: utilpointer.BoolPtr(true), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(true), }, &v1beta1.KubeletConfiguration{ EnableServer: utilpointer.BoolPtr(true), @@ -624,11 +628,12 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")}, }, }, - EnableProfilingHandler: utilpointer.Bool(true), - EnableDebugFlagsHandler: utilpointer.Bool(true), - SeccompDefault: utilpointer.Bool(true), - MemoryThrottlingFactor: utilpointer.Float64(1), - RegisterNode: utilpointer.BoolPtr(true), + EnableProfilingHandler: utilpointer.Bool(true), + EnableDebugFlagsHandler: utilpointer.Bool(true), + SeccompDefault: utilpointer.Bool(true), + MemoryThrottlingFactor: utilpointer.Float64(1), + RegisterNode: utilpointer.BoolPtr(true), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(true), }, }, { @@ -709,12 +714,13 @@ func TestSetDefaultsKubeletConfiguration(t *testing.T) { Format: "text", FlushFrequency: 5 * time.Second, }, - EnableSystemLogHandler: utilpointer.BoolPtr(true), - EnableProfilingHandler: utilpointer.BoolPtr(true), - EnableDebugFlagsHandler: utilpointer.BoolPtr(true), - SeccompDefault: utilpointer.BoolPtr(false), - MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor), - RegisterNode: utilpointer.BoolPtr(true), + EnableSystemLogHandler: utilpointer.BoolPtr(true), + EnableProfilingHandler: utilpointer.BoolPtr(true), + EnableDebugFlagsHandler: utilpointer.BoolPtr(true), + SeccompDefault: utilpointer.BoolPtr(false), + MemoryThrottlingFactor: utilpointer.Float64Ptr(DefaultMemoryThrottlingFactor), + RegisterNode: utilpointer.BoolPtr(true), + LocalStorageCapacityIsolation: utilpointer.BoolPtr(true), }, }, } diff --git a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go index a6f3ad2a1b97..daa73aa1f8fe 100644 --- a/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go +++ b/pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go @@ -508,6 +508,9 @@ func autoConvert_v1beta1_KubeletConfiguration_To_config_KubeletConfiguration(in return err } out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing)) + if err := v1.Convert_Pointer_bool_To_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil { + return err + } return nil } @@ -683,6 +686,9 @@ func autoConvert_config_KubeletConfiguration_To_v1beta1_KubeletConfiguration(in return err } out.Tracing = (*apiv1.TracingConfiguration)(unsafe.Pointer(in.Tracing)) + if err := v1.Convert_bool_To_Pointer_bool(&in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation, s); err != nil { + return err + } return nil } diff --git a/pkg/kubelet/cadvisor/cadvisor_linux.go b/pkg/kubelet/cadvisor/cadvisor_linux.go index 1b118722777e..370e374df55a 100644 --- a/pkg/kubelet/cadvisor/cadvisor_linux.go +++ b/pkg/kubelet/cadvisor/cadvisor_linux.go @@ -80,7 +80,7 @@ func init() { } // New creates a new cAdvisor Interface for linux systems. 
-func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) { +func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) { sysFs := sysfs.NewRealSysFs() includedMetrics := cadvisormetrics.MetricSet{ @@ -99,7 +99,7 @@ func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots [ includedMetrics[cadvisormetrics.AcceleratorUsageMetrics] = struct{}{} } - if usingLegacyStats || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) { + if usingLegacyStats || localStorageCapacityIsolation { includedMetrics[cadvisormetrics.DiskUsageMetrics] = struct{}{} } diff --git a/pkg/kubelet/cadvisor/cadvisor_unsupported.go b/pkg/kubelet/cadvisor/cadvisor_unsupported.go index 7f33ba9b03aa..40113a9ce821 100644 --- a/pkg/kubelet/cadvisor/cadvisor_unsupported.go +++ b/pkg/kubelet/cadvisor/cadvisor_unsupported.go @@ -33,7 +33,7 @@ type cadvisorUnsupported struct { var _ Interface = new(cadvisorUnsupported) // New creates a new cAdvisor Interface for unsupported systems. -func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats bool) (Interface, error) { +func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupsRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) { return &cadvisorUnsupported{}, nil } diff --git a/pkg/kubelet/cadvisor/cadvisor_windows.go b/pkg/kubelet/cadvisor/cadvisor_windows.go index a3fd930055b5..ded51e7caa12 100644 --- a/pkg/kubelet/cadvisor/cadvisor_windows.go +++ b/pkg/kubelet/cadvisor/cadvisor_windows.go @@ -34,7 +34,7 @@ type cadvisorClient struct { var _ Interface = new(cadvisorClient) // New creates a cAdvisor and exports its API on the specified port if port > 0. -func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats bool) (Interface, error) { +func New(imageFsInfoProvider ImageFsInfoProvider, rootPath string, cgroupRoots []string, usingLegacyStats, localStorageCapacityIsolation bool) (Interface, error) { client, err := winstats.NewPerfCounterClient() return &cadvisorClient{ rootPath: rootPath, diff --git a/pkg/kubelet/cm/container_manager.go b/pkg/kubelet/cm/container_manager.go index e67dbfc3daaa..bf52a1e4c063 100644 --- a/pkg/kubelet/cm/container_manager.go +++ b/pkg/kubelet/cm/container_manager.go @@ -47,7 +47,7 @@ type ContainerManager interface { // Runs the container manager's housekeeping. // - Ensures that the Docker daemon is in a container. // - Creates the system container where all non-containerized processes run. - Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService) error + Start(*v1.Node, ActivePodsFunc, config.SourcesReady, status.PodStatusProvider, internalapi.RuntimeService, bool) error // SystemCgroupsLimit returns resources allocated to system cgroups in the machine. // These cgroups include the system and Kubernetes services. @@ -73,7 +73,7 @@ type ContainerManager interface { GetNodeAllocatableReservation() v1.ResourceList // GetCapacity returns the amount of compute resources tracked by container manager available on the node. 
- GetCapacity() v1.ResourceList + GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList // GetDevicePluginResourceCapacity returns the node capacity (amount of total device plugin resources), // node allocatable (amount of total healthy resources reported by device plugin), diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index e15b759d3e10..44c8cda6c409 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -554,7 +554,8 @@ func (cm *containerManagerImpl) Start(node *v1.Node, activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, - runtimeService internalapi.RuntimeService) error { + runtimeService internalapi.RuntimeService, + localStorageCapacityIsolation bool) error { // Initialize CPU manager if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.CPUManager) { @@ -578,7 +579,7 @@ func (cm *containerManagerImpl) Start(node *v1.Node, // allocatable of the node cm.nodeInfo = node - if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) { + if localStorageCapacityIsolation { rootfs, err := cm.cadvisorInterface.RootFsInfo() if err != nil { return fmt.Errorf("failed to get rootfs info: %v", err) @@ -915,8 +916,8 @@ func isKernelPid(pid int) bool { // GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*" // At present this method is only invoked when introspecting ephemeral storage -func (cm *containerManagerImpl) GetCapacity() v1.ResourceList { - if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) { +func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList { + if localStorageCapacityIsolation { // We store allocatable ephemeral-storage in the capacity property once we Start() the container manager if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok { // If we haven't yet stored the capacity for ephemeral-storage, we can try to fetch it directly from cAdvisor, diff --git a/pkg/kubelet/cm/container_manager_linux_test.go b/pkg/kubelet/cm/container_manager_linux_test.go index 01434ccb5870..258cd73a841f 100644 --- a/pkg/kubelet/cm/container_manager_linux_test.go +++ b/pkg/kubelet/cm/container_manager_linux_test.go @@ -28,9 +28,6 @@ import ( gomock "github.com/golang/mock/gomock" cadvisorapiv2 "github.com/google/cadvisor/info/v2" - utilfeature "k8s.io/apiserver/pkg/util/feature" - featuregatetesting "k8s.io/component-base/featuregate/testing" - kubefeatures "k8s.io/kubernetes/pkg/features" "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/stretchr/testify/assert" @@ -193,11 +190,11 @@ func TestGetCapacity(t *testing.T) { mockCadvisorError := cadvisortest.NewMockInterface(mockCtrlError) mockCadvisorError.EXPECT().RootFsInfo().Return(cadvisorapiv2.FsInfo{}, errors.New("Unable to get rootfs data from cAdvisor interface")) cases := []struct { - name string - cm *containerManagerImpl - expectedResourceQuantity *resource.Quantity - expectedNoEphemeralStorage bool - enableLocalStorageCapacityIsolation bool + name string + cm *containerManagerImpl + expectedResourceQuantity *resource.Quantity + expectedNoEphemeralStorage bool + disablelocalStorageCapacityIsolation bool }{ { name: "capacity property has ephemeral-storage", @@ -207,9 +204,8 @@ func TestGetCapacity(t *testing.T) { v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI), 
}, }, - expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI), - expectedNoEphemeralStorage: false, - enableLocalStorageCapacityIsolation: true, + expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI), + expectedNoEphemeralStorage: false, }, { name: "capacity property does not have ephemeral-storage", @@ -217,9 +213,8 @@ func TestGetCapacity(t *testing.T) { cadvisorInterface: mockCadvisor, capacity: v1.ResourceList{}, }, - expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI), - expectedNoEphemeralStorage: false, - enableLocalStorageCapacityIsolation: true, + expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCadvisor, resource.BinarySI), + expectedNoEphemeralStorage: false, }, { name: "capacity property does not have ephemeral-storage, error from rootfs", @@ -227,8 +222,7 @@ func TestGetCapacity(t *testing.T) { cadvisorInterface: mockCadvisorError, capacity: v1.ResourceList{}, }, - expectedNoEphemeralStorage: true, - enableLocalStorageCapacityIsolation: true, + expectedNoEphemeralStorage: true, }, { name: "capacity property does not have ephemeral-storage, cadvisor interface is nil", @@ -236,26 +230,24 @@ func TestGetCapacity(t *testing.T) { cadvisorInterface: nil, capacity: v1.ResourceList{}, }, - expectedNoEphemeralStorage: true, - enableLocalStorageCapacityIsolation: true, + expectedNoEphemeralStorage: true, }, { - name: "LocalStorageCapacityIsolation feature flag is disabled", + name: "capacity property has ephemeral-storage, but localStorageCapacityIsolation is disabled", cm: &containerManagerImpl{ cadvisorInterface: mockCadvisor, capacity: v1.ResourceList{ - v1.ResourceCPU: resource.MustParse("4"), - v1.ResourceMemory: resource.MustParse("16G"), + v1.ResourceEphemeralStorage: *resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI), }, }, - expectedNoEphemeralStorage: true, - enableLocalStorageCapacityIsolation: false, + expectedResourceQuantity: resource.NewQuantity(ephemeralStorageFromCapacity, resource.BinarySI), + expectedNoEphemeralStorage: true, + disablelocalStorageCapacityIsolation: true, }, } for _, c := range cases { t.Run(c.name, func(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, kubefeatures.LocalStorageCapacityIsolation, c.enableLocalStorageCapacityIsolation)() - ret := c.cm.GetCapacity() + ret := c.cm.GetCapacity(!c.disablelocalStorageCapacityIsolation) if v, exists := ret[v1.ResourceEphemeralStorage]; !exists { if !c.expectedNoEphemeralStorage { t.Errorf("did not get any ephemeral storage data") diff --git a/pkg/kubelet/cm/container_manager_stub.go b/pkg/kubelet/cm/container_manager_stub.go index a5b0e523b137..6bd5f3e36894 100644 --- a/pkg/kubelet/cm/container_manager_stub.go +++ b/pkg/kubelet/cm/container_manager_stub.go @@ -41,7 +41,7 @@ type containerManagerStub struct { var _ ContainerManager = &containerManagerStub{} -func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error { +func (cm *containerManagerStub) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error { klog.V(2).InfoS("Starting stub container manager") return nil } @@ -74,7 +74,10 @@ func (cm *containerManagerStub) GetNodeAllocatableReservation() v1.ResourceList return nil } -func (cm *containerManagerStub) GetCapacity() 
v1.ResourceList { +func (cm *containerManagerStub) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList { + if !localStorageCapacityIsolation { + return v1.ResourceList{} + } c := v1.ResourceList{ v1.ResourceEphemeralStorage: *resource.NewQuantity( int64(0), diff --git a/pkg/kubelet/cm/container_manager_unsupported.go b/pkg/kubelet/cm/container_manager_unsupported.go index 1a0587c36dca..fb1c4a79dbfe 100644 --- a/pkg/kubelet/cm/container_manager_unsupported.go +++ b/pkg/kubelet/cm/container_manager_unsupported.go @@ -38,7 +38,7 @@ type unsupportedContainerManager struct { var _ ContainerManager = &unsupportedContainerManager{} -func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error { +func (unsupportedContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error { return fmt.Errorf("Container Manager is unsupported in this build") } diff --git a/pkg/kubelet/cm/container_manager_windows.go b/pkg/kubelet/cm/container_manager_windows.go index ec56fcf2c73b..dc325a3a9f91 100644 --- a/pkg/kubelet/cm/container_manager_windows.go +++ b/pkg/kubelet/cm/container_manager_windows.go @@ -30,11 +30,9 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/record" internalapi "k8s.io/cri-api/pkg/apis" podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1" - kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/kubelet/cm/admission" "k8s.io/kubernetes/pkg/kubelet/cm/cpumanager" @@ -72,10 +70,11 @@ func (cm *containerManagerImpl) Start(node *v1.Node, activePods ActivePodsFunc, sourcesReady config.SourcesReady, podStatusProvider status.PodStatusProvider, - runtimeService internalapi.RuntimeService) error { + runtimeService internalapi.RuntimeService, + localStorageCapacityIsolation bool) error { klog.V(2).InfoS("Starting Windows container manager") - if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.LocalStorageCapacityIsolation) { + if localStorageCapacityIsolation { rootfs, err := cm.cadvisorInterface.RootFsInfo() if err != nil { return fmt.Errorf("failed to get rootfs info: %v", err) @@ -171,7 +170,7 @@ func (cm *containerManagerImpl) GetNodeAllocatableReservation() v1.ResourceList return result } -func (cm *containerManagerImpl) GetCapacity() v1.ResourceList { +func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList { return cm.capacity } diff --git a/pkg/kubelet/cm/fake_container_manager.go b/pkg/kubelet/cm/fake_container_manager.go index c907301a6d4e..685246276c5b 100644 --- a/pkg/kubelet/cm/fake_container_manager.go +++ b/pkg/kubelet/cm/fake_container_manager.go @@ -50,7 +50,7 @@ func NewFakeContainerManager() *FakeContainerManager { } } -func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService) error { +func (cm *FakeContainerManager) Start(_ *v1.Node, _ ActivePodsFunc, _ config.SourcesReady, _ status.PodStatusProvider, _ internalapi.RuntimeService, _ bool) error { cm.Lock() defer cm.Unlock() cm.CalledFunctions = append(cm.CalledFunctions, "Start") @@ -106,10 +106,13 @@ func (cm *FakeContainerManager) GetNodeAllocatableReservation() v1.ResourceList return nil } -func (cm *FakeContainerManager) GetCapacity() 
v1.ResourceList { +func (cm *FakeContainerManager) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList { cm.Lock() defer cm.Unlock() cm.CalledFunctions = append(cm.CalledFunctions, "GetCapacity") + if !localStorageCapacityIsolation { + return v1.ResourceList{} + } c := v1.ResourceList{ v1.ResourceEphemeralStorage: *resource.NewQuantity( int64(0), diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go index f905ae079833..8332a522e25f 100644 --- a/pkg/kubelet/eviction/eviction_manager.go +++ b/pkg/kubelet/eviction/eviction_manager.go @@ -26,13 +26,11 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/client-go/tools/record" v1helper "k8s.io/component-helpers/scheduling/corev1" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" apiv1resource "k8s.io/kubernetes/pkg/api/v1/resource" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" - "k8s.io/kubernetes/pkg/features" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" "k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/metrics" @@ -97,6 +95,8 @@ type managerImpl struct { thresholdNotifiers []ThresholdNotifier // thresholdsLastUpdated is the last time the thresholdNotifiers were updated. thresholdsLastUpdated time.Time + // whether can support local storage capacity isolation + localStorageCapacityIsolation bool } // ensure it implements the required interface @@ -113,21 +113,23 @@ func NewManager( recorder record.EventRecorder, nodeRef *v1.ObjectReference, clock clock.WithTicker, + localStorageCapacityIsolation bool, ) (Manager, lifecycle.PodAdmitHandler) { manager := &managerImpl{ - clock: clock, - killPodFunc: killPodFunc, - mirrorPodFunc: mirrorPodFunc, - imageGC: imageGC, - containerGC: containerGC, - config: config, - recorder: recorder, - summaryProvider: summaryProvider, - nodeRef: nodeRef, - nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, - thresholdsFirstObservedAt: thresholdsObservedAt{}, - dedicatedImageFs: nil, - thresholdNotifiers: []ThresholdNotifier{}, + clock: clock, + killPodFunc: killPodFunc, + mirrorPodFunc: mirrorPodFunc, + imageGC: imageGC, + containerGC: containerGC, + config: config, + recorder: recorder, + summaryProvider: summaryProvider, + nodeRef: nodeRef, + nodeConditionsLastObservedAt: nodeConditionsObservedAt{}, + thresholdsFirstObservedAt: thresholdsObservedAt{}, + dedicatedImageFs: nil, + thresholdNotifiers: []ThresholdNotifier{}, + localStorageCapacityIsolation: localStorageCapacityIsolation, } return manager, manager } @@ -230,7 +232,7 @@ func (m *managerImpl) IsUnderPIDPressure() bool { func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc ActivePodsFunc) []*v1.Pod { // if we have nothing to do, just return thresholds := m.config.Thresholds - if len(thresholds) == 0 && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { + if len(thresholds) == 0 && !m.localStorageCapacityIsolation { return nil } @@ -318,7 +320,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act // evict pods if there is a resource usage violation from local volume temporary storage // If eviction happens in localStorageEviction function, skip the rest of eviction action - if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { + if m.localStorageCapacityIsolation { if evictedPods := m.localStorageEviction(activePods, statsFunc); len(evictedPods) > 0 { return 
evictedPods } diff --git a/pkg/kubelet/eviction/helpers_test.go b/pkg/kubelet/eviction/helpers_test.go index aaf648e93749..46ba55001636 100644 --- a/pkg/kubelet/eviction/helpers_test.go +++ b/pkg/kubelet/eviction/helpers_test.go @@ -18,12 +18,13 @@ package eviction import ( "fmt" - "k8s.io/apimachinery/pkg/util/diff" "reflect" "sort" "testing" "time" + "k8s.io/apimachinery/pkg/util/diff" + v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -31,6 +32,7 @@ import ( utilfeature "k8s.io/apiserver/pkg/util/feature" featuregatetesting "k8s.io/component-base/featuregate/testing" statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1" + "k8s.io/kubernetes/pkg/features" evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api" kubetypes "k8s.io/kubernetes/pkg/kubelet/types" @@ -699,7 +701,6 @@ func TestOrderedByExceedsRequestMemory(t *testing.T) { } func TestOrderedByExceedsRequestDisk(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)() below := newPod("below-requests", -1, []v1.Container{ newContainer("below-requests", v1.ResourceList{v1.ResourceEphemeralStorage: resource.MustParse("200Mi")}, newResourceList("", "", "")), }, nil) @@ -748,7 +749,6 @@ func TestOrderedByPriority(t *testing.T) { } func TestOrderedbyDisk(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)() pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")), }, []v1.Volume{ @@ -813,73 +813,6 @@ func TestOrderedbyDisk(t *testing.T) { } } -// Tests that we correctly ignore disk requests when the local storage feature gate is disabled. 
-func TestOrderedbyDiskDisableLocalStorage(t *testing.T) { - defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, false)() - pod1 := newPod("best-effort-high", defaultPriority, []v1.Container{ - newContainer("best-effort-high", newResourceList("", "", ""), newResourceList("", "", "")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod2 := newPod("best-effort-low", defaultPriority, []v1.Container{ - newContainer("best-effort-low", newResourceList("", "", ""), newResourceList("", "", "")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod3 := newPod("burstable-high", defaultPriority, []v1.Container{ - newContainer("burstable-high", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod4 := newPod("burstable-low", defaultPriority, []v1.Container{ - newContainer("burstable-low", newResourceList("", "", "100Mi"), newResourceList("", "", "400Mi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod5 := newPod("guaranteed-high", defaultPriority, []v1.Container{ - newContainer("guaranteed-high", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - pod6 := newPod("guaranteed-low", defaultPriority, []v1.Container{ - newContainer("guaranteed-low", newResourceList("", "", "400Mi"), newResourceList("", "", "400Mi")), - }, []v1.Volume{ - newVolume("local-volume", v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{}, - }), - }) - stats := map[*v1.Pod]statsapi.PodStats{ - pod1: newPodDiskStats(pod1, resource.MustParse("50Mi"), resource.MustParse("100Mi"), resource.MustParse("150Mi")), // 300Mi - pod2: newPodDiskStats(pod2, resource.MustParse("25Mi"), resource.MustParse("25Mi"), resource.MustParse("50Mi")), // 100Mi - pod3: newPodDiskStats(pod3, resource.MustParse("150Mi"), resource.MustParse("150Mi"), resource.MustParse("50Mi")), // 350Mi - pod4: newPodDiskStats(pod4, resource.MustParse("25Mi"), resource.MustParse("35Mi"), resource.MustParse("50Mi")), // 110Mi - pod5: newPodDiskStats(pod5, resource.MustParse("225Mi"), resource.MustParse("100Mi"), resource.MustParse("50Mi")), // 375Mi - pod6: newPodDiskStats(pod6, resource.MustParse("25Mi"), resource.MustParse("45Mi"), resource.MustParse("50Mi")), // 120Mi - } - statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) { - result, found := stats[pod] - return result, found - } - pods := []*v1.Pod{pod1, pod3, pod2, pod4, pod5, pod6} - orderedBy(disk(statsFn, []fsStatsType{fsStatsRoot, fsStatsLogs, fsStatsLocalVolumeSource}, v1.ResourceEphemeralStorage)).Sort(pods) - expected := []*v1.Pod{pod5, pod3, pod1, pod6, pod4, pod2} - for i := range expected { - if pods[i] != expected[i] { - t.Errorf("Expected pod[%d]: %s, but got: %s", i, expected[i].Name, pods[i].Name) - } - } -} - func TestOrderedbyInodes(t *testing.T) { defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, true)() low := newPod("low", defaultPriority, []v1.Container{ diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index bc784890ae28..b72047b3a9db 100644 --- 
a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -773,7 +773,8 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, klet.backOff = flowcontrol.NewBackOff(backOffPeriod, MaxContainerBackOff) // setup eviction manager - evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock) + evictionManager, evictionAdmitHandler := eviction.NewManager(klet.resourceAnalyzer, evictionConfig, + killPodNow(klet.podWorkers, kubeDeps.Recorder), klet.podManager.GetMirrorPodByPod, klet.imageManager, klet.containerGC, kubeDeps.Recorder, nodeRef, klet.clock, kubeCfg.LocalStorageCapacityIsolation) klet.evictionManager = evictionManager klet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler) @@ -1384,7 +1385,7 @@ func (kl *Kubelet) initializeRuntimeDependentModules() { os.Exit(1) } // containerManager must start after cAdvisor because it needs filesystem capacity information - if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService); err != nil { + if err := kl.containerManager.Start(node, kl.GetActivePods, kl.sourcesReady, kl.statusManager, kl.runtimeService, kl.supportLocalStorageCapacityIsolation()); err != nil { // Fail kubelet and rely on the babysitter to retry starting kubelet. klog.ErrorS(err, "Failed to start ContainerManager") os.Exit(1) @@ -2489,6 +2490,10 @@ func (kl *Kubelet) CheckpointContainer( return nil } +func (kl *Kubelet) supportLocalStorageCapacityIsolation() bool { + return kl.GetConfiguration().LocalStorageCapacityIsolation +} + // isSyncPodWorthy filters out events that are not worthy of pod syncing func isSyncPodWorthy(event *pleg.PodLifecycleEvent) bool { // ContainerRemoved doesn't affect pod state diff --git a/pkg/kubelet/kubelet_node_status.go b/pkg/kubelet/kubelet_node_status.go index aabb75bcd2e1..b28e6288b6ec 100644 --- a/pkg/kubelet/kubelet_node_status.go +++ b/pkg/kubelet/kubelet_node_status.go @@ -624,7 +624,7 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error { setters = append(setters, nodestatus.NodeAddress(kl.nodeIPs, kl.nodeIPValidator, kl.hostname, kl.hostnameOverridden, kl.externalCloudProvider, kl.cloud, nodeAddressesFunc), nodestatus.MachineInfo(string(kl.nodeName), kl.maxPods, kl.podsPerCore, kl.GetCachedMachineInfo, kl.containerManager.GetCapacity, - kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent), + kl.containerManager.GetDevicePluginResourceCapacity, kl.containerManager.GetNodeAllocatableReservation, kl.recordEvent, kl.supportLocalStorageCapacityIsolation()), nodestatus.VersionInfo(kl.cadvisor.VersionInfo, kl.containerRuntime.Type, kl.containerRuntime.Version), nodestatus.DaemonEndpoints(kl.daemonEndpoints), nodestatus.Images(kl.nodeStatusMaxImages, kl.imageManager.GetImageList), @@ -637,7 +637,8 @@ func (kl *Kubelet) defaultNodeStatusFuncs() []func(*v1.Node) error { nodestatus.MemoryPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderMemoryPressure, kl.recordNodeStatusEvent), nodestatus.DiskPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderDiskPressure, kl.recordNodeStatusEvent), nodestatus.PIDPressureCondition(kl.clock.Now, kl.evictionManager.IsUnderPIDPressure, kl.recordNodeStatusEvent), - nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, 
kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent), + nodestatus.ReadyCondition(kl.clock.Now, kl.runtimeState.runtimeErrors, kl.runtimeState.networkErrors, kl.runtimeState.storageErrors, + validateHostFunc, kl.containerManager.Status, kl.shutdownManager.ShutdownStatus, kl.recordNodeStatusEvent, kl.supportLocalStorageCapacityIsolation()), nodestatus.VolumesInUse(kl.volumeManager.ReconcilerStatesHasBeenSynced, kl.volumeManager.GetVolumesInUse), // TODO(mtaufen): I decided not to move this setter for now, since all it does is send an event // and record state back to the Kubelet runtime object. In the future, I'd like to isolate diff --git a/pkg/kubelet/kubelet_node_status_test.go b/pkg/kubelet/kubelet_node_status_test.go index 0ed606ae009f..6dacc2e3d5bc 100644 --- a/pkg/kubelet/kubelet_node_status_test.go +++ b/pkg/kubelet/kubelet_node_status_test.go @@ -153,7 +153,10 @@ func (lcm *localCM) GetNodeAllocatableReservation() v1.ResourceList { return lcm.allocatableReservation } -func (lcm *localCM) GetCapacity() v1.ResourceList { +func (lcm *localCM) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList { + if !localStorageCapacityIsolation { + delete(lcm.capacity, v1.ResourceEphemeralStorage) + } return lcm.capacity } @@ -182,7 +185,7 @@ func TestUpdateNewNodeStatus(t *testing.T) { } inputImageList, expectedImageList := generateTestingImageLists(numTestImages, int(tc.nodeStatusMaxImages)) testKubelet := newTestKubeletWithImageList( - t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/) + t, inputImageList, false /* controllerAttachDetachEnabled */, true /*initFakeVolumePlugin*/, true /* localStorageCapacityIsolation */) defer testKubelet.Cleanup() kubelet := testKubelet.kubelet kubelet.nodeStatusMaxImages = tc.nodeStatusMaxImages @@ -1347,7 +1350,7 @@ func TestUpdateNewNodeStatusTooLargeReservation(t *testing.T) { // generate one more in inputImageList than we configure the Kubelet to report inputImageList, _ := generateTestingImageLists(nodeStatusMaxImages+1, nodeStatusMaxImages) testKubelet := newTestKubeletWithImageList( - t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */) + t, inputImageList, false /* controllerAttachDetachEnabled */, true /* initFakeVolumePlugin */, true) defer testKubelet.Cleanup() kubelet := testKubelet.kubelet kubelet.nodeStatusMaxImages = nodeStatusMaxImages diff --git a/pkg/kubelet/kubelet_pods_linux_test.go b/pkg/kubelet/kubelet_pods_linux_test.go index bcc5a1ff2dfe..51730c36aed5 100644 --- a/pkg/kubelet/kubelet_pods_linux_test.go +++ b/pkg/kubelet/kubelet_pods_linux_test.go @@ -23,7 +23,7 @@ import ( "testing" "github.com/stretchr/testify/assert" - "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" _ "k8s.io/kubernetes/pkg/apis/core/install" diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index 9995f1e04cc9..5b3aba5b3b98 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -145,14 +145,16 @@ func newTestKubelet(t *testing.T, controllerAttachDetachEnabled bool) *TestKubel Size: 456, }, } - return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/) + return newTestKubeletWithImageList(t, imageList, controllerAttachDetachEnabled, true /*initFakeVolumePlugin*/, true /*localStorageCapacityIsolation*/) } func 
newTestKubeletWithImageList( t *testing.T, imageList []kubecontainer.Image, controllerAttachDetachEnabled bool, - initFakeVolumePlugin bool) *TestKubelet { + initFakeVolumePlugin bool, + localStorageCapacityIsolation bool, +) *TestKubelet { logger, _ := ktesting.NewTestContext(t) fakeRuntime := &containertest.FakeRuntime{ @@ -320,7 +322,8 @@ func newTestKubeletWithImageList( Namespace: "", } // setup eviction manager - evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock) + evictionManager, evictionAdmitHandler := eviction.NewManager(kubelet.resourceAnalyzer, eviction.Config{}, + killPodNow(kubelet.podWorkers, fakeRecorder), kubelet.podManager.GetMirrorPodByPod, kubelet.imageManager, kubelet.containerGC, fakeRecorder, nodeRef, kubelet.clock, kubelet.supportLocalStorageCapacityIsolation()) kubelet.evictionManager = evictionManager kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler) @@ -386,6 +389,7 @@ func newTestKubeletWithImageList( kubelet.AddPodSyncLoopHandler(activeDeadlineHandler) kubelet.AddPodSyncHandler(activeDeadlineHandler) kubelet.lastContainerStartedTime = newTimeCache() + kubelet.kubeletConfiguration.LocalStorageCapacityIsolation = localStorageCapacityIsolation return &TestKubelet{kubelet, fakeRuntime, fakeContainerManager, fakeKubeClient, fakeMirrorClient, fakeClock, nil, plug} } diff --git a/pkg/kubelet/nodestatus/setters.go b/pkg/kubelet/nodestatus/setters.go index 94ffc94172e9..a205809846f0 100644 --- a/pkg/kubelet/nodestatus/setters.go +++ b/pkg/kubelet/nodestatus/setters.go @@ -31,13 +31,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/errors" utilnet "k8s.io/apimachinery/pkg/util/net" - utilfeature "k8s.io/apiserver/pkg/util/feature" cloudprovider "k8s.io/cloud-provider" cloudproviderapi "k8s.io/cloud-provider/api" cloudprovidernodeutil "k8s.io/cloud-provider/node/helpers" "k8s.io/component-base/version" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" - "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/kubelet/cadvisor" "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainer "k8s.io/kubernetes/pkg/kubelet/container" @@ -244,10 +242,11 @@ func MachineInfo(nodeName string, maxPods int, podsPerCore int, machineInfoFunc func() (*cadvisorapiv1.MachineInfo, error), // typically Kubelet.GetCachedMachineInfo - capacityFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetCapacity + capacityFunc func(localStorageCapacityIsolation bool) v1.ResourceList, // typically Kubelet.containerManager.GetCapacity devicePluginResourceCapacityFunc func() (v1.ResourceList, v1.ResourceList, []string), // typically Kubelet.containerManager.GetDevicePluginResourceCapacity nodeAllocatableReservationFunc func() v1.ResourceList, // typically Kubelet.containerManager.GetNodeAllocatableReservation recordEventFunc func(eventType, event, message string), // typically Kubelet.recordEvent + localStorageCapacityIsolation bool, ) Setter { return func(node *v1.Node) error { // Note: avoid blindly overwriting the capacity in case opaque @@ -295,16 +294,15 @@ func MachineInfo(nodeName string, } node.Status.NodeInfo.BootID = info.BootID - if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { - // TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the - // 
capacity for every node status request - initialCapacity := capacityFunc() - if initialCapacity != nil { - if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists { - node.Status.Capacity[v1.ResourceEphemeralStorage] = v - } + // TODO: all the node resources should use ContainerManager.GetCapacity instead of deriving the + // capacity for every node status request + initialCapacity := capacityFunc(localStorageCapacityIsolation) + if initialCapacity != nil { + if v, exists := initialCapacity[v1.ResourceEphemeralStorage]; exists { + node.Status.Capacity[v1.ResourceEphemeralStorage] = v } } devicePluginCapacity, devicePluginAllocatable, removedDevicePlugins = devicePluginResourceCapacityFunc() for k, v := range devicePluginCapacity { @@ -469,6 +467,7 @@ func ReadyCondition( cmStatusFunc func() cm.Status, // typically Kubelet.containerManager.Status nodeShutdownManagerErrorsFunc func() error, // typically kubelet.shutdownManager.errors. recordEventFunc func(eventType, event string), // typically Kubelet.recordNodeStatusEvent + localStorageCapacityIsolation bool, ) Setter { return func(node *v1.Node) error { // NOTE(aaronlevy): NodeReady condition needs to be the last in the list of node conditions. @@ -484,7 +483,7 @@ func ReadyCondition( } errs := []error{runtimeErrorsFunc(), networkErrorsFunc(), storageErrorsFunc(), nodeShutdownManagerErrorsFunc()} requiredCapacities := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods} - if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { + if localStorageCapacityIsolation { requiredCapacities = append(requiredCapacities, v1.ResourceEphemeralStorage) } missingCapacities := []string{} diff --git a/pkg/kubelet/nodestatus/setters_test.go b/pkg/kubelet/nodestatus/setters_test.go index 5696de62ddfe..4ada6c1aa462 100644 --- a/pkg/kubelet/nodestatus/setters_test.go +++ b/pkg/kubelet/nodestatus/setters_test.go @@ -585,17 +585,18 @@ func TestMachineInfo(t *testing.T) { } cases := []struct { - desc string - node *v1.Node - maxPods int - podsPerCore int - machineInfo *cadvisorapiv1.MachineInfo - machineInfoError error - capacity v1.ResourceList - devicePluginResourceCapacity dprc - nodeAllocatableReservation v1.ResourceList - expectNode *v1.Node - expectEvents []testEvent + desc string + node *v1.Node + maxPods int + podsPerCore int + machineInfo *cadvisorapiv1.MachineInfo + machineInfoError error + capacity v1.ResourceList + devicePluginResourceCapacity dprc + nodeAllocatableReservation v1.ResourceList + expectNode *v1.Node + expectEvents []testEvent + disableLocalStorageCapacityIsolation bool }{ { desc: "machine identifiers, basic capacity and allocatable", @@ -797,6 +798,35 @@ func TestMachineInfo(t *testing.T) { }, }, }, + { + desc: "ephemeral storage is not reflected in capacity and allocatable because localStorageCapacityIsolation is disabled", + node: &v1.Node{}, + maxPods: 110, + machineInfo: &cadvisorapiv1.MachineInfo{ + NumCores: 2, + MemoryCapacity: 1024, + }, + capacity: v1.ResourceList{ + v1.ResourceEphemeralStorage: *resource.NewQuantity(5000, resource.BinarySI), + }, + expectNode: &v1.Node{ + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI), + }, + Allocatable: v1.ResourceList{ + v1.ResourceCPU: 
*resource.NewMilliQuantity(2000, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(1024, resource.BinarySI), + v1.ResourcePods: *resource.NewQuantity(110, resource.DecimalSI), + }, + }, + }, + disableLocalStorageCapacityIsolation: true, + }, { desc: "device plugin resources are reflected in capacity and allocatable", node: &v1.Node{}, @@ -962,7 +992,7 @@ func TestMachineInfo(t *testing.T) { machineInfoFunc := func() (*cadvisorapiv1.MachineInfo, error) { return tc.machineInfo, tc.machineInfoError } - capacityFunc := func() v1.ResourceList { - return tc.capacity + capacityFunc := func(localStorageCapacityIsolation bool) v1.ResourceList { + if !localStorageCapacityIsolation { + // mirror ContainerManager.GetCapacity, which omits ephemeral-storage when isolation is disabled + capacity := tc.capacity.DeepCopy() + delete(capacity, v1.ResourceEphemeralStorage) + return capacity + } + return tc.capacity } devicePluginResourceCapacityFunc := func() (v1.ResourceList, v1.ResourceList, []string) { @@ -983,7 +1013,7 @@ func TestMachineInfo(t *testing.T) { } // construct setter setter := MachineInfo(nodeName, tc.maxPods, tc.podsPerCore, machineInfoFunc, capacityFunc, - devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc) + devicePluginResourceCapacityFunc, nodeAllocatableReservationFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation) // call setter on node if err := setter(tc.node); err != nil { t.Fatalf("unexpected error: %v", err) @@ -1180,17 +1210,28 @@ func TestReadyCondition(t *testing.T) { }, } + withoutStorageCapacity := &v1.Node{ + Status: v1.NodeStatus{ + Capacity: v1.ResourceList{ + v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI), + v1.ResourceMemory: *resource.NewQuantity(10e9, resource.BinarySI), + v1.ResourcePods: *resource.NewQuantity(100, resource.DecimalSI), + }, + }, + } + cases := []struct { - desc string - node *v1.Node - runtimeErrors error - networkErrors error - storageErrors error - appArmorValidateHostFunc func() error - cmStatus cm.Status - nodeShutdownManagerErrors error - expectConditions []v1.NodeCondition - expectEvents []testEvent + desc string + node *v1.Node + runtimeErrors error + networkErrors error + storageErrors error + appArmorValidateHostFunc func() error + cmStatus cm.Status + nodeShutdownManagerErrors error + expectConditions []v1.NodeCondition + expectEvents []testEvent + disableLocalStorageCapacityIsolation bool }{ { desc: "new, ready", @@ -1245,6 +1286,12 @@ func TestReadyCondition(t *testing.T) { node: &v1.Node{}, expectConditions: []v1.NodeCondition{*makeReadyCondition(false, "missing node capacity for resources: cpu, memory, pods, ephemeral-storage", now, now)}, }, + { + desc: "new, ready: localStorageCapacityIsolation is not supported", + node: withoutStorageCapacity.DeepCopy(), + disableLocalStorageCapacityIsolation: true, + expectConditions: []v1.NodeCondition{*makeReadyCondition(true, "kubelet is posting ready status", now, now)}, + }, // the transition tests ensure timestamps are set correctly, no need to test the entire condition matrix in this section { desc: "transition to ready", @@ -1324,7 +1371,7 @@ func TestReadyCondition(t *testing.T) { }) } // construct setter - setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc) + setter := ReadyCondition(nowFunc, runtimeErrorsFunc, networkErrorsFunc, storageErrorsFunc, tc.appArmorValidateHostFunc, cmStatusFunc, nodeShutdownErrorsFunc, recordEventFunc, !tc.disableLocalStorageCapacityIsolation) // call setter on node if err := setter(tc.node); err != nil { t.Fatalf("unexpected error: %v", err) diff 
--git a/pkg/kubelet/runonce_test.go b/pkg/kubelet/runonce_test.go index 7a7f5594f24a..c16f11b0f40e 100644 --- a/pkg/kubelet/runonce_test.go +++ b/pkg/kubelet/runonce_test.go @@ -130,7 +130,7 @@ func TestRunOnce(t *testing.T) { return nil } fakeMirrodPodFunc := func(*v1.Pod) (*v1.Pod, bool) { return nil, false } - evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock) + evictionManager, evictionAdmitHandler := eviction.NewManager(kb.resourceAnalyzer, eviction.Config{}, fakeKillPodFunc, fakeMirrodPodFunc, nil, nil, kb.recorder, nodeRef, kb.clock, kb.supportLocalStorageCapacityIsolation()) kb.evictionManager = evictionManager kb.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler) diff --git a/pkg/kubemark/hollow_kubelet.go b/pkg/kubemark/hollow_kubelet.go index 5d28884ad5d8..dea85c04da9a 100644 --- a/pkg/kubemark/hollow_kubelet.go +++ b/pkg/kubemark/hollow_kubelet.go @@ -206,6 +206,7 @@ func GetHollowKubeletConfig(opt *HollowKubletOptions) (*options.KubeletFlags, *k c.ProtectKernelDefaults = false c.RegisterWithTaints = opt.RegisterWithTaints c.RegisterNode = true + c.LocalStorageCapacityIsolation = true return f, c } diff --git a/pkg/scheduler/framework/plugins/noderesources/fit_test.go b/pkg/scheduler/framework/plugins/noderesources/fit_test.go index e2d6f15fdc37..f76de155105c 100644 --- a/pkg/scheduler/framework/plugins/noderesources/fit_test.go +++ b/pkg/scheduler/framework/plugins/noderesources/fit_test.go @@ -25,9 +25,6 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/apiserver/pkg/util/feature" - "k8s.io/component-base/featuregate" - featuregatetesting "k8s.io/component-base/featuregate/testing" "k8s.io/kubernetes/pkg/scheduler/apis/config" "k8s.io/kubernetes/pkg/scheduler/framework" plfeature "k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature" @@ -575,7 +572,6 @@ func TestStorageRequests(t *testing.T) { pod *v1.Pod nodeInfo *framework.NodeInfo name string - features map[featuregate.Feature]bool wantStatus *framework.Status }{ { @@ -599,13 +595,10 @@ func TestStorageRequests(t *testing.T) { wantStatus: framework.NewStatus(framework.Unschedulable, getErrReason(v1.ResourceEphemeralStorage)), }, { - pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 25}), framework.Resource{EphemeralStorage: 25}), + pod: newResourceInitPod(newResourcePod(framework.Resource{EphemeralStorage: 5})), nodeInfo: framework.NewNodeInfo( - newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2})), - name: "ephemeral local storage request is ignored due to disabled feature gate", - features: map[featuregate.Feature]bool{ - "LocalStorageCapacityIsolation": false, - }, + newResourcePod(framework.Resource{MilliCPU: 2, Memory: 2, EphemeralStorage: 10})), + name: "ephemeral local storage is sufficient", }, { pod: newResourcePod(framework.Resource{EphemeralStorage: 10}), @@ -617,9 +610,6 @@ func TestStorageRequests(t *testing.T) { for _, test := range storagePodsTests { t.Run(test.name, func(t *testing.T) { - for k, v := range test.features { - defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, k, v)() - } node := v1.Node{Status: v1.NodeStatus{Capacity: makeResources(10, 20, 32, 5, 20, 5).Capacity, Allocatable: makeAllocatableResources(10, 20, 32, 5, 20, 5)}} test.nodeInfo.SetNode(&node) diff --git a/pkg/scheduler/framework/types.go 
b/pkg/scheduler/framework/types.go index 93970327d5aa..247c124a7a55 100644 --- a/pkg/scheduler/framework/types.go +++ b/pkg/scheduler/framework/types.go @@ -29,9 +29,7 @@ import ( "k8s.io/apimachinery/pkg/labels" utilerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" - utilfeature "k8s.io/apiserver/pkg/util/feature" "k8s.io/klog/v2" - "k8s.io/kubernetes/pkg/features" schedutil "k8s.io/kubernetes/pkg/scheduler/util" ) @@ -444,10 +442,7 @@ func (r *Resource) Add(rl v1.ResourceList) { case v1.ResourcePods: r.AllowedPodNumber += int(rQuant.Value()) case v1.ResourceEphemeralStorage: - if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { - // if the local storage capacity isolation feature gate is disabled, pods request 0 disk. - r.EphemeralStorage += rQuant.Value() - } + r.EphemeralStorage += rQuant.Value() default: if schedutil.IsScalarResourceName(rName) { r.AddScalar(rName, rQuant.Value()) @@ -500,9 +495,7 @@ func (r *Resource) SetMaxResource(rl v1.ResourceList) { case v1.ResourceCPU: r.MilliCPU = max(r.MilliCPU, rQuantity.MilliValue()) case v1.ResourceEphemeralStorage: - if utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { - r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value()) - } + r.EphemeralStorage = max(r.EphemeralStorage, rQuantity.Value()) default: if schedutil.IsScalarResourceName(rName) { r.SetScalar(rName, max(r.ScalarResources[rName], rQuantity.Value())) diff --git a/pkg/scheduler/util/pod_resources.go b/pkg/scheduler/util/pod_resources.go index b5947349cd45..bf665c68cc6b 100644 --- a/pkg/scheduler/util/pod_resources.go +++ b/pkg/scheduler/util/pod_resources.go @@ -18,8 +18,6 @@ package util import ( v1 "k8s.io/api/core/v1" - utilfeature "k8s.io/apiserver/pkg/util/feature" - "k8s.io/kubernetes/pkg/features" ) // For each of these resources, a pod that doesn't request the resource explicitly @@ -65,11 +63,6 @@ func GetRequestForResource(resource v1.ResourceName, requests *v1.ResourceList, } return requests.Memory().Value() case v1.ResourceEphemeralStorage: - // if the local storage capacity isolation feature gate is disabled, pods request 0 disk. - if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) { - return 0 - } - quantity, found := (*requests)[v1.ResourceEphemeralStorage] if !found { return 0 diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index 36cbc6602681..aa67206ee401 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -786,6 +786,17 @@ type KubeletConfiguration struct { // +featureGate=KubeletTracing // +optional Tracing *tracingapi.TracingConfiguration `json:"tracing,omitempty"` + + // LocalStorageCapacityIsolation enables the local ephemeral storage isolation feature. The default setting is true. + // This feature allows users to set request/limit for a container's ephemeral storage and manage it in a similar way + // to cpu and memory. It also allows setting a sizeLimit for emptyDir volumes, which will trigger pod eviction if disk + // usage from the volume exceeds the limit. + // This feature depends on the ability to detect root file system disk usage correctly. For certain systems, + // such as kind rootless, where this capability cannot be supported, LocalStorageCapacityIsolation should be + // disabled. 
Once disabled, users should not set requests/limits for a container's ephemeral storage, or sizeLimit for emptyDir volumes. + // Default: true + // +optional + LocalStorageCapacityIsolation *bool `json:"localStorageCapacityIsolation,omitempty"` } type KubeletAuthorizationMode string diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go index 157578545576..63ae4e749c15 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/zz_generated.deepcopy.go @@ -452,6 +452,11 @@ func (in *KubeletConfiguration) DeepCopyInto(out *KubeletConfiguration) { *out = new(apiv1.TracingConfiguration) (*in).DeepCopyInto(*out) } + if in.LocalStorageCapacityIsolation != nil { + in, out := &in.LocalStorageCapacityIsolation, &out.LocalStorageCapacityIsolation + *out = new(bool) + **out = **in + } return }
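For illustration only, a minimal KubeletConfiguration snippet that a rootless node could use to opt out of the new field documented above; this sketch is not taken from the diff and assumes the standard kubelet.config.k8s.io/v1beta1 config file format:

apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
# Rootless setups cannot detect root filesystem disk usage, so disable the
# (default-on) local storage capacity isolation feature. The node will then
# stop reporting ephemeral-storage capacity/allocatable, and pods should not
# set ephemeral-storage requests/limits or emptyDir sizeLimit.
localStorageCapacityIsolation: false

The field maps directly to KubeletConfiguration.LocalStorageCapacityIsolation added in staging/src/k8s.io/kubelet/config/v1beta1/types.go above.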