Skip to content

Commit

Permalink
Promote Local storage capacity isolation feature to GA
Browse files Browse the repository at this point in the history
This change is to promote local storage capacity isolation feature to GA

At the same time, to allow rootless system disable this feature due to
unable to get root fs, this change introduced a new kubelet config
"localStorageCapacityIsolation". By default it is set to true. For
rootless systems, they can set this configuration to false to disable
the feature. Once it is set, user cannot set ephemeral-storage
request/limit because capacity and allocatable will not be set.

Change-Id: I48a52e737c6a09e9131454db6ad31247b56c000a
  • Loading branch information
jingxu97 committed Aug 3, 2022
1 parent bc4c493 commit 0064010
Show file tree
Hide file tree
Showing 42 changed files with 267 additions and 383 deletions.
1 change: 1 addition & 0 deletions cmd/kubelet/app/options/options.go
Expand Up @@ -492,6 +492,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig

fs.Int32Var(&c.NodeStatusMaxImages, "node-status-max-images", c.NodeStatusMaxImages, "The maximum number of images to report in Node.Status.Images. If -1 is specified, no cap will be applied.")
fs.BoolVar(&c.KernelMemcgNotification, "kernel-memcg-notification", c.KernelMemcgNotification, "If enabled, the kubelet will integrate with the kernel memcg notification to determine if memory eviction thresholds are crossed rather than polling.")
fs.BoolVar(&c.LocalStorageCapacityIsolation, "local-storage-capacity-isolation", c.LocalStorageCapacityIsolation, "If true, local ephemeral storage isolation is enabled. Otherwise, local storage isolation feature will be disabled")

// Flags intended for testing, not recommended used in production environments.
fs.Int64Var(&c.MaxOpenFiles, "max-open-files", c.MaxOpenFiles, "Number of files that can be opened by Kubelet process.")
Expand Down
2 changes: 1 addition & 1 deletion cmd/kubelet/app/server.go
Expand Up @@ -646,7 +646,7 @@ func run(ctx context.Context, s *options.KubeletServer, kubeDeps *kubelet.Depend

if kubeDeps.CAdvisorInterface == nil {
imageFsInfoProvider := cadvisor.NewImageFsInfoProvider(s.RemoteRuntimeEndpoint)
kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint))
kubeDeps.CAdvisorInterface, err = cadvisor.New(imageFsInfoProvider, s.RootDirectory, cgroupRoots, cadvisor.UsingLegacyCadvisorStats(s.RemoteRuntimeEndpoint), s.LocalStorageCapacityIsolation)
if err != nil {
return err
}
Expand Down
2 changes: 2 additions & 0 deletions hack/local-up-cluster.sh
Expand Up @@ -237,6 +237,7 @@ CPU_CFS_QUOTA=${CPU_CFS_QUOTA:-true}
ENABLE_HOSTPATH_PROVISIONER=${ENABLE_HOSTPATH_PROVISIONER:-"false"}
CLAIM_BINDER_SYNC_PERIOD=${CLAIM_BINDER_SYNC_PERIOD:-"15s"} # current k8s default
ENABLE_CONTROLLER_ATTACH_DETACH=${ENABLE_CONTROLLER_ATTACH_DETACH:-"true"} # current default
LOCAL_STORAGE_CAPACITY_ISOLATION=${LOCAL_STORAGE_CAPACITY_ISOLATION:-"true"} # current default
# This is the default dir and filename where the apiserver will generate a self-signed cert
# which should be able to be used as the CA to verify itself
CERT_DIR=${CERT_DIR:-"/var/run/kubernetes"}
Expand Down Expand Up @@ -754,6 +755,7 @@ cgroupRoot: "${CGROUP_ROOT}"
cgroupsPerQOS: ${CGROUPS_PER_QOS}
cpuCFSQuota: ${CPU_CFS_QUOTA}
enableControllerAttachDetach: ${ENABLE_CONTROLLER_ATTACH_DETACH}
localStorageCapacityIsolation: ${LOCAL_STORAGE_CAPACITY_ISOLATION}
evictionPressureTransitionPeriod: "${EVICTION_PRESSURE_TRANSITION_PERIOD}"
failSwapOn: ${FAIL_SWAP_ON}
port: ${KUBELET_PORT}
Expand Down
23 changes: 0 additions & 23 deletions pkg/api/pod/util.go
Expand Up @@ -517,14 +517,6 @@ func dropDisabledFields(
}
}

if !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) && !emptyDirSizeLimitInUse(oldPodSpec) {
for i := range podSpec.Volumes {
if podSpec.Volumes[i].EmptyDir != nil {
podSpec.Volumes[i].EmptyDir.SizeLimit = nil
}
}
}

if !utilfeature.DefaultFeatureGate.Enabled(features.ProbeTerminationGracePeriod) && !probeGracePeriodInUse(oldPodSpec) {
// Set pod-level terminationGracePeriodSeconds to nil if the feature is disabled and it is not used
VisitContainers(podSpec, AllContainers, func(c *api.Container, containerType ContainerType) bool {
Expand Down Expand Up @@ -703,21 +695,6 @@ func appArmorInUse(podAnnotations map[string]string) bool {
return false
}

// emptyDirSizeLimitInUse returns true if any pod's EmptyDir volumes use SizeLimit.
func emptyDirSizeLimitInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
return false
}
for i := range podSpec.Volumes {
if podSpec.Volumes[i].EmptyDir != nil {
if podSpec.Volumes[i].EmptyDir.SizeLimit != nil {
return true
}
}
}
return false
}

// probeGracePeriodInUse returns true if the pod spec is non-nil and has a probe that makes use
// of the probe-level terminationGracePeriodSeconds feature
func probeGracePeriodInUse(podSpec *api.PodSpec) bool {
Expand Down
111 changes: 0 additions & 111 deletions pkg/api/pod/util_test.go
Expand Up @@ -25,7 +25,6 @@ import (
"github.com/google/go-cmp/cmp"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
Expand Down Expand Up @@ -703,116 +702,6 @@ func TestDropProcMount(t *testing.T) {
}
}

func TestDropEmptyDirSizeLimit(t *testing.T) {
sizeLimit := resource.MustParse("1Gi")
podWithEmptyDirSizeLimit := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
Volumes: []api.Volume{
{
Name: "a",
VolumeSource: api.VolumeSource{
EmptyDir: &api.EmptyDirVolumeSource{
Medium: "memory",
SizeLimit: &sizeLimit,
},
},
},
},
},
}
}
podWithoutEmptyDirSizeLimit := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
RestartPolicy: api.RestartPolicyNever,
Volumes: []api.Volume{
{
Name: "a",
VolumeSource: api.VolumeSource{
EmptyDir: &api.EmptyDirVolumeSource{
Medium: "memory",
},
},
},
},
},
}
}

podInfo := []struct {
description string
hasEmptyDirSizeLimit bool
pod func() *api.Pod
}{
{
description: "has EmptyDir Size Limit",
hasEmptyDirSizeLimit: true,
pod: podWithEmptyDirSizeLimit,
},
{
description: "does not have EmptyDir Size Limit",
hasEmptyDirSizeLimit: false,
pod: podWithoutEmptyDirSizeLimit,
},
{
description: "is nil",
hasEmptyDirSizeLimit: false,
pod: func() *api.Pod { return nil },
},
}

for _, enabled := range []bool{true, false} {
for _, oldPodInfo := range podInfo {
for _, newPodInfo := range podInfo {
oldPodHasEmptyDirSizeLimit, oldPod := oldPodInfo.hasEmptyDirSizeLimit, oldPodInfo.pod()
newPodHasEmptyDirSizeLimit, newPod := newPodInfo.hasEmptyDirSizeLimit, newPodInfo.pod()
if newPod == nil {
continue
}

t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.LocalStorageCapacityIsolation, enabled)()

var oldPodSpec *api.PodSpec
if oldPod != nil {
oldPodSpec = &oldPod.Spec
}
dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil)

// old pod should never be changed
if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) {
t.Errorf("old pod changed: %v", cmp.Diff(oldPod, oldPodInfo.pod()))
}

switch {
case enabled || oldPodHasEmptyDirSizeLimit:
// new pod should not be changed if the feature is enabled, or if the old pod had EmptyDir SizeLimit
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
}
case newPodHasEmptyDirSizeLimit:
// new pod should be changed
if reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod was not changed")
}
// new pod should not have EmptyDir SizeLimit
if !reflect.DeepEqual(newPod, podWithoutEmptyDirSizeLimit()) {
t.Errorf("new pod had EmptyDir SizeLimit: %v", cmp.Diff(newPod, podWithoutEmptyDirSizeLimit()))
}
default:
// new pod should not need to be changed
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", cmp.Diff(newPod, newPodInfo.pod()))
}
}
})
}
}
}
}

func TestDropAppArmor(t *testing.T) {
podWithAppArmor := func() *api.Pod {
return &api.Pod{
Expand Down
7 changes: 0 additions & 7 deletions pkg/api/v1/resource/helpers.go
Expand Up @@ -24,8 +24,6 @@ import (

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/kubernetes/pkg/features"
)

// PodRequestsAndLimits returns a dictionary of all defined resources summed up for all
Expand Down Expand Up @@ -131,11 +129,6 @@ func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resou
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
}

if resourceName == v1.ResourceEphemeralStorage && !utilfeature.DefaultFeatureGate.Enabled(features.LocalStorageCapacityIsolation) {
// if the local storage capacity isolation feature gate is disabled, pods request 0 disk
return requestQuantity
}

for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity)
Expand Down
5 changes: 3 additions & 2 deletions pkg/features/kube_features.go
Expand Up @@ -515,8 +515,9 @@ const (

// owner: @jinxu
// beta: v1.10
// stable: v1.25
//
// New local storage types to support local storage capacity isolation
// Support local ephemeral storage types for local storage capacity isolation feature.
LocalStorageCapacityIsolation featuregate.Feature = "LocalStorageCapacityIsolation"

// owner: @RobertKrawitz
Expand Down Expand Up @@ -988,7 +989,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS

LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},

LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.27

LocalStorageCapacityIsolationFSQuotaMonitoring: {Default: true, PreRelease: featuregate.Beta},

Expand Down
7 changes: 7 additions & 0 deletions pkg/generated/openapi/zz_generated.openapi.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pkg/kubelet/apis/config/fuzzer/fuzzer.go
Expand Up @@ -20,7 +20,7 @@ import (
"math/rand"
"time"

"github.com/google/gofuzz"
fuzz "github.com/google/gofuzz"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtimeserializer "k8s.io/apimachinery/pkg/runtime/serializer"
Expand Down Expand Up @@ -110,6 +110,7 @@ func Funcs(codecs runtimeserializer.CodecFactory) []interface{} {
}
obj.EnableSystemLogHandler = true
obj.MemoryThrottlingFactor = utilpointer.Float64Ptr(rand.Float64())
obj.LocalStorageCapacityIsolation = true
},
}
}
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Expand Up @@ -282,5 +282,6 @@ var (
"MemoryThrottlingFactor",
"Tracing.Endpoint",
"Tracing.SamplingRatePerMillion",
"LocalStorageCapacityIsolation",
)
)
Expand Up @@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text
Expand Down
Expand Up @@ -52,6 +52,7 @@ iptablesMasqueradeBit: 14
kind: KubeletConfiguration
kubeAPIBurst: 10
kubeAPIQPS: 5
localStorageCapacityIsolation: true
logging:
flushFrequency: 5000000000
format: text
Expand Down
10 changes: 10 additions & 0 deletions pkg/kubelet/apis/config/types.go
Expand Up @@ -450,6 +450,16 @@ type KubeletConfiguration struct {
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration

// LocalStorageCapacityIsolation enables local ephemeral storage isolation feature. The default setting is true.
// This feature allows users to set request/limit for container's ephemeral storage and manage it in a similar way
// as cpu and memory. It also allows setting sizeLimit for emptyDir volume, which will trigger pod eviction if disk
// usage from the volume exceeds the limit.
// This feature depends on the capability of detecting correct root file system disk usage. For certain systems,
// such as kind rootless, if this capability cannot be supported, the feature LocalStorageCapacityIsolation should be
// disabled. Once disabled, user should not set request/limit for container's ephemeral storage, or sizeLimit for emptyDir.
// +optional
LocalStorageCapacityIsolation bool
}

// KubeletAuthorizationMode denotes the authorization mode for the kubelet
Expand Down
3 changes: 3 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/defaults.go
Expand Up @@ -264,4 +264,7 @@ func SetDefaults_KubeletConfiguration(obj *kubeletconfigv1beta1.KubeletConfigura
if obj.RegisterNode == nil {
obj.RegisterNode = utilpointer.BoolPtr(true)
}
if obj.LocalStorageCapacityIsolation == nil {
obj.LocalStorageCapacityIsolation = utilpointer.BoolPtr(true)
}
}

0 comments on commit 0064010

Please sign in to comment.