From 893f5fd4f007775d48536cae192d79f209eeeac2 Mon Sep 17 00:00:00 2001 From: David Porter Date: Tue, 2 Mar 2021 15:41:50 -0800 Subject: [PATCH] Promote kubelet graceful node shutdown to beta - Change the feature gate from alpha to beta and enable it by default - Update a few of the unit tests due to feature gate being enabled by default - Small refactor in `nodeshutdown_manager` which adds `featureEnabled` function (which checks that feature gate and that `kubeletConfig.ShutdownGracePeriod > 0`). - Use `featureEnabled()` to exit early from shutdown manager in the case that the feature is disabled - Update kubelet config defaulting to be explicit that `ShutdownGracePeriod` and `ShutdownGracePeriodCriticalPods` default to zero and update the godoc comments. - Update defaults and add featureGate tag in api config godoc. With this feature now in beta and the feature gate enabled by default, to enable graceful shutdown all that will be required is to configure `ShutdownGracePeriod` and `ShutdownGracePeriodCriticalPods` in the kubelet config. If not configured, they will be defaulted to zero, and graceful shutdown will effectively be disabled. --- pkg/features/kube_features.go | 3 +- pkg/kubelet/apis/config/types.go | 8 +++- .../apis/config/validation/validation_test.go | 3 +- pkg/kubelet/kubelet_test.go | 7 +++ .../nodeshutdown_manager_linux.go | 12 ++--- .../nodeshutdown_manager_linux_test.go | 44 +++++++++++++++++++ .../k8s.io/kubelet/config/v1beta1/types.go | 6 ++- 7 files changed, 71 insertions(+), 12 deletions(-) diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index 8fb22fe5c6de..14f856d60547 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -650,6 +650,7 @@ const ( // owner: @bobbypage // alpha: v1.20 + // beta: v1.21 // Adds support for kubelet to detect node shutdown and gracefully terminate pods prior to the node being shutdown. 
GracefulNodeShutdown featuregate.Feature = "GracefulNodeShutdown" @@ -765,7 +766,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS SizeMemoryBackedVolumes: {Default: false, PreRelease: featuregate.Alpha}, ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default in v1.21 and remove in v1.22 KubeletCredentialProviders: {Default: false, PreRelease: featuregate.Alpha}, - GracefulNodeShutdown: {Default: false, PreRelease: featuregate.Alpha}, + GracefulNodeShutdown: {Default: true, PreRelease: featuregate.Beta}, ServiceLBNodePortControl: {Default: false, PreRelease: featuregate.Alpha}, MixedProtocolLBService: {Default: false, PreRelease: featuregate.Alpha}, PreferNominatedNode: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/pkg/kubelet/apis/config/types.go b/pkg/kubelet/apis/config/types.go index 749a6df2a651..fcc86830b5a3 100644 --- a/pkg/kubelet/apis/config/types.go +++ b/pkg/kubelet/apis/config/types.go @@ -379,11 +379,15 @@ type KubeletConfiguration struct { // EnableSystemLogHandler enables /logs handler. EnableSystemLogHandler bool // ShutdownGracePeriod specifies the total duration that the node should delay the shutdown and total grace period for pod termination during a node shutdown. - // Defaults to 30 seconds, requires GracefulNodeShutdown feature gate to be enabled. + // Defaults to 0 seconds. + // +featureGate=GracefulNodeShutdown + // +optional ShutdownGracePeriod metav1.Duration // ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod. - // Defaults to 10 seconds, requires GracefulNodeShutdown feature gate to be enabled. + // Defaults to 0 seconds. 
// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods. + // +featureGate=GracefulNodeShutdown + // +optional ShutdownGracePeriodCriticalPods metav1.Duration // ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes. // The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads. diff --git a/pkg/kubelet/apis/config/validation/validation_test.go b/pkg/kubelet/apis/config/validation/validation_test.go index 9a00bda6e32f..641b4154b359 100644 --- a/pkg/kubelet/apis/config/validation/validation_test.go +++ b/pkg/kubelet/apis/config/validation/validation_test.go @@ -100,7 +100,6 @@ func TestValidateKubeletConfiguration(t *testing.T) { ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0}, FeatureGates: map[string]bool{ "CustomCPUCFSQuotaPeriod": true, - "GracefulNodeShutdown": true, }, } if allErrors := ValidateKubeletConfiguration(successCase2); allErrors != nil { @@ -133,7 +132,7 @@ func TestValidateKubeletConfiguration(t *testing.T) { NodeLeaseDurationSeconds: -1, CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond}, ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second}, - ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second}, + ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 60 * time.Second}, } const numErrsErrorCase1 = 28 if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 { diff --git a/pkg/kubelet/kubelet_test.go b/pkg/kubelet/kubelet_test.go index aaf74b400d0a..7673ffc90025 100644 --- a/pkg/kubelet/kubelet_test.go +++ b/pkg/kubelet/kubelet_test.go @@ -53,6 +53,7 @@ import ( 
"k8s.io/kubernetes/pkg/kubelet/lifecycle" "k8s.io/kubernetes/pkg/kubelet/logs" "k8s.io/kubernetes/pkg/kubelet/network/dns" + "k8s.io/kubernetes/pkg/kubelet/nodeshutdown" "k8s.io/kubernetes/pkg/kubelet/pleg" "k8s.io/kubernetes/pkg/kubelet/pluginmanager" kubepod "k8s.io/kubernetes/pkg/kubelet/pod" @@ -305,6 +306,12 @@ func newTestKubeletWithImageList( kubelet.evictionManager = evictionManager kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler) + + // setup shutdown manager + shutdownManager, shutdownAdmitHandler := nodeshutdown.NewManager(kubelet.podManager.GetPods, killPodNow(kubelet.podWorkers, fakeRecorder), func() {}, 0 /* shutdownGracePeriodRequested*/, 0 /*shutdownGracePeriodCriticalPods */) + kubelet.shutdownManager = shutdownManager + kubelet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler) + // Add this as cleanup predicate pod admitter kubelet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay, lifecycle.NewAdmissionFailureHandlerStub(), kubelet.containerManager.UpdatePluginResources)) diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go index 874449e5d254..e694db19ac0b 100644 --- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go +++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go @@ -107,10 +107,7 @@ func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR // Start starts the node shutdown manager and will start watching the node for shutdown events. 
func (m *Manager) Start() error { - if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) { - return nil - } - if m.shutdownGracePeriodRequested == 0 { + if !m.isFeatureEnabled() { return nil } @@ -202,9 +199,14 @@ func (m *Manager) aquireInhibitLock() error { return nil } +// isFeatureEnabled returns whether the GracefulNodeShutdown feature gate is enabled and a shutdown grace period was requested +func (m *Manager) isFeatureEnabled() bool { + return utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) && m.shutdownGracePeriodRequested > 0 +} + // ShutdownStatus will return an error if the node is currently shutting down. func (m *Manager) ShutdownStatus() error { - if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) { + if !m.isFeatureEnabled() { return nil } diff --git a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go index 4bc036e1bd7a..cc611f1fc54f 100644 --- a/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go +++ b/pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go @@ -261,3 +261,47 @@ func TestManager(t *testing.T) { }) } } + +func TestFeatureEnabled(t *testing.T) { + var tests = []struct { + desc string + shutdownGracePeriodRequested time.Duration + featureGateEnabled bool + expectEnabled bool + }{ + { + desc: "shutdownGracePeriodRequested 0; disables feature", + shutdownGracePeriodRequested: time.Duration(0 * time.Second), + featureGateEnabled: true, + expectEnabled: false, + }, + { + desc: "feature gate disabled; disables feature", + shutdownGracePeriodRequested: time.Duration(100 * time.Second), + featureGateEnabled: false, + expectEnabled: false, + }, + { + desc: "feature gate enabled; shutdownGracePeriodRequested > 0; enables feature", + shutdownGracePeriodRequested: time.Duration(100 * time.Second), + featureGateEnabled: true, + expectEnabled: true, + }, + } + for _, tc := range tests { + t.Run(tc.desc, func(t *testing.T) { + activePodsFunc := func() []*v1.Pod { + return nil + } + 
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error { + return nil + } + defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)() + + manager, _ := NewManager(activePodsFunc, killPodsFunc, func() {}, tc.shutdownGracePeriodRequested, 0 /*shutdownGracePeriodCriticalPods*/) + manager.clock = clock.NewFakeClock(time.Now()) + + assert.Equal(t, tc.expectEnabled, manager.isFeatureEnabled()) + }) + } +} diff --git a/staging/src/k8s.io/kubelet/config/v1beta1/types.go b/staging/src/k8s.io/kubelet/config/v1beta1/types.go index bc7409e9e11a..5977d855f22c 100644 --- a/staging/src/k8s.io/kubelet/config/v1beta1/types.go +++ b/staging/src/k8s.io/kubelet/config/v1beta1/types.go @@ -830,12 +830,14 @@ type KubeletConfiguration struct { // +optional EnableSystemLogHandler *bool `json:"enableSystemLogHandler,omitempty"` // ShutdownGracePeriod specifies the total duration that the node should delay the shutdown and total grace period for pod termination during a node shutdown. - // Default: "30s" + // Default: "0s" + // +featureGate=GracefulNodeShutdown // +optional ShutdownGracePeriod metav1.Duration `json:"shutdownGracePeriod,omitempty"` // ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod. // For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods. - // Default: "10s" + // Default: "0s" + // +featureGate=GracefulNodeShutdown // +optional ShutdownGracePeriodCriticalPods metav1.Duration `json:"shutdownGracePeriodCriticalPods,omitempty"` // ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.