Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Promote kubelet graceful node shutdown to beta #99735

Merged
merged 1 commit into from Mar 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion pkg/features/kube_features.go
Expand Up @@ -650,6 +650,7 @@ const (

// owner: @bobbypage
// alpha: v1.20
// beta: v1.21
// Adds support for kubelet to detect node shutdown and gracefully terminate pods prior to the node being shutdown.
GracefulNodeShutdown featuregate.Feature = "GracefulNodeShutdown"

Expand Down Expand Up @@ -765,7 +766,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
SizeMemoryBackedVolumes: {Default: false, PreRelease: featuregate.Alpha},
ExecProbeTimeout: {Default: true, PreRelease: featuregate.GA}, // lock to default in v1.21 and remove in v1.22
KubeletCredentialProviders: {Default: false, PreRelease: featuregate.Alpha},
GracefulNodeShutdown: {Default: false, PreRelease: featuregate.Alpha},
GracefulNodeShutdown: {Default: true, PreRelease: featuregate.Beta},
ServiceLBNodePortControl: {Default: false, PreRelease: featuregate.Alpha},
MixedProtocolLBService: {Default: false, PreRelease: featuregate.Alpha},
PreferNominatedNode: {Default: false, PreRelease: featuregate.Alpha},
Expand Down
8 changes: 6 additions & 2 deletions pkg/kubelet/apis/config/types.go
Expand Up @@ -379,11 +379,15 @@ type KubeletConfiguration struct {
// EnableSystemLogHandler enables /logs handler.
EnableSystemLogHandler bool
// ShutdownGracePeriod specifies the total duration that the node should delay the shutdown and total grace period for pod termination during a node shutdown.
// Defaults to 30 seconds, requires GracefulNodeShutdown feature gate to be enabled.
// Defaults to 0 seconds.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're adding a new +featureGate=GracefulNodeShutdown tag - if you put it here, later tooling will use it automatically.

Copy link
Member Author

@bobbypage bobbypage Mar 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks, I can add that.

Should the featureGate=GracefulNodeShutdown tag be added here or in https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go (where I see there's already existing tags like +optional)?

// +featureGate=GracefulNodeShutdown
// +optional
ShutdownGracePeriod metav1.Duration
// ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod.
// Defaults to 10 seconds, requires GracefulNodeShutdown feature gate to be enabled.
// Defaults to 0 seconds.
// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods.
// +featureGate=GracefulNodeShutdown
// +optional
ShutdownGracePeriodCriticalPods metav1.Duration
// ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
// The parameter makes sense only in the context of the memory manager feature. The memory manager will not allocate reserved memory for container workloads.
Expand Down
3 changes: 1 addition & 2 deletions pkg/kubelet/apis/config/validation/validation_test.go
Expand Up @@ -100,7 +100,6 @@ func TestValidateKubeletConfiguration(t *testing.T) {
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
},
}
if allErrors := ValidateKubeletConfiguration(successCase2); allErrors != nil {
Expand Down Expand Up @@ -133,7 +132,7 @@ func TestValidateKubeletConfiguration(t *testing.T) {
NodeLeaseDurationSeconds: -1,
CPUCFSQuotaPeriod: metav1.Duration{Duration: 100 * time.Millisecond},
ShutdownGracePeriod: metav1.Duration{Duration: 30 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 60 * time.Second},
}
const numErrsErrorCase1 = 28
if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 {
Expand Down
7 changes: 7 additions & 0 deletions pkg/kubelet/kubelet_test.go
Expand Up @@ -53,6 +53,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/lifecycle"
"k8s.io/kubernetes/pkg/kubelet/logs"
"k8s.io/kubernetes/pkg/kubelet/network/dns"
"k8s.io/kubernetes/pkg/kubelet/nodeshutdown"
"k8s.io/kubernetes/pkg/kubelet/pleg"
"k8s.io/kubernetes/pkg/kubelet/pluginmanager"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
Expand Down Expand Up @@ -305,6 +306,12 @@ func newTestKubeletWithImageList(

kubelet.evictionManager = evictionManager
kubelet.admitHandlers.AddPodAdmitHandler(evictionAdmitHandler)

// setup shutdown manager
shutdownManager, shutdownAdmitHandler := nodeshutdown.NewManager(kubelet.podManager.GetPods, killPodNow(kubelet.podWorkers, fakeRecorder), func() {}, 0 /* shutdownGracePeriodRequested*/, 0 /*shutdownGracePeriodCriticalPods */)
kubelet.shutdownManager = shutdownManager
kubelet.admitHandlers.AddPodAdmitHandler(shutdownAdmitHandler)

// Add this as cleanup predicate pod admitter
kubelet.admitHandlers.AddPodAdmitHandler(lifecycle.NewPredicateAdmitHandler(kubelet.getNodeAnyWay, lifecycle.NewAdmissionFailureHandlerStub(), kubelet.containerManager.UpdatePluginResources))

Expand Down
12 changes: 7 additions & 5 deletions pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux.go
Expand Up @@ -107,10 +107,7 @@ func (m *Manager) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitR

// Start starts the node shutdown manager and will start watching the node for shutdown events.
func (m *Manager) Start() error {
if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) {
return nil
}
if m.shutdownGracePeriodRequested == 0 {
if !m.isFeatureEnabled() {
return nil
}

Expand Down Expand Up @@ -202,9 +199,14 @@ func (m *Manager) aquireInhibitLock() error {
return nil
}

// Returns if the feature is enabled
func (m *Manager) isFeatureEnabled() bool {
return utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) && m.shutdownGracePeriodRequested > 0
Copy link
Member

@wzshiming wzshiming Mar 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe have to judge m.shutdownGracePeriodCriticalPods > 0 && m.shutdownGracePeriodCriticalPods < m.shutdownGracePeriodRequested

Copy link
Member Author

@bobbypage bobbypage Mar 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should already be covered in the kubelet config validation I think where an error is returned if ShutdownGracePeriodCriticalPods > ShutdownGracePeriod

if localFeatureGate.Enabled(features.GracefulNodeShutdown) {
if kc.ShutdownGracePeriod.Duration < 0 || kc.ShutdownGracePeriodCriticalPods.Duration < 0 || kc.ShutdownGracePeriodCriticalPods.Duration > kc.ShutdownGracePeriod.Duration {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriod %v must be >= 0, ShutdownGracePeriodCriticalPods %v must be >= 0, and ShutdownGracePeriodCriticalPods %v must be <= ShutdownGracePeriod %v", kc.ShutdownGracePeriod, kc.ShutdownGracePeriodCriticalPods, kc.ShutdownGracePeriodCriticalPods, kc.ShutdownGracePeriod))
}
if kc.ShutdownGracePeriod.Duration > 0 && kc.ShutdownGracePeriod.Duration < time.Duration(time.Second) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriod %v must be either zero or otherwise >= 1 sec", kc.ShutdownGracePeriod))
}
if kc.ShutdownGracePeriodCriticalPods.Duration > 0 && kc.ShutdownGracePeriodCriticalPods.Duration < time.Duration(time.Second) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: ShutdownGracePeriodCriticalPods %v must be either zero or otherwise >= 1 sec", kc.ShutdownGracePeriodCriticalPods))
}
}
if (kc.ShutdownGracePeriod.Duration > 0 || kc.ShutdownGracePeriodCriticalPods.Duration > 0) && !localFeatureGate.Enabled(features.GracefulNodeShutdown) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: Specifying ShutdownGracePeriod or ShutdownGracePeriodCriticalPods requires feature gate GracefulNodeShutdown"))
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks good, it is best to add a verification of kc.ShutdownGracePeriodCriticalPods.Duration >= kc.ShutdownGracePeriodRequested.Duration

Copy link
Member Author

@bobbypage bobbypage Mar 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might not fully understand your comment, we already have a check that if kc.ShutdownGracePeriodCriticalPods.Duration > kc.ShutdownGracePeriod.Duration an error is returned in the validation.

As a result, I'm not clear why it would make sense to add inside isFeatureEnabled since if someone specifies CriticalPods.Duration > ShutdownGracePeriod kubelet config validation will fail, so kubelet won't startup (and thus this code won't even run).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see. looks great!

}

// ShutdownStatus will return an error if the node is currently shutting down.
func (m *Manager) ShutdownStatus() error {
if !utilfeature.DefaultFeatureGate.Enabled(features.GracefulNodeShutdown) {
if !m.isFeatureEnabled() {
return nil
}

Expand Down
44 changes: 44 additions & 0 deletions pkg/kubelet/nodeshutdown/nodeshutdown_manager_linux_test.go
Expand Up @@ -261,3 +261,47 @@ func TestManager(t *testing.T) {
})
}
}

func TestFeatureEnabled(t *testing.T) {
var tests = []struct {
desc string
shutdownGracePeriodRequested time.Duration
featureGateEnabled bool
expectEnabled bool
}{
{
desc: "shutdownGracePeriodRequested 0; disables feature",
shutdownGracePeriodRequested: time.Duration(0 * time.Second),
featureGateEnabled: true,
expectEnabled: false,
},
{
desc: "feature gate disabled; disables feature",
shutdownGracePeriodRequested: time.Duration(100 * time.Second),
featureGateEnabled: false,
expectEnabled: false,
},
{
desc: "feature gate enabled; shutdownGracePeriodRequested > 0; enables feature",
shutdownGracePeriodRequested: time.Duration(100 * time.Second),
featureGateEnabled: true,
expectEnabled: true,
},
}
for _, tc := range tests {
t.Run(tc.desc, func(t *testing.T) {
activePodsFunc := func() []*v1.Pod {
return nil
}
killPodsFunc := func(pod *v1.Pod, status v1.PodStatus, gracePeriodOverride *int64) error {
return nil
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, pkgfeatures.GracefulNodeShutdown, tc.featureGateEnabled)()

manager, _ := NewManager(activePodsFunc, killPodsFunc, func() {}, tc.shutdownGracePeriodRequested, 0 /*shutdownGracePeriodCriticalPods*/)
manager.clock = clock.NewFakeClock(time.Now())

assert.Equal(t, tc.expectEnabled, manager.isFeatureEnabled())
})
}
}
6 changes: 4 additions & 2 deletions staging/src/k8s.io/kubelet/config/v1beta1/types.go
Expand Up @@ -830,12 +830,14 @@ type KubeletConfiguration struct {
// +optional
EnableSystemLogHandler *bool `json:"enableSystemLogHandler,omitempty"`
// ShutdownGracePeriod specifies the total duration that the node should delay the shutdown and total grace period for pod termination during a node shutdown.
// Default: "30s"
// Default: "0s"
// +featureGate=GracefulNodeShutdown
// +optional
ShutdownGracePeriod metav1.Duration `json:"shutdownGracePeriod,omitempty"`
// ShutdownGracePeriodCriticalPods specifies the duration used to terminate critical pods during a node shutdown. This should be less than ShutdownGracePeriod.
// For example, if ShutdownGracePeriod=30s, and ShutdownGracePeriodCriticalPods=10s, during a node shutdown the first 20 seconds would be reserved for gracefully terminating normal pods, and the last 10 seconds would be reserved for terminating critical pods.
// Default: "10s"
// Default: "0s"
// +featureGate=GracefulNodeShutdown
// +optional
ShutdownGracePeriodCriticalPods metav1.Duration `json:"shutdownGracePeriodCriticalPods,omitempty"`
// ReservedMemory specifies a comma-separated list of memory reservations for NUMA nodes.
Expand Down