Skip to content

Commit

Permalink
Merge pull request #102823 from ehashman/kep-2400-swap
Browse files Browse the repository at this point in the history
Alpha node swap support
  • Loading branch information
k8s-ci-robot committed Jul 7, 2021
2 parents 99f7772 + 5584725 commit 561959f
Show file tree
Hide file tree
Showing 21 changed files with 968 additions and 648 deletions.
2 changes: 1 addition & 1 deletion hack/local-up-cluster.sh
Expand Up @@ -786,7 +786,7 @@ function start_kubelet {

# warn if users are running with swap allowed
if [ "${FAIL_SWAP_ON}" == "false" ]; then
echo "WARNING : The kubelet is configured to not fail even if swap is enabled; production deployments should disable swap."
echo "WARNING : The kubelet is configured to not fail even if swap is enabled; production deployments should disable swap unless testing NodeSwapEnabled feature."
fi

if [[ "${REUSE_CERTS}" != true ]]; then
Expand Down
7 changes: 7 additions & 0 deletions pkg/features/kube_features.go
Expand Up @@ -649,6 +649,12 @@ const (
// Allows user to override pod-level terminationGracePeriod for probes
ProbeTerminationGracePeriod featuregate.Feature = "ProbeTerminationGracePeriod"

// owner: @ehashman
// alpha: v1.22
//
// Permits kubelet to run with swap enabled
NodeSwapEnabled featuregate.Feature = "NodeSwapEnabled"

// owner: @ahg-g
// alpha: v1.21
// beta: v1.22
Expand Down Expand Up @@ -847,6 +853,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
VolumeCapacityPriority: {Default: false, PreRelease: featuregate.Alpha},
PreferNominatedNode: {Default: true, PreRelease: featuregate.Beta},
ProbeTerminationGracePeriod: {Default: false, PreRelease: featuregate.Alpha},
NodeSwapEnabled: {Default: false, PreRelease: featuregate.Alpha},
PodDeletionCost: {Default: true, PreRelease: featuregate.Beta},
StatefulSetAutoDeletePVC: {Default: false, PreRelease: featuregate.Alpha},
TopologyAwareHints: {Default: false, PreRelease: featuregate.Alpha},
Expand Down
1 change: 1 addition & 0 deletions pkg/kubelet/apis/config/helpers_test.go
Expand Up @@ -209,6 +209,7 @@ var (
"MaxOpenFiles",
"MaxPods",
"MemoryManagerPolicy",
"MemorySwap.SwapBehavior",
"NodeLeaseDurationSeconds",
"NodeStatusMaxImages",
"NodeStatusUpdateFrequency.Duration",
Expand Down
Expand Up @@ -58,6 +58,7 @@ makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
memoryManagerPolicy: None
memorySwap: {}
nodeLeaseDurationSeconds: 40
nodeStatusMaxImages: 50
nodeStatusReportFrequency: 5m0s
Expand Down
Expand Up @@ -58,6 +58,7 @@ makeIPTablesUtilChains: true
maxOpenFiles: 1000000
maxPods: 110
memoryManagerPolicy: None
memorySwap: {}
nodeLeaseDurationSeconds: 40
nodeStatusMaxImages: 50
nodeStatusReportFrequency: 5m0s
Expand Down
13 changes: 13 additions & 0 deletions pkg/kubelet/apis/config/types.go
Expand Up @@ -326,6 +326,10 @@ type KubeletConfiguration struct {
FeatureGates map[string]bool
// Tells the Kubelet to fail to start if swap is enabled on the node.
FailSwapOn bool
// memorySwap configures swap memory available to container workloads.
// +featureGate=NodeSwapEnabled
// +optional
MemorySwap MemorySwapConfiguration
// A quantity defines the maximum size of the container log file before it is rotated. For example: "5Mi" or "256Ki".
ContainerLogMaxSize string
// Maximum number of container log files that can be present for a container.
Expand Down Expand Up @@ -568,3 +572,12 @@ type MemoryReservation struct {
NumaNode int32
Limits v1.ResourceList
}

type MemorySwapConfiguration struct {
// swapBehavior configures swap memory available to container workloads. May be one of
// "", "LimitedSwap": workload combined memory and swap usage cannot exceed pod memory limit
// "UnlimitedSwap": workloads can use unlimited swap, up to the allocatable limit.
// +featureGate=NodeSwapEnabled
// +optional
SwapBehavior string
}
36 changes: 36 additions & 0 deletions pkg/kubelet/apis/config/v1beta1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions pkg/kubelet/apis/config/validation/validation.go
Expand Up @@ -155,6 +155,14 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration) error
if (kc.ShutdownGracePeriod.Duration > 0 || kc.ShutdownGracePeriodCriticalPods.Duration > 0) && !localFeatureGate.Enabled(features.GracefulNodeShutdown) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: Specifying ShutdownGracePeriod or ShutdownGracePeriodCriticalPods requires feature gate GracefulNodeShutdown"))
}
if localFeatureGate.Enabled(features.NodeSwapEnabled) {
if kc.MemorySwap.SwapBehavior != "" && kc.MemorySwap.SwapBehavior != kubetypes.LimitedSwap && kc.MemorySwap.SwapBehavior != kubetypes.UnlimitedSwap {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: MemorySwap.SwapBehavior %v must be one of: LimitedSwap, UnlimitedSwap", kc.MemorySwap.SwapBehavior))
}
}
if !localFeatureGate.Enabled(features.NodeSwapEnabled) && kc.MemorySwap != (kubeletconfig.MemorySwapConfiguration{}) {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: MemorySwap.SwapBehavior cannot be set when NodeSwapEnabled feature flag is disabled"))
}

for _, val := range kc.EnforceNodeAllocatable {
switch val {
Expand Down
10 changes: 8 additions & 2 deletions pkg/kubelet/apis/config/validation/validation_test.go
Expand Up @@ -24,6 +24,7 @@ import (
utilerrors "k8s.io/apimachinery/pkg/util/errors"
componentbaseconfig "k8s.io/component-base/config"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)

func TestValidateKubeletConfiguration(t *testing.T) {
Expand Down Expand Up @@ -145,9 +146,11 @@ func TestValidateKubeletConfiguration(t *testing.T) {
TopologyManagerPolicy: kubeletconfig.NoneTopologyManagerPolicy,
ShutdownGracePeriod: metav1.Duration{Duration: 10 * time.Minute},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 0},
MemorySwap: kubeletconfig.MemorySwapConfiguration{SwapBehavior: kubetypes.UnlimitedSwap},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
"NodeSwapEnabled": true,
},
Logging: componentbaseconfig.LoggingConfiguration{
Format: "text",
Expand Down Expand Up @@ -187,8 +190,9 @@ func TestValidateKubeletConfiguration(t *testing.T) {
Logging: componentbaseconfig.LoggingConfiguration{
Format: "",
},
MemorySwap: kubeletconfig.MemorySwapConfiguration{SwapBehavior: kubetypes.UnlimitedSwap},
}
const numErrsErrorCase1 = 29
const numErrsErrorCase1 = 30
if allErrors := ValidateKubeletConfiguration(errorCase1); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase1 {
t.Errorf("expect %d errors, got %v", numErrsErrorCase1, len(allErrors.(utilerrors.Aggregate).Errors()))
}
Expand Down Expand Up @@ -225,15 +229,17 @@ func TestValidateKubeletConfiguration(t *testing.T) {
TopologyManagerPolicy: "invalid",
ShutdownGracePeriod: metav1.Duration{Duration: 40 * time.Second},
ShutdownGracePeriodCriticalPods: metav1.Duration{Duration: 10 * time.Second},
MemorySwap: kubeletconfig.MemorySwapConfiguration{SwapBehavior: "invalid"},
FeatureGates: map[string]bool{
"CustomCPUCFSQuotaPeriod": true,
"GracefulNodeShutdown": true,
"NodeSwapEnabled": true,
},
Logging: componentbaseconfig.LoggingConfiguration{
Format: "text",
},
}
const numErrsErrorCase2 = 3
const numErrsErrorCase2 = 4
if allErrors := ValidateKubeletConfiguration(errorCase2); len(allErrors.(utilerrors.Aggregate).Errors()) != numErrsErrorCase2 {
t.Errorf("expect %d errors, got %v", numErrsErrorCase2, len(allErrors.(utilerrors.Aggregate).Errors()))
}
Expand Down
17 changes: 17 additions & 0 deletions pkg/kubelet/apis/config/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/kubelet/kubelet.go
Expand Up @@ -652,6 +652,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
klet.containerLogManager,
klet.runtimeClassManager,
seccompDefault,
kubeCfg.MemorySwap.SwapBehavior,
)
if err != nil {
return nil, err
Expand Down
18 changes: 18 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
Expand Up @@ -30,6 +30,7 @@ import (
kubefeatures "k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/qos"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

// applyPlatformSpecificContainerConfig applies platform specific configurations to runtimeapi.ContainerConfig.
Expand Down Expand Up @@ -89,6 +90,23 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C

lc.Resources.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)

if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwapEnabled) {
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
// https://github.com/opencontainers/runtime-spec/blob/1c3f411f041711bbeecf35ff7e93461ea6789220/config-linux.md#memory
switch m.memorySwapBehavior {
case kubelettypes.UnlimitedSwap:
// -1 = unlimited swap
lc.Resources.MemorySwapLimitInBytes = -1
case kubelettypes.LimitedSwap:
fallthrough
default:
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored.
lc.Resources.MemorySwapLimitInBytes = lc.Resources.MemoryLimitInBytes
}
}

return lc
}

Expand Down
90 changes: 90 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container_linux_test.go
Expand Up @@ -33,6 +33,7 @@ import (
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
)

func makeExpectedConfig(m *kubeGenericRuntimeManager, pod *v1.Pod, containerIndex int) *runtimeapi.ContainerConfig {
Expand Down Expand Up @@ -367,3 +368,92 @@ func TestGenerateLinuxContainerConfigNamespaces(t *testing.T) {
})
}
}

func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.NodeSwapEnabled, true)()
_, _, m, err := createTestRuntimeManager()
if err != nil {
t.Fatalf("error creating test RuntimeManager: %v", err)
}
m.machineInfo.MemoryCapacity = 1000000
containerName := "test"

for _, tc := range []struct {
name string
swapSetting string
pod *v1.Pod
expected int64
}{
{
name: "config unset, memory limit set",
// no swap setting
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{{
Name: containerName,
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
Requests: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
},
}},
},
},
expected: 1000,
},
{
name: "config unset, no memory limit",
// no swap setting
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: containerName},
},
},
},
expected: 0,
},
{
// Note: behaviour will be the same as previous two cases
name: "config set to LimitedSwap, memory limit set",
swapSetting: kubelettypes.LimitedSwap,
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{{
Name: containerName,
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
Requests: v1.ResourceList{
"memory": resource.MustParse("1000"),
},
},
}},
},
},
expected: 1000,
},
{
name: "UnlimitedSwap enabled",
swapSetting: kubelettypes.UnlimitedSwap,
pod: &v1.Pod{
Spec: v1.PodSpec{
Containers: []v1.Container{
{Name: containerName},
},
},
},
expected: -1,
},
} {
t.Run(tc.name, func(t *testing.T) {
m.memorySwapBehavior = tc.swapSetting
actual := m.generateLinuxContainerConfig(&tc.pod.Spec.Containers[0], tc.pod, nil, "", nil)
assert.Equal(t, tc.expected, actual.Resources.MemorySwapLimitInBytes, "memory swap config for %s", tc.name)
})
}
}

0 comments on commit 561959f

Please sign in to comment.