Skip to content

Commit

Permalink
Merge pull request #111070 from mimowo/retriable-pod-failures-refacto…
Browse files Browse the repository at this point in the history
…r-gc-controller

Refactor gc_controller to not use the deletePod stub
  • Loading branch information
k8s-ci-robot committed Jul 14, 2022
2 parents e50a366 + 778b830 commit 27110bd
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 50 deletions.
30 changes: 15 additions & 15 deletions pkg/controller/podgc/gc_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ type PodGCController struct {

nodeQueue workqueue.DelayingInterface

deletePod func(namespace, name string) error
terminatedPodThreshold int
}

Expand All @@ -77,10 +76,6 @@ func NewPodGC(ctx context.Context, kubeClient clientset.Interface, podInformer c
nodeLister: nodeInformer.Lister(),
nodeListerSynced: nodeInformer.Informer().HasSynced,
nodeQueue: workqueue.NewNamedDelayingQueue("orphaned_pods_nodes"),
deletePod: func(namespace, name string) error {
klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
return kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, *metav1.NewDeleteOptions(0))
},
}

return gcc
Expand Down Expand Up @@ -114,13 +109,13 @@ func (gcc *PodGCController) gc(ctx context.Context) {
return
}
if gcc.terminatedPodThreshold > 0 {
gcc.gcTerminated(pods)
gcc.gcTerminated(ctx, pods)
}
if utilfeature.DefaultFeatureGate.Enabled(features.NodeOutOfServiceVolumeDetach) {
gcc.gcTerminating(pods)
gcc.gcTerminating(ctx, pods)
}
gcc.gcOrphaned(ctx, pods, nodes)
gcc.gcUnscheduledTerminating(pods)
gcc.gcUnscheduledTerminating(ctx, pods)
}

func isPodTerminated(pod *v1.Pod) bool {
Expand All @@ -135,7 +130,7 @@ func isPodTerminating(pod *v1.Pod) bool {
return pod.ObjectMeta.DeletionTimestamp != nil
}

func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
func (gcc *PodGCController) gcTerminating(ctx context.Context, pods []*v1.Pod) {
klog.V(4).Info("GC'ing terminating pods that are on out-of-service nodes")
terminatingPods := []*v1.Pod{}
for _, pod := range pods {
Expand Down Expand Up @@ -168,7 +163,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
wait.Add(1)
go func(namespace string, name string) {
defer wait.Done()
if err := gcc.deletePod(namespace, name); err != nil {
if err := gcc.deletePod(ctx, namespace, name); err != nil {
// ignore not founds
utilruntime.HandleError(err)
}
Expand All @@ -177,7 +172,7 @@ func (gcc *PodGCController) gcTerminating(pods []*v1.Pod) {
wait.Wait()
}

func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
func (gcc *PodGCController) gcTerminated(ctx context.Context, pods []*v1.Pod) {
terminatedPods := []*v1.Pod{}
for _, pod := range pods {
if isPodTerminated(pod) {
Expand All @@ -200,7 +195,7 @@ func (gcc *PodGCController) gcTerminated(pods []*v1.Pod) {
wait.Add(1)
go func(namespace string, name string) {
defer wait.Done()
if err := gcc.deletePod(namespace, name); err != nil {
if err := gcc.deletePod(ctx, namespace, name); err != nil {
// ignore not founds
defer utilruntime.HandleError(err)
}
Expand Down Expand Up @@ -233,7 +228,7 @@ func (gcc *PodGCController) gcOrphaned(ctx context.Context, pods []*v1.Pod, node
continue
}
klog.V(2).InfoS("Found orphaned Pod assigned to the Node, deleting.", "pod", klog.KObj(pod), "node", pod.Spec.NodeName)
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
utilruntime.HandleError(err)
} else {
klog.V(0).InfoS("Forced deletion of orphaned Pod succeeded", "pod", klog.KObj(pod))
Expand Down Expand Up @@ -273,7 +268,7 @@ func (gcc *PodGCController) checkIfNodeExists(ctx context.Context, name string)
}

// gcUnscheduledTerminating deletes pods that are terminating and haven't been scheduled to a particular node.
func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
func (gcc *PodGCController) gcUnscheduledTerminating(ctx context.Context, pods []*v1.Pod) {
klog.V(4).Infof("GC'ing unscheduled pods which are terminating.")

for _, pod := range pods {
Expand All @@ -282,7 +277,7 @@ func (gcc *PodGCController) gcUnscheduledTerminating(pods []*v1.Pod) {
}

klog.V(2).InfoS("Found unscheduled terminating Pod not assigned to any Node, deleting.", "pod", klog.KObj(pod))
if err := gcc.deletePod(pod.Namespace, pod.Name); err != nil {
if err := gcc.deletePod(ctx, pod.Namespace, pod.Name); err != nil {
utilruntime.HandleError(err)
} else {
klog.V(0).InfoS("Forced deletion of unscheduled terminating Pod succeeded", "pod", klog.KObj(pod))
Expand All @@ -302,3 +297,8 @@ func (o byCreationTimestamp) Less(i, j int) bool {
}
return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
}

// deletePod force-deletes (grace period 0) the pod identified by namespace/name
// via the controller's client, logging the deletion before issuing it.
func (gcc *PodGCController) deletePod(ctx context.Context, namespace, name string) error {
	klog.InfoS("PodGC is force deleting Pod", "pod", klog.KRef(namespace, name))
	opts := *metav1.NewDeleteOptions(0)
	return gcc.kubeClient.CoreV1().Pods(namespace).Delete(ctx, name, opts)
}
57 changes: 22 additions & 35 deletions pkg/controller/podgc/gc_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package podgc

import (
"context"
"sync"
"testing"
"time"

Expand All @@ -32,6 +31,7 @@ import (
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
clienttesting "k8s.io/client-go/testing"
"k8s.io/client-go/util/workqueue"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/controller"
Expand Down Expand Up @@ -63,6 +63,17 @@ func compareStringSetToList(set sets.String, list []string) bool {
return true
}

// getDeletedPodNames collects, in order, the names of every pod that was
// deleted through the fake clientset, by scanning its recorded actions.
func getDeletedPodNames(client *fake.Clientset) []string {
	names := make([]string, 0)
	for _, action := range client.Actions() {
		if action.GetVerb() != "delete" || action.GetResource().Resource != "pods" {
			continue
		}
		names = append(names, action.(clienttesting.DeleteAction).GetName())
	}
	return names
}

func TestGCTerminated(t *testing.T) {
type nameToPhase struct {
name string
Expand Down Expand Up @@ -129,14 +140,6 @@ func TestGCTerminated(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node")}})
gcc, podInformer, _ := NewFromClient(client, test.threshold)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}

creationTime := time.Unix(0, 0)
for _, pod := range test.pods {
Expand All @@ -150,6 +153,8 @@ func TestGCTerminated(t *testing.T) {

gcc.gc(context.TODO())

deletedPodNames := getDeletedPodNames(client)

if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
i, test.deletedPodNames.List(), deletedPodNames)
Expand Down Expand Up @@ -329,17 +334,10 @@ func TestGCOrphaned(t *testing.T) {
gcc.nodeQueue.ShutDown()
gcc.nodeQueue = workqueue.NewDelayingQueueWithCustomClock(fakeClock, "podgc_test_queue")

deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}

// First GC of orphaned pods
gcc.gc(context.TODO())
deletedPodNames := getDeletedPodNames(client)

if len(deletedPodNames) > 0 {
t.Errorf("no pods should be deleted at this point.\n\tactual: %v", deletedPodNames)
}
Expand Down Expand Up @@ -371,6 +369,7 @@ func TestGCOrphaned(t *testing.T) {

// Actual pod deletion
gcc.gc(context.TODO())
deletedPodNames = getDeletedPodNames(client)

if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
Expand Down Expand Up @@ -417,14 +416,6 @@ func TestGCUnscheduledTerminating(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset()
gcc, podInformer, _ := NewFromClient(client, -1)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}

creationTime := time.Unix(0, 0)
for _, pod := range test.pods {
Expand All @@ -442,7 +433,8 @@ func TestGCUnscheduledTerminating(t *testing.T) {
t.Errorf("Error while listing all Pods: %v", err)
return
}
gcc.gcUnscheduledTerminating(pods)
gcc.gcUnscheduledTerminating(context.TODO(), pods)
deletedPodNames := getDeletedPodNames(client)

if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v, test: %v",
Expand Down Expand Up @@ -557,14 +549,7 @@ func TestGCTerminating(t *testing.T) {
t.Run(test.name, func(t *testing.T) {
client := fake.NewSimpleClientset(&v1.NodeList{Items: []v1.Node{*testutil.NewNode("node-a")}})
gcc, podInformer, nodeInformer := NewFromClient(client, -1)
deletedPodNames := make([]string, 0)
var lock sync.Mutex
gcc.deletePod = func(_, name string) error {
lock.Lock()
defer lock.Unlock()
deletedPodNames = append(deletedPodNames, name)
return nil
}

creationTime := time.Unix(0, 0)
for _, node := range test.nodes {
creationTime = creationTime.Add(2 * time.Hour)
Expand Down Expand Up @@ -595,6 +580,8 @@ func TestGCTerminating(t *testing.T) {
}

gcc.gc(context.TODO())
deletedPodNames := getDeletedPodNames(client)

if pass := compareStringSetToList(test.deletedPodNames, deletedPodNames); !pass {
t.Errorf("[%v]pod's deleted expected and actual did not match.\n\texpected: %v\n\tactual: %v",
i, test.deletedPodNames.List(), deletedPodNames)
Expand Down

0 comments on commit 27110bd

Please sign in to comment.