From f5b8d3b26bb97a42afa65566c5ee968ee9035a38 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Thu, 7 Oct 2021 17:02:04 +0200 Subject: [PATCH] get logs of broken controller --- bootstrap/kubeadm/config/manager/manager.yaml | 2 + config/manager/manager.yaml | 2 + .../kubeadm/config/manager/manager.yaml | 2 + .../clusterctl/clusterctl_helpers.go | 6 ++ test/framework/deployment_helpers.go | 77 ++++++++++++++++++- .../docker/config/manager/manager.yaml | 2 + 6 files changed, 90 insertions(+), 1 deletion(-) diff --git a/bootstrap/kubeadm/config/manager/manager.yaml b/bootstrap/kubeadm/config/manager/manager.yaml index 233ba3d5fd90..a01ea2e1308f 100644 --- a/bootstrap/kubeadm/config/manager/manager.yaml +++ b/bootstrap/kubeadm/config/manager/manager.yaml @@ -33,6 +33,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 6e4d23e7ad32..e3b2a9c166b0 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -34,6 +34,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/controlplane/kubeadm/config/manager/manager.yaml b/controlplane/kubeadm/config/manager/manager.yaml index b5e31734e031..2363bd55d3ba 100644 --- a/controlplane/kubeadm/config/manager/manager.yaml +++ b/controlplane/kubeadm/config/manager/manager.yaml @@ -33,6 +33,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/test/framework/clusterctl/clusterctl_helpers.go b/test/framework/clusterctl/clusterctl_helpers.go index f016172ec7c0..5ed6618c6957 100644 --- a/test/framework/clusterctl/clusterctl_helpers.go +++ b/test/framework/clusterctl/clusterctl_helpers.go @@ -99,6 +99,9 @@ func InitManagementClusterAndWatchControllerLogs(ctx context.Context, 
input Init framework.WaitForDeploymentsAvailable(ctx, framework.WaitForDeploymentsAvailableInput{ Getter: client, Deployment: deployment, + GetLister: client, + ClientSet: input.ClusterProxy.GetClientSet(), + LogPath: filepath.Join(input.LogFolder, "controllers"), }, intervals...) // Start streaming logs from all controller providers @@ -155,6 +158,9 @@ func UpgradeManagementClusterAndWait(ctx context.Context, input UpgradeManagemen framework.WaitForDeploymentsAvailable(ctx, framework.WaitForDeploymentsAvailableInput{ Getter: client, Deployment: deployment, + GetLister: client, + ClientSet: input.ClusterProxy.GetClientSet(), + LogPath: filepath.Join(input.LogFolder, "controllers"), }, intervals...) // Start streaming logs from all controller providers diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go index 2652e56d10c9..f7c5b19befdc 100644 --- a/test/framework/deployment_helpers.go +++ b/test/framework/deployment_helpers.go @@ -47,6 +47,9 @@ import ( type WaitForDeploymentsAvailableInput struct { Getter Getter Deployment *appsv1.Deployment + GetLister GetLister + ClientSet *kubernetes.Clientset + LogPath string } // WaitForDeploymentsAvailable waits until the Deployment has status.Available = True, that signals that @@ -69,7 +72,17 @@ func WaitForDeploymentsAvailable(ctx context.Context, input WaitForDeploymentsAv } } return false - }, intervals...).Should(BeTrue(), func() string { return DescribeFailedDeployment(input, deployment) }) + }, intervals...).Should(BeTrue(), func() string { + if input.GetLister != nil && input.ClientSet != nil && input.LogPath != "" { + GetDeploymentLogs(ctx, GetDeploymentLogsInput{ + Deployment: deployment, + GetLister: input.GetLister, + ClientSet: input.ClientSet, + LogPath: input.LogPath, + }) + } + return DescribeFailedDeployment(input, deployment) + }) } // DescribeFailedDeployment returns detailed output to help debug a deployment failure in e2e. 
@@ -152,6 +165,68 @@ func WatchDeploymentLogs(ctx context.Context, input WatchDeploymentLogsInput) { } } +type GetDeploymentLogsInput struct { + GetLister GetLister + ClientSet *kubernetes.Clientset + Deployment *appsv1.Deployment + LogPath string +} + +func GetDeploymentLogs(ctx context.Context, input GetDeploymentLogsInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for GetDeploymentLogs") + Expect(input.ClientSet).NotTo(BeNil(), "input.ClientSet is required for GetDeploymentLogs") + Expect(input.Deployment).NotTo(BeNil(), "input.Deployment is required for GetDeploymentLogs") + + deployment := &appsv1.Deployment{} + key := client.ObjectKeyFromObject(input.Deployment) + Expect(input.GetLister.Get(ctx, key, deployment)).To(Succeed(), "Failed to get deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + selector, err := metav1.LabelSelectorAsMap(deployment.Spec.Selector) + Expect(err).NotTo(HaveOccurred(), "Failed to get Pods selector for deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + pods := &corev1.PodList{} + Expect(input.GetLister.List(ctx, pods, client.InNamespace(input.Deployment.Namespace), client.MatchingLabels(selector))).To(Succeed(), "Failed to list Pods for deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + for _, pod := range pods.Items { + for _, container := range deployment.Spec.Template.Spec.Containers { + log.Logf("Creating log watcher for controller %s/%s, pod %s, container %s", input.Deployment.Namespace, input.Deployment.Name, pod.Name, container.Name) + + // Fetch each container's logs in a goroutine so they are all retrieved concurrently (one-shot fetch: Follow is false, not a stream). 
+ go func(pod corev1.Pod, container corev1.Container) { + defer GinkgoRecover() + + logFile := filepath.Clean(path.Join(input.LogPath, input.Deployment.Name, pod.Name, container.Name+".log")) + Expect(os.MkdirAll(filepath.Dir(logFile), 0750)).To(Succeed()) + + f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + Expect(err).NotTo(HaveOccurred()) + defer f.Close() + + opts := &corev1.PodLogOptions{ + Container: container.Name, + Follow: false, + } + + podLogs, err := input.ClientSet.CoreV1().Pods(input.Deployment.Namespace).GetLogs(pod.Name, opts).Stream(ctx) + if err != nil { + // Failing to stream logs should not cause the test to fail + log.Logf("Error starting logs stream for pod %s/%s, container %s: %v", input.Deployment.Namespace, pod.Name, container.Name, err) + return + } + defer podLogs.Close() + + out := bufio.NewWriter(f) + defer out.Flush() + _, err = out.ReadFrom(podLogs) + if err != nil && err != io.ErrUnexpectedEOF { + // Failing to stream logs should not cause the test to fail + log.Logf("Got error while streaming logs for pod %s/%s, container %s: %v", input.Deployment.Namespace, pod.Name, container.Name, err) + } + }(pod, container) + } + } +} + type WatchPodMetricsInput struct { GetLister GetLister ClientSet *kubernetes.Clientset diff --git a/test/infrastructure/docker/config/manager/manager.yaml b/test/infrastructure/docker/config/manager/manager.yaml index 4454c900483b..b27cfa51aacc 100644 --- a/test/infrastructure/docker/config/manager/manager.yaml +++ b/test/infrastructure/docker/config/manager/manager.yaml @@ -31,6 +31,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz