From 14bc81a17949890878dbdc75d314a9b9b0d2fa71 Mon Sep 17 00:00:00 2001 From: Stefan Bueringer Date: Thu, 7 Oct 2021 17:02:04 +0200 Subject: [PATCH] get logs of broken controller --- bootstrap/kubeadm/config/manager/manager.yaml | 2 + config/manager/manager.yaml | 2 + .../kubeadm/config/manager/manager.yaml | 2 + go.mod | 2 + go.sum | 11 ++- .../clusterctl/clusterctl_helpers.go | 6 ++ test/framework/deployment_helpers.go | 77 ++++++++++++++++++- .../docker/config/manager/manager.yaml | 2 + 8 files changed, 101 insertions(+), 3 deletions(-) diff --git a/bootstrap/kubeadm/config/manager/manager.yaml b/bootstrap/kubeadm/config/manager/manager.yaml index 233ba3d5fd90..a01ea2e1308f 100644 --- a/bootstrap/kubeadm/config/manager/manager.yaml +++ b/bootstrap/kubeadm/config/manager/manager.yaml @@ -33,6 +33,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 6e4d23e7ad32..e3b2a9c166b0 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -34,6 +34,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/controlplane/kubeadm/config/manager/manager.yaml b/controlplane/kubeadm/config/manager/manager.yaml index b5e31734e031..2363bd55d3ba 100644 --- a/controlplane/kubeadm/config/manager/manager.yaml +++ b/controlplane/kubeadm/config/manager/manager.yaml @@ -33,6 +33,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz diff --git a/go.mod b/go.mod index 6a83ddf22320..a63db6064fc1 100644 --- a/go.mod +++ b/go.mod @@ -41,3 +41,5 @@ require ( sigs.k8s.io/controller-runtime v0.10.2 sigs.k8s.io/yaml v1.2.0 ) + +replace sigs.k8s.io/controller-runtime => /Users/buringerst/code/src/sigs.k8s.io/controller-runtime diff --git a/go.sum b/go.sum index ab7aee5c6721..7282070140ec 100644 --- a/go.sum +++ b/go.sum @@ -1084,24 +1084,31 @@ honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= k8s.io/api v0.21.4/go.mod h1:fTVGP+M4D8+00FN2cMnJqk/eb/GH53bvmNs2SVTmpFk= +k8s.io/api v0.22.1/go.mod h1:bh13rkTp3F1XEaLGykbyRD2QaTTzPm0e/BMd8ptFONY= k8s.io/api v0.22.2 h1:M8ZzAD0V6725Fjg53fKeTJxGsJvRbk4TEm/fexHMtfw= k8s.io/api v0.22.2/go.mod h1:y3ydYpLJAaDI+BbSe2xmGcqxiWHmWjkEeIbiwHvnPR8= +k8s.io/apiextensions-apiserver v0.22.1/go.mod h1:HeGmorjtRmRLE+Q8dJu6AYRoZccvCMsghwS8XTUYb2c= k8s.io/apiextensions-apiserver v0.22.2 h1:zK7qI8Ery7j2CaN23UCFaC1hj7dMiI87n01+nKuewd4= k8s.io/apiextensions-apiserver v0.22.2/go.mod h1:2E0Ve/isxNl7tWLSUDgi6+cmwHi5fQRdwGVCxbC+KFA= k8s.io/apimachinery v0.21.4/go.mod h1:H/IM+5vH9kZRNJ4l3x/fXP/5bOPJaVP/guptnZPeCFI= +k8s.io/apimachinery v0.22.1/go.mod h1:O3oNtNadZdeOMxHFVxOreoznohCpy0z6mocxbZr7oJ0= k8s.io/apimachinery v0.22.2 h1:ejz6y/zNma8clPVfNDLnPbleBo6MpoFy/HBiBqCouVk= k8s.io/apimachinery v0.22.2/go.mod h1:O3oNtNadZdeOMxHFVxOreoznohCpy0z6mocxbZr7oJ0= +k8s.io/apiserver v0.22.1/go.mod h1:2mcM6dzSt+XndzVQJX21Gx0/Klo7Aen7i0Ai6tIa400= k8s.io/apiserver v0.22.2 h1:TdIfZJc6YNhu2WxeAOWq1TvukHF0Sfx0+ln4XK9qnL4= k8s.io/apiserver v0.22.2/go.mod h1:vrpMmbyjWrgdyOvZTSpsusQq5iigKNWv9o9KlDAbBHI= k8s.io/cli-runtime v0.21.4/go.mod h1:eRbLHYkdVWzvG87yrkgGd8CqX6/+fAG9DTdAqTXmlRY= k8s.io/client-go v0.21.4/go.mod h1:t0/eMKyUAq/DoQ7vW8NVVA00/nomlwC+eInsS8PxSew= +k8s.io/client-go v0.22.1/go.mod h1:BquC5A4UOo4qVDUtoc04/+Nxp1MeHcVc1HJm1KmG8kk= k8s.io/client-go v0.22.2 h1:DaSQgs02aCC1QcwUdkKZWOeaVsQjYvWv8ZazcZ6JcHc= k8s.io/client-go v0.22.2/go.mod h1:sAlhrkVDf50ZHx6z4K0S40wISNTarf1r800F+RlCF6U= k8s.io/cluster-bootstrap v0.21.4 h1:dnCOcVJdCAMz8+nvqodrFv/yd/3Ae9Jn14cChpQjps8= k8s.io/cluster-bootstrap v0.21.4/go.mod h1:GtXGuiEtdV4XQJcscR6qQCm/vtQWkhUi3qnl9KL9jzw= k8s.io/code-generator v0.21.4/go.mod h1:K3y0Bv9Cz2cOW2vXUrNZlFbflhuPvuadW6JdnN6gGKo= +k8s.io/code-generator v0.22.1/go.mod h1:eV77Y09IopzeXOJzndrDyCI88UBok2h6WxAlBwpxa+o= k8s.io/code-generator v0.22.2/go.mod h1:eV77Y09IopzeXOJzndrDyCI88UBok2h6WxAlBwpxa+o= k8s.io/component-base v0.21.4/go.mod h1:ZKG0eHVX+tUDcaoIGpU3Vtk4TIjMddN9uhEWDmW6Nyg= +k8s.io/component-base v0.22.1/go.mod h1:0D+Bl8rrnsPN9v0dyYvkqFfBeAd4u7n77ze+p8CMiPo= k8s.io/component-base v0.22.2 h1:vNIvE0AIrLhjX8drH0BgCNJcR4QZxMXcJzBsDplDx9M= k8s.io/component-base v0.22.2/go.mod h1:5Br2QhI9OTe79p+TzPe9JKNQYvEKbq9rTJDWllunGug= k8s.io/component-helpers v0.21.4/go.mod h1:/5TBNWmxaAymZweO1JWv3Pt5rcYJV1LbWWY0x1rDdVU= @@ -1119,14 +1126,14 @@ k8s.io/kubectl v0.21.4 h1:ODXpSKpi5C6XnJmGg96E/36KAry513v4Jr9Efg3ePJI= k8s.io/kubectl v0.21.4/go.mod h1:rRYB5HeScoGQKxZDQmus17pTSVIuqfm0D31ApET/qSM= k8s.io/metrics v0.21.4/go.mod h1:uhWoVuVumUMSeCa1B1p2tm4Y4XuZIg0n24QEtB54wuA= k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +k8s.io/utils v0.0.0-20210707171843-4b05e18ac7d9/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +k8s.io/utils v0.0.0-20210802155522-efc7438f0176/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a h1:8dYfu/Fc9Gz2rNJKB9IQRGgQOh2clmRzNIPPY1xLY5g= k8s.io/utils v0.0.0-20210819203725-bdf08cb9a70a/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.22/go.mod h1:LEScyzhFmoF5pso/YSeBstl57mOzx9xlU9n85RGrDQg= -sigs.k8s.io/controller-runtime v0.10.2 h1:jW8qiY+yMnnPx6O9hu63tgcwaKzd1yLYui+mpvClOOc= -sigs.k8s.io/controller-runtime v0.10.2/go.mod h1:CQp8eyUQZ/Q7PJvnIrB6/hgfTC1kBkGylwsLgOQi1WY= sigs.k8s.io/kustomize/api v0.8.8/go.mod h1:He1zoK0nk43Pc6NlV085xDXDXTNprtcyKZVm3swsdNY= sigs.k8s.io/kustomize/cmd/config v0.9.10/go.mod h1:Mrby0WnRH7hA6OwOYnYpfpiY0WJIMgYrEDfwOeFdMK0= sigs.k8s.io/kustomize/kustomize/v4 v4.1.2/go.mod h1:PxBvo4WGYlCLeRPL+ziT64wBXqbgfcalOS/SXa/tcyo= diff --git a/test/framework/clusterctl/clusterctl_helpers.go b/test/framework/clusterctl/clusterctl_helpers.go index f016172ec7c0..5ed6618c6957 100644 --- a/test/framework/clusterctl/clusterctl_helpers.go +++ b/test/framework/clusterctl/clusterctl_helpers.go @@ -99,6 +99,9 @@ func InitManagementClusterAndWatchControllerLogs(ctx context.Context, input Init framework.WaitForDeploymentsAvailable(ctx, framework.WaitForDeploymentsAvailableInput{ Getter: client, Deployment: deployment, + GetLister: client, + ClientSet: input.ClusterProxy.GetClientSet(), + LogPath: filepath.Join(input.LogFolder, "controllers"), }, intervals...) // Start streaming logs from all controller providers @@ -155,6 +158,9 @@ func UpgradeManagementClusterAndWait(ctx context.Context, input UpgradeManagemen framework.WaitForDeploymentsAvailable(ctx, framework.WaitForDeploymentsAvailableInput{ Getter: client, Deployment: deployment, + GetLister: client, + ClientSet: input.ClusterProxy.GetClientSet(), + LogPath: filepath.Join(input.LogFolder, "controllers"), }, intervals...) // Start streaming logs from all controller providers diff --git a/test/framework/deployment_helpers.go b/test/framework/deployment_helpers.go index 2652e56d10c9..f7c5b19befdc 100644 --- a/test/framework/deployment_helpers.go +++ b/test/framework/deployment_helpers.go @@ -47,6 +47,9 @@ import ( type WaitForDeploymentsAvailableInput struct { Getter Getter Deployment *appsv1.Deployment + GetLister GetLister + ClientSet *kubernetes.Clientset + LogPath string } // WaitForDeploymentsAvailable waits until the Deployment has status.Available = True, that signals that @@ -69,7 +72,17 @@ func WaitForDeploymentsAvailable(ctx context.Context, input WaitForDeploymentsAv } } return false - }, intervals...).Should(BeTrue(), func() string { return DescribeFailedDeployment(input, deployment) }) + }, intervals...).Should(BeTrue(), func() string { + if input.GetLister != nil && input.ClientSet != nil && input.LogPath != "" { + GetDeploymentLogs(ctx, GetDeploymentLogsInput{ + Deployment: deployment, + GetLister: input.GetLister, + ClientSet: input.ClientSet, + LogPath: input.LogPath, + }) + } + return DescribeFailedDeployment(input, deployment) + }) } // DescribeFailedDeployment returns detailed output to help debug a deployment failure in e2e. @@ -152,6 +165,68 @@ func WatchDeploymentLogs(ctx context.Context, input WatchDeploymentLogsInput) { } } +type GetDeploymentLogsInput struct { + GetLister GetLister + ClientSet *kubernetes.Clientset + Deployment *appsv1.Deployment + LogPath string +} + +func GetDeploymentLogs(ctx context.Context, input GetDeploymentLogsInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for GetDeploymentLogs") + Expect(input.ClientSet).NotTo(BeNil(), "input.ClientSet is required for GetDeploymentLogs") + Expect(input.Deployment).NotTo(BeNil(), "input.Deployment is required for GetDeploymentLogs") + + deployment := &appsv1.Deployment{} + key := client.ObjectKeyFromObject(input.Deployment) + Expect(input.GetLister.Get(ctx, key, deployment)).To(Succeed(), "Failed to get deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + selector, err := metav1.LabelSelectorAsMap(deployment.Spec.Selector) + Expect(err).NotTo(HaveOccurred(), "Failed to Pods selector for deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + pods := &corev1.PodList{} + Expect(input.GetLister.List(ctx, pods, client.InNamespace(input.Deployment.Namespace), client.MatchingLabels(selector))).To(Succeed(), "Failed to list Pods for deployment %s/%s", input.Deployment.Namespace, input.Deployment.Name) + + for _, pod := range pods.Items { + for _, container := range deployment.Spec.Template.Spec.Containers { + log.Logf("Creating log watcher for controller %s/%s, pod %s, container %s", input.Deployment.Namespace, input.Deployment.Name, pod.Name, container.Name) + + // Watch each container's logs in a goroutine so we can stream them all concurrently. + go func(pod corev1.Pod, container corev1.Container) { + defer GinkgoRecover() + + logFile := filepath.Clean(path.Join(input.LogPath, input.Deployment.Name, pod.Name, container.Name+".log")) + Expect(os.MkdirAll(filepath.Dir(logFile), 0750)).To(Succeed()) + + f, err := os.OpenFile(logFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600) + Expect(err).NotTo(HaveOccurred()) + defer f.Close() + + opts := &corev1.PodLogOptions{ + Container: container.Name, + Follow: false, + } + + podLogs, err := input.ClientSet.CoreV1().Pods(input.Deployment.Namespace).GetLogs(pod.Name, opts).Stream(ctx) + if err != nil { + // Failing to stream logs should not cause the test to fail + log.Logf("Error starting logs stream for pod %s/%s, container %s: %v", input.Deployment.Namespace, pod.Name, container.Name, err) + return + } + defer podLogs.Close() + + out := bufio.NewWriter(f) + defer out.Flush() + _, err = out.ReadFrom(podLogs) + if err != nil && err != io.ErrUnexpectedEOF { + // Failing to stream logs should not cause the test to fail + log.Logf("Got error while streaming logs for pod %s/%s, container %s: %v", input.Deployment.Namespace, pod.Name, container.Name, err) + } + }(pod, container) + } + } +} + type WatchPodMetricsInput struct { GetLister GetLister ClientSet *kubernetes.Clientset diff --git a/test/infrastructure/docker/config/manager/manager.yaml b/test/infrastructure/docker/config/manager/manager.yaml index 4454c900483b..b27cfa51aacc 100644 --- a/test/infrastructure/docker/config/manager/manager.yaml +++ b/test/infrastructure/docker/config/manager/manager.yaml @@ -31,6 +31,8 @@ spec: path: /readyz port: healthz livenessProbe: + failureThreshold: 12 + periodSeconds: 10 httpGet: path: /healthz port: healthz