From b9a5cdfa8c1f4e90b53407a2de6e960fb4b07781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andreas=20M=C3=A5rtensson?= Date: Tue, 31 May 2022 17:28:04 +0200 Subject: [PATCH] Debug, bugfix --- .../grafanadashboard/dashboard_pipeline.go | 13 +++++-- .../grafanadashboard_controller.go | 39 ++++++++++--------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/controllers/grafanadashboard/dashboard_pipeline.go b/controllers/grafanadashboard/dashboard_pipeline.go index adf8467b6..c5c46a532 100644 --- a/controllers/grafanadashboard/dashboard_pipeline.go +++ b/controllers/grafanadashboard/dashboard_pipeline.go @@ -21,6 +21,7 @@ import ( "github.com/grafana-operator/grafana-operator/v4/controllers/config" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -216,6 +217,7 @@ func (r *DashboardPipelineImpl) loadDashboardFromURL() error { } if resp.StatusCode != 200 { retries := 0 + r.refreshDashboard() if r.Dashboard.Status.Error != nil { retries = r.Dashboard.Status.Error.Retries } @@ -229,7 +231,7 @@ func (r *DashboardPipelineImpl) loadDashboardFromURL() error { } if err := r.Client.Status().Update(r.Context, r.Dashboard); err != nil { - return fmt.Errorf("failed to request dashboard and failed to update status %s: %w", string(body), err) + return fmt.Errorf("failed to request dashboard: %s\nfailed to update status : %w", string(body), err) } return fmt.Errorf("request failed with status %v", resp.StatusCode) @@ -248,6 +250,7 @@ func (r *DashboardPipelineImpl) loadDashboardFromURL() error { r.JSON = json } + r.refreshDashboard() r.Dashboard.Status = v1alpha1.GrafanaDashboardStatus{ Content: r.JSON, ContentTimestamp: &metav1.Time{Time: time.Now()}, @@ -261,6 +264,10 @@ func (r *DashboardPipelineImpl) loadDashboardFromURL() error { return nil } +func (r *DashboardPipelineImpl) refreshDashboard() error { + return r.Client.Get(r.Context, types.NamespacedName{Name: r.Dashboard.Name, Namespace: r.Dashboard.Namespace}, r.Dashboard) +} + func (r *DashboardPipelineImpl) loadDashboardFromGrafanaCom() error { url, err := r.getGrafanaComDashboardUrl() if err != nil { @@ -279,6 +286,7 @@ func (r *DashboardPipelineImpl) loadDashboardFromGrafanaCom() error { } if resp.StatusCode != 200 { + r.refreshDashboard() retries := 0 if r.Dashboard.Status.Error != nil { retries = r.Dashboard.Status.Error.Retries @@ -301,8 +309,7 @@ func (r *DashboardPipelineImpl) loadDashboardFromGrafanaCom() error { r.JSON = string(body) - // Update JSON so dashboard is not refetched - + r.refreshDashboard() r.Dashboard.Status = v1alpha1.GrafanaDashboardStatus{ Content: r.JSON, ContentTimestamp: &metav1.Time{Time: time.Now()}, diff --git a/controllers/grafanadashboard/grafanadashboard_controller.go b/controllers/grafanadashboard/grafanadashboard_controller.go index fd1eb2f84..9c8b05b62 100644 --- a/controllers/grafanadashboard/grafanadashboard_controller.go +++ b/controllers/grafanadashboard/grafanadashboard_controller.go @@ -264,9 +264,9 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque } // Process new/updated dashboards - for i, dashboard := range namespaceDashboards.Items { + for _, dashboard := range namespaceDashboards.Items { // Is this a dashboard we care about (matches the label selectors)? - if !r.isMatch(&namespaceDashboards.Items[i]) { + if !r.isMatch(&dashboard) { log.Log.Info("dashboard found but selectors do not match", "namespace", dashboard.Namespace, "name", dashboard.Name) continue @@ -277,11 +277,12 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque folderName = dashboard.Spec.CustomFolderName } - if dashboard.Status.Error != nil && dashboard.Status.Error.Code == 429 { + if dashboard.Status.Error != nil { backoffDuration := 30 * time.Second * time.Duration(math.Pow(2, float64(dashboard.Status.Error.Retries))) + retryTime := dashboard.Status.ContentTimestamp.Add(backoffDuration) - if dashboard.Status.ContentTimestamp.Add(backoffDuration).After(time.Now()) { - log.Log.Info("still waiting for rate limited dashboard", "folder", folderName, "dashboard", request.Name) + if retryTime.After(time.Now()) { + log.Log.Info("delaying retry of failing dashboard", "folder", folderName, "dashboard", request.Name, "namespace", request.Namespace, "retryTime", retryTime) continue } } @@ -290,7 +291,7 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque if err != nil { log.Log.Error(err, "failed to get or create namespace folder for dashboard", "folder", folderName, "dashboard", request.Name) - r.manageError(&namespaceDashboards.Items[i], err) + r.manageError(&dashboard, err) continue } @@ -307,9 +308,9 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque // Process the dashboard. Use the known hash of an existing dashboard // to determine if an update is required - knownHash := findHash(knownDashboards, &namespaceDashboards.Items[i]) - knownUid := findUid(knownDashboards, &namespaceDashboards.Items[i]) - pipeline := NewDashboardPipeline(r.Client, &namespaceDashboards.Items[i], r.context) + knownHash := findHash(knownDashboards, &dashboard) + knownUid := findUid(knownDashboards, &dashboard) + pipeline := NewDashboardPipeline(r.Client, &dashboard, r.context) processed, err := pipeline.ProcessDashboard(knownHash, &folderId, folderName, false) // Check known dashboards exist on grafana instance and recreate if not @@ -320,32 +321,32 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque } if *response.Dashboard.ID == uint(0) { - log.Log.Info(fmt.Sprintf("Dashboard %v has been deleted via grafana console. Recreating.", namespaceDashboards.Items[i].ObjectMeta.Name)) + log.Log.Info(fmt.Sprintf("Dashboard %v has been deleted via grafana console. Recreating.", dashboard.ObjectMeta.Name)) processed, err = pipeline.ProcessDashboard(knownHash, &folderId, folderName, true) if err != nil { log.Log.Error(err, "cannot process dashboard", "namespace", dashboard.Namespace, "name", dashboard.Name) - r.manageError(&namespaceDashboards.Items[i], err) + r.manageError(&dashboard, err) continue } } } if err != nil { - log.Log.Error(err, "cannot process dashboard", "namespace", dashboard.Namespace, "name", dashboard.Name) - r.manageError(&namespaceDashboards.Items[i], err) + // log.Log.Error(err, "cannot process dashboard", "namespace", dashboard.Namespace, "name", dashboard.Name) + r.manageError(&dashboard, err) continue } if processed == nil { - r.config.SetPluginsFor(&namespaceDashboards.Items[i]) + r.config.SetPluginsFor(&dashboard) continue } // Check labels only when DashboardNamespaceSelector isnt empty if r.state.DashboardNamespaceSelector != nil { - matchesNamespaceLabels, err := r.checkNamespaceLabels(&namespaceDashboards.Items[i]) + matchesNamespaceLabels, err := r.checkNamespaceLabels(&dashboard) if err != nil { - r.manageError(&namespaceDashboards.Items[i], err) + r.manageError(&dashboard, err) continue } @@ -358,12 +359,12 @@ func (r *GrafanaDashboardReconciler) reconcileDashboards(request reconcile.Reque _, err = grafanaClient.CreateOrUpdateDashboard(processed, folderId, folderName) if err != nil { //log.Log.Error(err, "cannot submit dashboard %v/%v", "namespace", dashboard.Namespace, "name", dashboard.Name) - r.manageError(&namespaceDashboards.Items[i], err) + r.manageError(&dashboard, err) continue } - r.manageSuccess(&namespaceDashboards.Items[i], &folderId, folderName) + r.manageSuccess(&dashboard, &folderId, folderName) } for _, dashboard := range dashboardsToDelete { @@ -478,7 +479,7 @@ func (r *GrafanaDashboardReconciler) manageError(dashboard *grafanav1alpha1.Graf if k8serrors.IsConflict(issue) || k8serrors.IsServiceUnavailable(issue) { return } - log.Log.Error(issue, "error updating dashboard") + log.Log.Error(issue, "error updating dashboard", "name", dashboard.Name, "namespace", dashboard.Namespace) } func (r *GrafanaDashboardReconciler) SetupWithManager(mgr manager.Manager) error {