Skip to content

Commit

Permalink
Merge metrics to fire singleton metrics to controller-runtime namespace
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Mar 1, 2023
1 parent 3e5ec9b commit 8107489
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pkg/controllers/metrics/pod/controller.go
Expand Up @@ -112,7 +112,7 @@ func NewController(kubeClient client.Client) controller.Controller {
}

func (c *Controller) Name() string {
return "podmetrics"
return "pod_metrics"
}

// Reconcile executes a termination control loop for the resource
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/metrics/provisioner/controller.go
Expand Up @@ -101,7 +101,7 @@ func NewController(kubeClient client.Client) corecontroller.Controller {
}

func (c *Controller) Name() string {
return "provisionermetrics"
return "provisioner_metrics"
}

// Reconcile executes a termination control loop for the resource
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/metrics/state/controller.go
Expand Up @@ -41,7 +41,7 @@ func NewController(cluster *state.Cluster) *Controller {
}

func (c *Controller) Name() string {
return "metricscraper"
return "metric_scraper"
}

func (c *Controller) Builder(_ context.Context, mgr manager.Manager) controller.Builder {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/provisioning/controller.go
Expand Up @@ -49,7 +49,7 @@ func NewController(kubeClient client.Client, provisioner *Provisioner, recorder
}

func (c *Controller) Name() string {
return "provisioning"
return "provisioner_trigger"
}

// Reconcile the resource
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/provisioning/provisioner.go
Expand Up @@ -448,7 +448,7 @@ func validateNodeSelectorTerm(term v1.NodeSelectorTerm) (errs error) {
var schedulingDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metrics.Namespace,
Subsystem: "allocation_controller",
Subsystem: "provisioner",
Name: "scheduling_duration_seconds",
Help: "Duration of scheduling process in seconds. Broken down by provisioner and error.",
Buckets: metrics.DurationBuckets(),
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/state/informer/node.go
Expand Up @@ -45,7 +45,7 @@ func NewNodeController(kubeClient client.Client, cluster *state.Cluster) corecon
}

func (c *NodeController) Name() string {
return "node-state"
return "node_state"
}

func (c *NodeController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/state/informer/pod.go
Expand Up @@ -47,7 +47,7 @@ func NewPodController(kubeClient client.Client, cluster *state.Cluster) corecont
}

func (c *PodController) Name() string {
return "pod-state"
return "pod_state"
}

func (c *PodController) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/state/informer/provisioner.go
Expand Up @@ -46,7 +46,7 @@ func NewProvisionerController(kubeClient client.Client, cluster *state.Cluster)
}

func (c *ProvisionerController) Name() string {
return "provisionerstate"
return "provisioner_state"
}

func (c *ProvisionerController) Reconcile(_ context.Context, _ *v1alpha5.Provisioner) (reconcile.Result, error) {
Expand Down
121 changes: 87 additions & 34 deletions pkg/operator/controller/singleton.go
Expand Up @@ -16,11 +16,11 @@ package controller

import (
"context"
"errors"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"k8s.io/client-go/util/workqueue"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/manager"
Expand All @@ -47,45 +47,32 @@ func (b SingletonBuilder) Complete(r Reconciler) error {

type Singleton struct {
Reconciler
metrics *singletonMetrics
rateLimiter ratelimiter.RateLimiter
}

type singletonMetrics struct {
reconcileDuration prometheus.Histogram
reconcileErrors prometheus.Counter
}

func newSingleton(r Reconciler) *Singleton {
return &Singleton{
s := &Singleton{
Reconciler: r,
metrics: newSingletonMetrics(r.Name()),
rateLimiter: workqueue.DefaultItemBasedRateLimiter(),
}
s.initMetrics()
return s
}

func newSingletonMetrics(name string) *singletonMetrics {
metrics := &singletonMetrics{
reconcileDuration: prometheus.NewHistogram(
prometheus.HistogramOpts{
Namespace: metrics.Namespace,
Subsystem: strings.ReplaceAll(name, ".", "_"),
Name: "reconcile_time_seconds",
Help: "Length of time per reconcile.",
Buckets: metrics.DurationBuckets(),
},
),
reconcileErrors: prometheus.NewCounter(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: strings.ReplaceAll(name, ".", "_"),
Name: "reconcile_errors_total",
Help: "Total number of reconcile errors.",
},
),
}
crmetrics.Registry.MustRegister(metrics.reconcileDuration, metrics.reconcileErrors)
return metrics
// metricName returns the reconciler's name with every "." replaced by "_".
// The result is used as the "controller" label value on the metrics fired
// by this singleton.
func (s *Singleton) metricName() string {
	name := s.Name()
	return strings.Replace(name, ".", "_", -1)
}

// initMetrics seeds every metric series for this singleton so that each one is
// exported with an initial value before the first reconcile runs. It mirrors
// the metrics initialization performed by controller-runtime:
// https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/internal/controller/controller.go
func (s *Singleton) initMetrics() {
	controller := s.metricName()

	activeWorkers.WithLabelValues(controller).Set(0)
	reconcileErrors.WithLabelValues(controller).Add(0)
	// One zero-valued series per possible reconcile outcome.
	for _, result := range []string{labelError, labelRequeueAfter, labelRequeue, labelSuccess} {
		reconcileTotal.WithLabelValues(controller, result).Add(0)
	}
	// The worker count gauge is fixed at 1 for a singleton.
	workerCount.WithLabelValues(controller).Set(1)
}

// singletonRequest is the empty request passed to every Reconcile call made by a
// Singleton. The same value is used as the item handed to the rate limiter's
// When and Forget calls.
var singletonRequest = reconcile.Request{}
Expand All @@ -105,23 +92,89 @@ func (s *Singleton) Start(ctx context.Context) error {
}

func (s *Singleton) reconcile(ctx context.Context) time.Duration {
measureDuration := metrics.Measure(s.metrics.reconcileDuration)
activeWorkers.WithLabelValues(s.metricName()).Inc()
defer activeWorkers.WithLabelValues(s.metricName()).Dec()

measureDuration := metrics.Measure(reconcileDuration.WithLabelValues(s.metricName()))
res, err := s.Reconcile(ctx, singletonRequest)
measureDuration() // Observe the length of time between the function creation and now

switch {
case err != nil:
s.metrics.reconcileErrors.Inc()
reconcileErrors.WithLabelValues(s.metricName()).Inc()
reconcileTotal.WithLabelValues(s.metricName(), labelError).Inc()
logging.FromContext(ctx).Error(err)
return s.rateLimiter.When(singletonRequest)
case res.Requeue:
reconcileTotal.WithLabelValues(s.metricName(), labelRequeue).Inc()
return s.rateLimiter.When(singletonRequest)
default:
s.rateLimiter.Forget(singletonRequest)
return lo.Ternary(res.RequeueAfter > 0, res.RequeueAfter, time.Duration(0))
switch {
case res.RequeueAfter > 0:
reconcileTotal.WithLabelValues(s.metricName(), labelRequeueAfter).Inc()
return res.RequeueAfter
default:
reconcileTotal.WithLabelValues(s.metricName(), labelSuccess).Inc()
return time.Duration(0)
}
}
}

// NeedLeaderElection always returns true.
// NOTE(review): presumably read by the controller-runtime manager so that the
// singleton only runs on the elected leader; confirm against the manager's
// handling of runnables.
func (s *Singleton) NeedLeaderElection() bool {
return true
}

// init runs mergeMetrics at package initialization, which registers the
// package-level metric vectors with crmetrics.Registry and rebinds them to the
// collectors that registry reports as already registered.
func init() {
mergeMetrics()
}

// Values used for the "result" label of reconcileTotal.
const (
// labelError is recorded when Reconcile returns a non-nil error.
labelError = "error"
// labelRequeueAfter is recorded when the result carries a positive RequeueAfter.
labelRequeueAfter = "requeue_after"
// labelRequeue is recorded when the result has Requeue set.
labelRequeue = "requeue"
// labelSuccess is recorded when none of the other outcomes apply.
labelSuccess = "success"
)

// The metrics below are copies of the metrics that controller-runtime declares in its /internal package.
// They are declared here so that singleton controllers can fire metrics under the same names that users
// expect other controller-runtime metrics to use.
// https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/internal/controller/metrics/metrics.go
//
// mergeMetrics reassigns each of these variables to the collector already held by crmetrics.Registry.
var (
// reconcileTotal counts reconciliations, labelled by controller and result.
reconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_total",
Help: "Total number of reconciliations per controller",
}, []string{"controller", "result"})
// reconcileDuration observes the duration of each reconciliation, labelled by controller.
reconcileDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "controller_runtime_reconcile_time_seconds",
Help: "Length of time per reconciliation per controller",
Buckets: metrics.DurationBuckets(),
}, []string{"controller"})
// reconcileErrors counts reconciliations that returned an error, labelled by controller.
reconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_errors_total",
Help: "Total number of reconciliation errors per controller",
}, []string{"controller"})
// workerCount reports the maximum number of concurrent reconciles, labelled by controller.
workerCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "controller_runtime_max_concurrent_reconciles",
Help: "Maximum number of concurrent reconciles per controller",
}, []string{"controller"})
// activeWorkers reports the number of reconciles currently in flight, labelled by controller.
activeWorkers = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "controller_runtime_active_workers",
Help: "Number of currently used workers per controller",
}, []string{"controller"})
)

// mergeMetrics registers the singleton metric vectors with the controller-runtime metrics registry.
// When controller-runtime has already registered a collector with the same descriptor, the
// package-level variable is rebound to that existing collector so that singleton controllers and
// regular controllers write to the same series.
// https://github.com/kubernetes-sigs/controller-runtime/blob/main/pkg/internal/controller/metrics/metrics.go
func mergeMetrics() {
	reconcileTotal = registerOrMerge(reconcileTotal)
	reconcileDuration = registerOrMerge(reconcileDuration)
	reconcileErrors = registerOrMerge(reconcileErrors)
	workerCount = registerOrMerge(workerCount)
	activeWorkers = registerOrMerge(activeWorkers)
}

// registerOrMerge registers c with crmetrics.Registry and returns the collector that callers
// should use from then on:
//   - c itself when the registration succeeds (nothing was registered under that descriptor yet);
//   - the previously registered collector when the registry reports an AlreadyRegisteredError.
//
// Any other outcome panics with a descriptive message. That covers a registration error that is
// not a duplicate, such as a help string or label set that no longer matches the copy shipped by
// controller-runtime, and an existing collector of a different concrete type. Without this check
// the caller would hit a nil or mistyped interface conversion that hides the real cause.
func registerOrMerge[T prometheus.Collector](c T) T {
	err := crmetrics.Registry.Register(c)
	if err == nil {
		return c
	}
	var already prometheus.AlreadyRegisteredError
	if !errors.As(err, &already) {
		panic("registering singleton metric with the controller-runtime registry: " + err.Error())
	}
	existing, ok := already.ExistingCollector.(T)
	if !ok {
		panic("merging singleton metric with the controller-runtime registry: existing collector has an unexpected type")
	}
	return existing
}

0 comments on commit 8107489

Please sign in to comment.