From 1e7bb20c52e452a7ca061ca1dda1936e9df1f266 Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Mon, 8 Feb 2021 10:28:42 +0100 Subject: [PATCH] kubelet: podresources: per-endpoint metrics Before the addition of GetAllocatableResources, the podresources API had just one endpoint `List()`, thus we could just account for the total of the calls to have a good pulse of the API usage. Now that we extend the API with more endpoints (`GetAlloctableResources`), in order to improve the observability we add per-endpoint counters, in addition to the existing counter of the total API calls. Signed-off-by: Francesco Romani --- pkg/kubelet/apis/podresources/server_v1.go | 5 ++ pkg/kubelet/metrics/metrics.go | 54 +++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pkg/kubelet/apis/podresources/server_v1.go b/pkg/kubelet/apis/podresources/server_v1.go index b5d64f28146d..bd29ed9fc53e 100644 --- a/pkg/kubelet/apis/podresources/server_v1.go +++ b/pkg/kubelet/apis/podresources/server_v1.go @@ -47,6 +47,7 @@ func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesP // List returns information about the resources assigned to pods on the node func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResourcesRequest) (*v1.ListPodResourcesResponse, error) { metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc() + metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc() pods := p.podsProvider.GetPods() podResources := make([]*v1.PodResources, len(pods)) @@ -76,7 +77,11 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource // GetAllocatableResources returns information about all the resources known by the server - this more like the capacity, not like the current amount of free resources. func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req *v1.AllocatableResourcesRequest) (*v1.AllocatableResourcesResponse, error) { + metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc() + metrics.PodResourcesEndpointRequestsGetAllocatableCount.WithLabelValues("v1").Inc() + if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesGetAllocatable) { + metrics.PodResourcesEndpointErrorsGetAllocatableCount.WithLabelValues("v1").Inc() return nil, fmt.Errorf("Pod Resources API GetAllocatableResources disabled") } diff --git a/pkg/kubelet/metrics/metrics.go b/pkg/kubelet/metrics/metrics.go index 61269fedd400..6fb40e1875ba 100644 --- a/pkg/kubelet/metrics/metrics.go +++ b/pkg/kubelet/metrics/metrics.go @@ -63,7 +63,11 @@ const ( DevicePluginRegistrationCountKey = "device_plugin_registration_total" DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds" // Metrics keys of pod resources operations - PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total" + PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total" + PodResourcesEndpointRequestsListKey = "pod_resources_endpoint_requests_list" + PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable" + PodResourcesEndpointErrorsListKey = "pod_resources_endpoint_errors_list" + PodResourcesEndpointErrorsGetAllocatableKey = "pod_resources_endpoint_errors_get_allocatable" // Metric keys for node config AssignedConfigKey = "node_config_assigned" @@ -293,6 +297,54 @@ var ( []string{"server_api_version"}, ) + // PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint. + // Broken down by server API version. + PodResourcesEndpointRequestsListCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointRequestsListKey, + Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint. + // Broken down by server API version. + PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointRequestsGetAllocatableKey, + Help: "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by he PodResource List() endpoint. + // Broken down by server API version. + PodResourcesEndpointErrorsListCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointErrorsListKey, + Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + + // PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint. + // Broken down by server API version. + PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec( + &metrics.CounterOpts{ + Subsystem: KubeletSubsystem, + Name: PodResourcesEndpointErrorsGetAllocatableKey, + Help: "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.", + StabilityLevel: metrics.ALPHA, + }, + []string{"server_api_version"}, + ) + // Metrics for node config // AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned.