Skip to content

Commit

Permalink
kubelet: podresources: per-endpoint metrics
Browse files Browse the repository at this point in the history
Before the addition of GetAllocatableResources, the
podresources API had just one endpoint `List()`, thus we could just
account for the total of the calls to have a good pulse of the API usage.
Now that we extend the API with more endpoints
(`GetAllocatableResources`), in order to improve the observability we add
per-endpoint counters, in addition to the existing counter of the total
API calls.

Signed-off-by: Francesco Romani <fromani@redhat.com>
  • Loading branch information
ffromani committed Mar 9, 2021
1 parent d7a30e1 commit 1e7bb20
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
5 changes: 5 additions & 0 deletions pkg/kubelet/apis/podresources/server_v1.go
Expand Up @@ -47,6 +47,7 @@ func NewV1PodResourcesServer(podsProvider PodsProvider, devicesProvider DevicesP
// List returns information about the resources assigned to pods on the node
func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResourcesRequest) (*v1.ListPodResourcesResponse, error) {
metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
metrics.PodResourcesEndpointRequestsListCount.WithLabelValues("v1").Inc()

pods := p.podsProvider.GetPods()
podResources := make([]*v1.PodResources, len(pods))
Expand Down Expand Up @@ -76,7 +77,11 @@ func (p *v1PodResourcesServer) List(ctx context.Context, req *v1.ListPodResource

// GetAllocatableResources returns information about all the resources known by the server - this is more like the capacity, not like the current amount of free resources.
func (p *v1PodResourcesServer) GetAllocatableResources(ctx context.Context, req *v1.AllocatableResourcesRequest) (*v1.AllocatableResourcesResponse, error) {
metrics.PodResourcesEndpointRequestsTotalCount.WithLabelValues("v1").Inc()
metrics.PodResourcesEndpointRequestsGetAllocatableCount.WithLabelValues("v1").Inc()

if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletPodResourcesGetAllocatable) {
metrics.PodResourcesEndpointErrorsGetAllocatableCount.WithLabelValues("v1").Inc()
return nil, fmt.Errorf("Pod Resources API GetAllocatableResources disabled")
}

Expand Down
54 changes: 53 additions & 1 deletion pkg/kubelet/metrics/metrics.go
Expand Up @@ -63,7 +63,11 @@ const (
DevicePluginRegistrationCountKey = "device_plugin_registration_total"
DevicePluginAllocationDurationKey = "device_plugin_alloc_duration_seconds"
// Metrics keys of pod resources operations
PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
PodResourcesEndpointRequestsTotalKey = "pod_resources_endpoint_requests_total"
PodResourcesEndpointRequestsListKey = "pod_resources_endpoint_requests_list"
PodResourcesEndpointRequestsGetAllocatableKey = "pod_resources_endpoint_requests_get_allocatable"
PodResourcesEndpointErrorsListKey = "pod_resources_endpoint_errors_list"
PodResourcesEndpointErrorsGetAllocatableKey = "pod_resources_endpoint_errors_get_allocatable"

// Metric keys for node config
AssignedConfigKey = "node_config_assigned"
Expand Down Expand Up @@ -293,6 +297,54 @@ var (
[]string{"server_api_version"},
)

// PodResourcesEndpointRequestsListCount is a Counter that tracks the number of requests to the PodResource List() endpoint.
// Broken down by server API version.
// The v1 server increments it with the label value "v1" at the start of every List() call,
// alongside the total requests counter.
PodResourcesEndpointRequestsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsListKey,
Help: "Number of requests to the PodResource List endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
// Single label: the API version of the server that handled the request.
[]string{"server_api_version"},
)

// PodResourcesEndpointRequestsGetAllocatableCount is a Counter that tracks the number of requests to the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
// The v1 server increments it before checking the KubeletPodResourcesGetAllocatable feature gate,
// so requests rejected because the gate is disabled are counted here as well.
PodResourcesEndpointRequestsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointRequestsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
// Single label: the API version of the server that handled the request.
[]string{"server_api_version"},
)

// PodResourcesEndpointErrorsListCount is a Counter that tracks the number of errors returned by the PodResource List() endpoint.
// Broken down by server API version.
// NOTE(review): no call site incrementing this counter is visible in the List() hunk of this
// change — confirm where (or whether) it is incremented.
PodResourcesEndpointErrorsListCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsListKey,
Help: "Number of requests to the PodResource List endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
// Single label: the API version of the server that handled the request.
[]string{"server_api_version"},
)

// PodResourcesEndpointErrorsGetAllocatableCount is a Counter that tracks the number of errors returned by the PodResource GetAllocatableResources() endpoint.
// Broken down by server API version.
// The v1 server increments it when GetAllocatableResources() is called while the
// KubeletPodResourcesGetAllocatable feature gate is disabled, just before returning the error.
// NOTE(review): other error paths, if any, are not visible in this hunk — confirm they are counted too.
PodResourcesEndpointErrorsGetAllocatableCount = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: KubeletSubsystem,
Name: PodResourcesEndpointErrorsGetAllocatableKey,
Help: "Number of requests to the PodResource GetAllocatableResources endpoint which returned error. Broken down by server api version.",
StabilityLevel: metrics.ALPHA,
},
// Single label: the API version of the server that handled the request.
[]string{"server_api_version"},
)

// Metrics for node config

// AssignedConfig is a Gauge that is set 1 if the Kubelet has a NodeConfig assigned.
Expand Down

0 comments on commit 1e7bb20

Please sign in to comment.