Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add failopen metric #107171

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 18 additions & 1 deletion staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics.go
Expand Up @@ -116,6 +116,7 @@ type AdmissionMetrics struct {
controller *metricSet
webhook *metricSet
webhookRejection *metrics.CounterVec
webhookFailOpen *metrics.CounterVec
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please see my comment in #107146 (comment). I wonder if we can achieve the goal by reusing / extending the existing metrics.

webhookRequest *metrics.CounterVec
}

Expand Down Expand Up @@ -196,6 +197,16 @@ func newAdmissionMetrics() *AdmissionMetrics {
},
[]string{"name", "type", "operation", "error_type", "rejection_code"})

webhookFailOpen := metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: "webhook_fail_open_count",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This must be suffixed _total as per instrumentation guidelines.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@logicalhan let me fix that, will open a PR shortly adding you are reviewer

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@logicalhan the file contains 2 counter metrics:

  • webhook_rejection_count (preexisting)
  • webhook_fail_open_count (new added by me)

Do you want me to address both? renaming them to:

  • webhook_rejection_total
  • webhook_fail_open_total

Help: "Admission webhook fail open count, identified by name and broken out for each admission type (validating or mutating).",
StabilityLevel: metrics.ALPHA,
},
[]string{"name", "type"})

webhookRequest := metrics.NewCounterVec(
&metrics.CounterOpts{
Namespace: namespace,
Expand All @@ -210,8 +221,9 @@ func newAdmissionMetrics() *AdmissionMetrics {
controller.mustRegister()
webhook.mustRegister()
legacyregistry.MustRegister(webhookRejection)
legacyregistry.MustRegister(webhookFailOpen)
legacyregistry.MustRegister(webhookRequest)
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection, webhookRequest: webhookRequest}
return &AdmissionMetrics{step: step, controller: controller, webhook: webhook, webhookRejection: webhookRejection, webhookFailOpen: webhookFailOpen, webhookRequest: webhookRequest}
}

func (m *AdmissionMetrics) reset() {
Expand Down Expand Up @@ -250,6 +262,11 @@ func (m *AdmissionMetrics) ObserveWebhookRejection(ctx context.Context, name, st
m.webhookRejection.WithContext(ctx).WithLabelValues(name, stepType, operation, string(errorType), strconv.Itoa(rejectionCode)).Inc()
}

// ObserveWebhookFailOpen records validating or mutating webhook that fail open.
func (m *AdmissionMetrics) ObserveWebhookFailOpen(ctx context.Context, name, stepType string) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a unit test here

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done with commit 0844891, thanks to point this out.

m.webhookFailOpen.WithContext(ctx).WithLabelValues(name, stepType).Inc()
}

type metricSet struct {
latencies *metrics.HistogramVec
latenciesSummary *metrics.SummaryVec
Expand Down
17 changes: 17 additions & 0 deletions staging/src/k8s.io/apiserver/pkg/admission/metrics/metrics_test.go
Expand Up @@ -159,6 +159,23 @@ func TestObserveWebhookRejection(t *testing.T) {
expectCounterValue(t, "apiserver_admission_webhook_rejection_count", wantLabelsAPIServerInternalError, 1)
}

func TestObserveWebhookFailOpen(t *testing.T) {
defer Metrics.reset()
defer legacyregistry.Reset()
Metrics.ObserveWebhookFailOpen(context.TODO(), "x", stepAdmit)
Metrics.ObserveWebhookFailOpen(context.TODO(), "x", stepValidate)
wantLabelsCounterAdmit := map[string]string{
"name": "x",
"type": "admit",
}
wantLabelsCounterValidate := map[string]string{
"name": "x",
"type": "validate",
}
expectCounterValue(t, "apiserver_admission_webhook_fail_open_count", wantLabelsCounterAdmit, 1)
expectCounterValue(t, "apiserver_admission_webhook_fail_open_count", wantLabelsCounterValidate, 1)
}

func TestWithMetrics(t *testing.T) {
defer Metrics.reset()
defer legacyregistry.Reset()
Expand Down
Expand Up @@ -178,7 +178,7 @@ func (a *mutatingDispatcher) Dispatch(ctx context.Context, attr admission.Attrib
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
if ignoreClientCallFailures {
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)

admissionmetrics.Metrics.ObserveWebhookFailOpen(ctx, hook.Name, "admit")
annotator.addFailedOpenAnnotation()

utilruntime.HandleError(callErr)
Expand Down
Expand Up @@ -140,7 +140,7 @@ func (d *validatingDispatcher) Dispatch(ctx context.Context, attr admission.Attr
if callErr, ok := err.(*webhookutil.ErrCallingWebhook); ok {
if ignoreClientCallFailures {
klog.Warningf("Failed calling webhook, failing open %v: %v", hook.Name, callErr)

admissionmetrics.Metrics.ObserveWebhookFailOpen(ctx, hook.Name, "validating")
key := fmt.Sprintf("%sround_0_index_%d", ValidatingAuditAnnotationFailedOpenKeyPrefix, idx)
value := hook.Name
if err := versionedAttr.Attributes.AddAnnotation(key, value); err != nil {
Expand Down