From 462d8676960963b161face2f81ed9a48ecf67f9e Mon Sep 17 00:00:00 2001 From: Zach Reyes <39203661+zasweq@users.noreply.github.com> Date: Mon, 9 May 2022 16:13:07 -0400 Subject: [PATCH] xds: Add Outlier Detection configuration and CDS handling (#5299) xds: Add Outlier Detection configuration and CDS handling --- .../balancer/cdsbalancer/cdsbalancer.go | 51 +++++ .../cdsbalancer/cdsbalancer_security_test.go | 14 +- .../balancer/cdsbalancer/cdsbalancer_test.go | 199 ++++++++++++++++-- .../balancer/clusterresolver/config.go | 6 + .../balancer/outlierdetection/config.go | 184 ++++++++++++++++ .../xdsclient/xdsresource/type_cds.go | 2 +- 6 files changed, 435 insertions(+), 21 deletions(-) create mode 100644 xds/internal/balancer/outlierdetection/config.go diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer.go b/xds/internal/balancer/cdsbalancer/cdsbalancer.go index 0be796c47ba..d057ed66a53 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer.go @@ -29,6 +29,7 @@ import ( "google.golang.org/grpc/credentials/tls/certprovider" "google.golang.org/grpc/internal/buffer" xdsinternal "google.golang.org/grpc/internal/credentials/xds" + "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpclog" "google.golang.org/grpc/internal/grpcsync" "google.golang.org/grpc/internal/pretty" @@ -36,6 +37,7 @@ import ( "google.golang.org/grpc/resolver" "google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/xds/internal/balancer/clusterresolver" + "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/xdsclient" "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" @@ -270,6 +272,52 @@ func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanc return provider, nil } +func outlierDetectionToConfig(od *xdsresource.OutlierDetection) *outlierdetection.LBConfig { // Already 
validated - no need to return error + if od == nil { + // "If the outlier_detection field is not set in the Cluster message, a + // "no-op" outlier_detection config will be generated, with interval set + // to the maximum possible value and all other fields unset." - A50 + return &outlierdetection.LBConfig{ + Interval: 1<<63 - 1, + } + } + + // "if the enforcing_success_rate field is set to 0, the config + // success_rate_ejection field will be null and all success_rate_* fields + // will be ignored." - A50 + var sre *outlierdetection.SuccessRateEjection + if od.EnforcingSuccessRate != 0 { + sre = &outlierdetection.SuccessRateEjection{ + StdevFactor: od.SuccessRateStdevFactor, + EnforcementPercentage: od.EnforcingSuccessRate, + MinimumHosts: od.SuccessRateMinimumHosts, + RequestVolume: od.SuccessRateRequestVolume, + } + } + + // "If the enforcing_failure_percent field is set to 0 or null, the config + // failure_percent_ejection field will be null and all failure_percent_* + // fields will be ignored." - A50 + var fpe *outlierdetection.FailurePercentageEjection + if od.EnforcingFailurePercentage != 0 { + fpe = &outlierdetection.FailurePercentageEjection{ + Threshold: od.FailurePercentageThreshold, + EnforcementPercentage: od.EnforcingFailurePercentage, + MinimumHosts: od.FailurePercentageMinimumHosts, + RequestVolume: od.FailurePercentageRequestVolume, + } + } + + return &outlierdetection.LBConfig{ + Interval: od.Interval, + BaseEjectionTime: od.BaseEjectionTime, + MaxEjectionTime: od.MaxEjectionTime, + MaxEjectionPercent: od.MaxEjectionPercent, + SuccessRateEjection: sre, + FailurePercentageEjection: fpe, + } +} + // handleWatchUpdate handles a watch update from the xDS Client. Good updates // lead to clientConn updates being invoked on the underlying cluster_resolver balancer. 
func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { @@ -342,6 +390,9 @@ func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { default: b.logger.Infof("unexpected cluster type %v when handling update from cluster handler", cu.ClusterType) } + if envconfig.XDSOutlierDetection { + dms[i].OutlierDetection = outlierDetectionToConfig(cu.OutlierDetection) + } } lbCfg := &clusterresolver.LBConfig{ DiscoveryMechanisms: dms, diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go index cd93dd0ecd8..c58990ab34d 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_security_test.go @@ -250,7 +250,7 @@ func (s) TestSecurityConfigWithoutXDSCreds(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -306,7 +306,7 @@ func (s) TestNoSecurityConfigWithXDSCreds(t *testing.T) { // newChildBalancer function as part of test setup. No security config is // passed to the CDS balancer as part of this update. 
cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -462,7 +462,7 @@ func (s) TestSecurityConfigUpdate_BadToGood(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -496,7 +496,7 @@ func (s) TestGoodSecurityConfig(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -549,7 +549,7 @@ func (s) TestSecurityConfigUpdate_GoodToFallback(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. 
- wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -599,7 +599,7 @@ func (s) TestSecurityConfigUpdate_GoodToBad(t *testing.T) { // create a new EDS balancer. The fake EDS balancer created above will be // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdateWithGoodSecurityCfg, nil}, wantCCS, edsB); err != nil { @@ -677,7 +677,7 @@ func (s) TestSecurityConfigUpdate_GoodToGood(t *testing.T) { SubjectAltNameMatchers: testSANMatchers, }, } - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { diff --git a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go index 112d25df333..b15481f318b 100644 --- a/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go +++ b/xds/internal/balancer/cdsbalancer/cdsbalancer_test.go @@ -29,12 +29,14 @@ import ( "google.golang.org/grpc/balancer" "google.golang.org/grpc/connectivity" "google.golang.org/grpc/internal" + "google.golang.org/grpc/internal/envconfig" "google.golang.org/grpc/internal/grpctest" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/internal/testutils" "google.golang.org/grpc/resolver" 
"google.golang.org/grpc/serviceconfig" "google.golang.org/grpc/xds/internal/balancer/clusterresolver" + "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/testutils/fakeclient" "google.golang.org/grpc/xds/internal/xdsclient" @@ -49,10 +51,15 @@ const ( defaultTestShortTimeout = 10 * time.Millisecond // For events expected to *not* happen. ) -var defaultTestAuthorityServerConfig = &bootstrap.ServerConfig{ - ServerURI: "self_server", - CredsType: "self_creds", -} +var ( + defaultTestAuthorityServerConfig = &bootstrap.ServerConfig{ + ServerURI: "self_server", + CredsType: "self_creds", + } + noopODLBCfg = &outlierdetection.LBConfig{ + Interval: 1<<63 - 1, + } +) type s struct { grpctest.Tester @@ -208,11 +215,12 @@ func cdsCCS(cluster string, xdsC xdsclient.XDSClient) balancer.ClientConnState { // edsCCS is a helper function to construct a good update passed from the // cdsBalancer to the edsBalancer. -func edsCCS(service string, countMax *uint32, enableLRS bool, xdslbpolicy *internalserviceconfig.BalancerConfig) balancer.ClientConnState { +func edsCCS(service string, countMax *uint32, enableLRS bool, xdslbpolicy *internalserviceconfig.BalancerConfig, odConfig *outlierdetection.LBConfig) balancer.ClientConnState { discoveryMechanism := clusterresolver.DiscoveryMechanism{ Type: clusterresolver.DiscoveryMechanismTypeEDS, Cluster: service, MaxConcurrentRequests: countMax, + OutlierDetection: odConfig, } if enableLRS { discoveryMechanism.LoadReportingServer = defaultTestAuthorityServerConfig @@ -358,11 +366,14 @@ func (s) TestUpdateClientConnStateWithSameState(t *testing.T) { // different updates and verifies that the expect ClientConnState is propagated // to the edsBalancer. 
func (s) TestHandleClusterUpdate(t *testing.T) { + oldOutlierDetection := envconfig.XDSOutlierDetection + envconfig.XDSOutlierDetection = true xdsC, cdsB, edsB, _, cancel := setupWithWatch(t) xdsC.SetBootstrapConfig(&bootstrap.Config{ XDSServer: defaultTestAuthorityServerConfig, }) defer func() { + envconfig.XDSOutlierDetection = oldOutlierDetection cancel() cdsB.Close() }() @@ -376,12 +387,12 @@ func (s) TestHandleClusterUpdate(t *testing.T) { { name: "happy-case-with-lrs", cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName, LRSServerConfig: xdsresource.ClusterLRSServerSelf}, - wantCCS: edsCCS(serviceName, nil, true, nil), + wantCCS: edsCCS(serviceName, nil, true, nil, noopODLBCfg), }, { name: "happy-case-without-lrs", cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName}, - wantCCS: edsCCS(serviceName, nil, false, nil), + wantCCS: edsCCS(serviceName, nil, false, nil, noopODLBCfg), }, { name: "happy-case-with-ring-hash-lb-policy", @@ -392,6 +403,41 @@ func (s) TestHandleClusterUpdate(t *testing.T) { wantCCS: edsCCS(serviceName, nil, false, &internalserviceconfig.BalancerConfig{ Name: ringhash.Name, Config: &ringhash.LBConfig{MinRingSize: 10, MaxRingSize: 100}, + }, noopODLBCfg), + }, + { + name: "happy-case-outlier-detection", + cdsUpdate: xdsresource.ClusterUpdate{ClusterName: serviceName, OutlierDetection: &xdsresource.OutlierDetection{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateStdevFactor: 1900, + EnforcingSuccessRate: 100, + SuccessRateMinimumHosts: 5, + SuccessRateRequestVolume: 100, + FailurePercentageThreshold: 85, + EnforcingFailurePercentage: 5, + FailurePercentageMinimumHosts: 5, + FailurePercentageRequestVolume: 50, + }}, + wantCCS: edsCCS(serviceName, nil, false, nil, &outlierdetection.LBConfig{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + 
SuccessRateEjection: &outlierdetection.SuccessRateEjection{ + StdevFactor: 1900, + EnforcementPercentage: 100, + MinimumHosts: 5, + RequestVolume: 100, + }, + FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ + Threshold: 85, + EnforcementPercentage: 5, + MinimumHosts: 5, + RequestVolume: 50, + }, }), }, } @@ -460,7 +506,7 @@ func (s) TestHandleClusterUpdateError(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -545,7 +591,7 @@ func (s) TestResolverError(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { t.Fatal(err) } @@ -594,7 +640,7 @@ func (s) TestUpdateSubConnState(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -629,7 +675,7 @@ func (s) TestCircuitBreaking(t *testing.T) { // the service's counter with the new max requests. 
var maxRequests uint32 = 1 cdsUpdate := xdsresource.ClusterUpdate{ClusterName: clusterName, MaxRequests: &maxRequests} - wantCCS := edsCCS(clusterName, &maxRequests, false, nil) + wantCCS := edsCCS(clusterName, &maxRequests, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -662,7 +708,7 @@ func (s) TestClose(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -733,7 +779,7 @@ func (s) TestExitIdle(t *testing.T) { // returned to the CDS balancer, because we have overridden the // newChildBalancer function as part of test setup. 
cdsUpdate := xdsresource.ClusterUpdate{ClusterName: serviceName} - wantCCS := edsCCS(serviceName, nil, false, nil) + wantCCS := edsCCS(serviceName, nil, false, nil, nil) ctx, ctxCancel := context.WithTimeout(context.Background(), defaultTestTimeout) defer ctxCancel() if err := invokeWatchCbAndWait(ctx, xdsC, cdsWatchInfo{cdsUpdate, nil}, wantCCS, edsB); err != nil { @@ -795,3 +841,130 @@ func (s) TestParseConfig(t *testing.T) { }) } } + +func (s) TestOutlierDetectionToConfig(t *testing.T) { + tests := []struct { + name string + od *xdsresource.OutlierDetection + odLBCfgWant *outlierdetection.LBConfig + }{ + // "if the outlier_detection field is not set in the Cluster resource, + // a "no-op" outlier_detection config will be generated in the + // corresponding DiscoveryMechanism config, with interval set to the + // maximum possible value and all other fields unset." - A50 + { + name: "no-op-outlier-detection-config", + od: nil, + odLBCfgWant: noopODLBCfg, + }, + // "if the enforcing_success_rate field is set to 0, the config + // success_rate_ejection field will be null and all success_rate_* + // fields will be ignored." 
- A50 + { + name: "enforcing-success-rate-zero", + od: &xdsresource.OutlierDetection{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateStdevFactor: 1900, + EnforcingSuccessRate: 0, + SuccessRateMinimumHosts: 5, + SuccessRateRequestVolume: 100, + FailurePercentageThreshold: 85, + EnforcingFailurePercentage: 5, + FailurePercentageMinimumHosts: 5, + FailurePercentageRequestVolume: 50, + }, + odLBCfgWant: &outlierdetection.LBConfig{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateEjection: nil, + FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ + Threshold: 85, + EnforcementPercentage: 5, + MinimumHosts: 5, + RequestVolume: 50, + }, + }, + }, + // "If the enforcing_failure_percent field is set to 0 or null, the + // config failure_percent_ejection field will be null and all + // failure_percent_* fields will be ignored." 
- A50 + { + name: "enforcing-failure-percentage-zero", + od: &xdsresource.OutlierDetection{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateStdevFactor: 1900, + EnforcingSuccessRate: 100, + SuccessRateMinimumHosts: 5, + SuccessRateRequestVolume: 100, + FailurePercentageThreshold: 85, + EnforcingFailurePercentage: 0, + FailurePercentageMinimumHosts: 5, + FailurePercentageRequestVolume: 50, + }, + odLBCfgWant: &outlierdetection.LBConfig{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateEjection: &outlierdetection.SuccessRateEjection{ + StdevFactor: 1900, + EnforcementPercentage: 100, + MinimumHosts: 5, + RequestVolume: 100, + }, + FailurePercentageEjection: nil, + }, + }, + { + name: "normal-conversion", + od: &xdsresource.OutlierDetection{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateStdevFactor: 1900, + EnforcingSuccessRate: 100, + SuccessRateMinimumHosts: 5, + SuccessRateRequestVolume: 100, + FailurePercentageThreshold: 85, + EnforcingFailurePercentage: 5, + FailurePercentageMinimumHosts: 5, + FailurePercentageRequestVolume: 50, + }, + odLBCfgWant: &outlierdetection.LBConfig{ + Interval: 10 * time.Second, + BaseEjectionTime: 30 * time.Second, + MaxEjectionTime: 300 * time.Second, + MaxEjectionPercent: 10, + SuccessRateEjection: &outlierdetection.SuccessRateEjection{ + StdevFactor: 1900, + EnforcementPercentage: 100, + MinimumHosts: 5, + RequestVolume: 100, + }, + FailurePercentageEjection: &outlierdetection.FailurePercentageEjection{ + Threshold: 85, + EnforcementPercentage: 5, + MinimumHosts: 5, + RequestVolume: 50, + }, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + odLBCfgGot := outlierDetectionToConfig(test.od) + if diff := 
cmp.Diff(odLBCfgGot, test.odLBCfgWant); diff != "" { + t.Fatalf("outlierDetectionToConfig(%v) (-want, +got):\n%s", test.od, diff) + } + }) + } +} diff --git a/xds/internal/balancer/clusterresolver/config.go b/xds/internal/balancer/clusterresolver/config.go index 363afd03ab2..1cbffdfa52f 100644 --- a/xds/internal/balancer/clusterresolver/config.go +++ b/xds/internal/balancer/clusterresolver/config.go @@ -26,6 +26,7 @@ import ( "google.golang.org/grpc/balancer/roundrobin" internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" "google.golang.org/grpc/serviceconfig" + "google.golang.org/grpc/xds/internal/balancer/outlierdetection" "google.golang.org/grpc/xds/internal/balancer/ringhash" "google.golang.org/grpc/xds/internal/xdsclient/bootstrap" ) @@ -102,6 +103,9 @@ type DiscoveryMechanism struct { // DNSHostname is the DNS name to resolve in "host:port" form. For type // LOGICAL_DNS only. DNSHostname string `json:"dnsHostname,omitempty"` + // OutlierDetection is the Outlier Detection LB configuration for this + // priority. + OutlierDetection *outlierdetection.LBConfig `json:"outlierDetection,omitempty"` } // Equal returns whether the DiscoveryMechanism is the same with the parameter. @@ -117,6 +121,8 @@ func (dm DiscoveryMechanism) Equal(b DiscoveryMechanism) bool { return false case dm.DNSHostname != b.DNSHostname: return false + case !dm.OutlierDetection.Equal(b.OutlierDetection): + return false } if dm.LoadReportingServer == nil && b.LoadReportingServer == nil { diff --git a/xds/internal/balancer/outlierdetection/config.go b/xds/internal/balancer/outlierdetection/config.go new file mode 100644 index 00000000000..8b0cdcab406 --- /dev/null +++ b/xds/internal/balancer/outlierdetection/config.go @@ -0,0 +1,184 @@ +/* + * + * Copyright 2022 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package outlierdetection implements a balancer that implements +// Outlier Detection. +package outlierdetection + +import ( + "time" + + "github.com/google/go-cmp/cmp" + internalserviceconfig "google.golang.org/grpc/internal/serviceconfig" + "google.golang.org/grpc/serviceconfig" +) + +// SuccessRateEjection is parameters for the success rate ejection algorithm. +// This algorithm monitors the request success rate for all endpoints and ejects +// individual endpoints whose success rates are statistical outliers. +type SuccessRateEjection struct { + // StdevFactor is used to determine the ejection threshold for + // success rate outlier ejection. The ejection threshold is the difference + // between the mean success rate, and the product of this factor and the + // standard deviation of the mean success rate: mean - (stdev * + // success_rate_stdev_factor). This factor is divided by a thousand to get a + // double. That is, if the desired factor is 1.9, the runtime value should + // be 1900. Defaults to 1900. + StdevFactor uint32 `json:"stdevFactor,omitempty"` + // EnforcementPercentage is the % chance that a host will be actually ejected + // when an outlier status is detected through success rate statistics. This + // setting can be used to disable ejection or to ramp it up slowly. Defaults + // to 100. + EnforcementPercentage uint32 `json:"enforcementPercentage,omitempty"` + // MinimumHosts is the number of hosts in a cluster that must have enough + // request volume to detect success rate outliers. 
If the number of hosts is + // less than this setting, outlier detection via success rate statistics is + // not performed for any host in the cluster. Defaults to 5. + MinimumHosts uint32 `json:"minimumHosts,omitempty"` + // RequestVolume is the minimum number of total requests that must be + // collected in one interval (as defined by the interval duration above) to + // include this host in success rate based outlier detection. If the volume + // is lower than this setting, outlier detection via success rate statistics + // is not performed for that host. Defaults to 100. + RequestVolume uint32 `json:"requestVolume,omitempty"` +} + +// Equal returns whether the SuccessRateEjection is the same with the parameter. +func (sre *SuccessRateEjection) Equal(sre2 *SuccessRateEjection) bool { + if sre == nil && sre2 == nil { + return true + } + if (sre != nil) != (sre2 != nil) { + return false + } + if sre.StdevFactor != sre2.StdevFactor { + return false + } + if sre.EnforcementPercentage != sre2.EnforcementPercentage { + return false + } + if sre.MinimumHosts != sre2.MinimumHosts { + return false + } + return sre.RequestVolume == sre2.RequestVolume +} + +// FailurePercentageEjection is parameters for the failure percentage algorithm. +// This algorithm ejects individual endpoints whose failure rate is greater than +// some threshold, independently of any other endpoint. +type FailurePercentageEjection struct { + // Threshold is the failure percentage to use when determining failure + // percentage-based outlier detection. If the failure percentage of a given + // host is greater than or equal to this value, it will be ejected. Defaults + // to 85. + Threshold uint32 `json:"threshold,omitempty"` + // EnforcementPercentage is the % chance that a host will be actually + // ejected when an outlier status is detected through failure percentage + // statistics. This setting can be used to disable ejection or to ramp it up + // slowly. Defaults to 0. 
+ EnforcementPercentage uint32 `json:"enforcementPercentage,omitempty"` + // MinimumHosts is the minimum number of hosts in a cluster in order to + // perform failure percentage-based ejection. If the total number of hosts + // in the cluster is less than this value, failure percentage-based ejection + // will not be performed. Defaults to 5. + MinimumHosts uint32 `json:"minimumHosts,omitempty"` + // RequestVolume is the minimum number of total requests that must be + // collected in one interval (as defined by the interval duration above) to + // perform failure percentage-based ejection for this host. If the volume is + // lower than this setting, failure percentage-based ejection will not be + // performed for this host. Defaults to 50. + RequestVolume uint32 `json:"requestVolume,omitempty"` +} + +// Equal returns whether the FailurePercentageEjection is the same with the +// parameter. +func (fpe *FailurePercentageEjection) Equal(fpe2 *FailurePercentageEjection) bool { + if fpe == nil && fpe2 == nil { + return true + } + if (fpe != nil) != (fpe2 != nil) { + return false + } + if fpe.Threshold != fpe2.Threshold { + return false + } + if fpe.EnforcementPercentage != fpe2.EnforcementPercentage { + return false + } + if fpe.MinimumHosts != fpe2.MinimumHosts { + return false + } + return fpe.RequestVolume == fpe2.RequestVolume +} + +// LBConfig is the config for the outlier detection balancer. +type LBConfig struct { + serviceconfig.LoadBalancingConfig `json:"-"` + // Interval is the time interval between ejection analysis sweeps. This can + // result in both new ejections as well as addresses being returned to + // service. Defaults to 10s. + Interval time.Duration `json:"interval,omitempty"` + // BaseEjectionTime is the base time that a host is ejected for. The real + // time is equal to the base time multiplied by the number of times the host + // has been ejected and is capped by MaxEjectionTime. Defaults to 30s. 
+ BaseEjectionTime time.Duration `json:"baseEjectionTime,omitempty"` + // MaxEjectionTime is the maximum time that an address is ejected for. If + // not specified, the default value (300s) or the BaseEjectionTime value is + // applied, whichever is larger. + MaxEjectionTime time.Duration `json:"maxEjectionTime,omitempty"` + // MaxEjectionPercent is the maximum % of an upstream cluster that can be + // ejected due to outlier detection. Defaults to 10% but will eject at least + // one host regardless of the value. + MaxEjectionPercent uint32 `json:"maxEjectionPercent,omitempty"` + // SuccessRateEjection is the parameters for the success rate ejection + // algorithm. If set, success rate ejections will be performed. + SuccessRateEjection *SuccessRateEjection `json:"successRateEjection,omitempty"` + // FailurePercentageEjection is the parameters for the failure percentage + // algorithm. If set, failure rate ejections will be performed. + FailurePercentageEjection *FailurePercentageEjection `json:"failurePercentageEjection,omitempty"` + // ChildPolicy is the config for the child policy. + ChildPolicy *internalserviceconfig.BalancerConfig `json:"childPolicy,omitempty"` +} + +// Equal returns whether the LBConfig is the same with the parameter. 
+func (lbc *LBConfig) Equal(lbc2 *LBConfig) bool { + if lbc == nil && lbc2 == nil { + return true + } + if (lbc != nil) != (lbc2 != nil) { + return false + } + if lbc.Interval != lbc2.Interval { + return false + } + if lbc.BaseEjectionTime != lbc2.BaseEjectionTime { + return false + } + if lbc.MaxEjectionTime != lbc2.MaxEjectionTime { + return false + } + if lbc.MaxEjectionPercent != lbc2.MaxEjectionPercent { + return false + } + if !lbc.SuccessRateEjection.Equal(lbc2.SuccessRateEjection) { + return false + } + if !lbc.FailurePercentageEjection.Equal(lbc2.FailurePercentageEjection) { + return false + } + return cmp.Equal(lbc.ChildPolicy, lbc2.ChildPolicy) +} diff --git a/xds/internal/xdsclient/xdsresource/type_cds.go b/xds/internal/xdsclient/xdsresource/type_cds.go index b61a80b429c..d459717acd2 100644 --- a/xds/internal/xdsclient/xdsresource/type_cds.go +++ b/xds/internal/xdsclient/xdsresource/type_cds.go @@ -76,7 +76,7 @@ type OutlierDetection struct { // ejected due to outlier detection. Defaults to 10% but will eject at least // one host regardless of the value. MaxEjectionPercent uint32 - // SuccessRateStddevFactor is used to determine the ejection threshold for + // SuccessRateStdevFactor is used to determine the ejection threshold for // success rate outlier ejection. The ejection threshold is the difference // between the mean success rate, and the product of this factor and the // standard deviation of the mean success rate: mean - (stdev *