Skip to content

Commit

Permalink
xds: Configure outlier detection.
Browse files Browse the repository at this point in the history
  • Loading branch information
temawi committed Aug 18, 2022
1 parent 128688a commit b0f4107
Show file tree
Hide file tree
Showing 9 changed files with 443 additions and 75 deletions.
30 changes: 15 additions & 15 deletions core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java
Expand Up @@ -837,13 +837,13 @@ private static boolean hasSingleAddress(List<EquivalentAddressGroup> addressGrou
*/
public static final class OutlierDetectionLoadBalancerConfig {

final Long intervalNanos;
final Long baseEjectionTimeNanos;
final Long maxEjectionTimeNanos;
final Integer maxEjectionPercent;
final SuccessRateEjection successRateEjection;
final FailurePercentageEjection failurePercentageEjection;
final PolicySelection childPolicy;
public final Long intervalNanos;
public final Long baseEjectionTimeNanos;
public final Long maxEjectionTimeNanos;
public final Integer maxEjectionPercent;
public final SuccessRateEjection successRateEjection;
public final FailurePercentageEjection failurePercentageEjection;
public final PolicySelection childPolicy;

private OutlierDetectionLoadBalancerConfig(Long intervalNanos,
Long baseEjectionTimeNanos,
Expand Down Expand Up @@ -932,10 +932,10 @@ public OutlierDetectionLoadBalancerConfig build() {
/** The configuration for success rate ejection. */
public static class SuccessRateEjection {

final Integer stdevFactor;
final Integer enforcementPercentage;
final Integer minimumHosts;
final Integer requestVolume;
public final Integer stdevFactor;
public final Integer enforcementPercentage;
public final Integer minimumHosts;
public final Integer requestVolume;

SuccessRateEjection(Integer stdevFactor, Integer enforcementPercentage, Integer minimumHosts,
Integer requestVolume) {
Expand Down Expand Up @@ -996,10 +996,10 @@ public SuccessRateEjection build() {

/** The configuration for failure percentage ejection. */
public static class FailurePercentageEjection {
final Integer threshold;
final Integer enforcementPercentage;
final Integer minimumHosts;
final Integer requestVolume;
public final Integer threshold;
public final Integer enforcementPercentage;
public final Integer minimumHosts;
public final Integer requestVolume;

FailurePercentageEjection(Integer threshold, Integer enforcementPercentage,
Integer minimumHosts, Integer requestVolume) {
Expand Down
2 changes: 1 addition & 1 deletion xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java
Expand Up @@ -159,7 +159,7 @@ private void handleClusterDiscovered() {
instance = DiscoveryMechanism.forEds(
clusterState.name, clusterState.result.edsServiceName(),
clusterState.result.lrsServerInfo(), clusterState.result.maxConcurrentRequests(),
clusterState.result.upstreamTlsContext());
clusterState.result.upstreamTlsContext(), clusterState.result.outlierDetection());
} else { // logical DNS
instance = DiscoveryMechanism.forLogicalDns(
clusterState.name, clusterState.result.dnsHostName(),
Expand Down
12 changes: 11 additions & 1 deletion xds/src/main/java/io/grpc/xds/ClientXdsClient.java
Expand Up @@ -90,6 +90,7 @@
import io.grpc.xds.EnvoyServerProtoData.ConnectionSourceType;
import io.grpc.xds.EnvoyServerProtoData.FilterChain;
import io.grpc.xds.EnvoyServerProtoData.FilterChainMatch;
import io.grpc.xds.EnvoyServerProtoData.OutlierDetection;
import io.grpc.xds.EnvoyServerProtoData.UpstreamTlsContext;
import io.grpc.xds.Filter.ClientInterceptorBuilder;
import io.grpc.xds.Filter.FilterConfig;
Expand Down Expand Up @@ -166,6 +167,10 @@ final class ClientXdsClient extends XdsClient implements XdsResponseHandler, Res
static boolean enableCustomLbConfig =
Strings.isNullOrEmpty(System.getenv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG"))
|| Boolean.parseBoolean(System.getenv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG"));
@VisibleForTesting
static boolean enableOutlierDetection =
Strings.isNullOrEmpty(System.getenv("GRPC_EXPERIMENTAL_XDS_OUTLIER_DETECTION"))
|| Boolean.parseBoolean(System.getenv("GRPC_EXPERIMENTAL_XDS_OUTLIER_DETECTION"));
private static final String TYPE_URL_HTTP_CONNECTION_MANAGER_V2 =
"type.googleapis.com/envoy.config.filter.network.http_connection_manager.v2"
+ ".HttpConnectionManager";
Expand Down Expand Up @@ -1704,6 +1709,7 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
ServerInfo lrsServerInfo = null;
Long maxConcurrentRequests = null;
UpstreamTlsContext upstreamTlsContext = null;
OutlierDetection outlierDetection = null;
if (cluster.hasLrsServer()) {
if (!cluster.getLrsServer().hasSelf()) {
return StructOrError.fromError(
Expand Down Expand Up @@ -1743,6 +1749,9 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
"Cluster " + clusterName + ": malformed UpstreamTlsContext: " + e);
}
}
if (cluster.hasOutlierDetection() && enableOutlierDetection) {
outlierDetection = OutlierDetection.fromEnvoyOutlierDetection(cluster.getOutlierDetection());
}

DiscoveryType type = cluster.getType();
if (type == DiscoveryType.EDS) {
Expand All @@ -1763,7 +1772,8 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
edsResources.add(clusterName);
}
return StructOrError.fromStruct(CdsUpdate.forEds(
clusterName, edsServiceName, lrsServerInfo, maxConcurrentRequests, upstreamTlsContext));
clusterName, edsServiceName, lrsServerInfo, maxConcurrentRequests, upstreamTlsContext,
outlierDetection));
} else if (type.equals(DiscoveryType.LOGICAL_DNS)) {
if (!cluster.hasLoadAssignment()) {
return StructOrError.fromError(
Expand Down
115 changes: 104 additions & 11 deletions xds/src/main/java/io/grpc/xds/ClusterResolverLoadBalancer.java
Expand Up @@ -38,13 +38,17 @@
import io.grpc.internal.ServiceConfigUtil.PolicySelection;
import io.grpc.util.ForwardingLoadBalancerHelper;
import io.grpc.util.GracefulSwitchLoadBalancer;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig;
import io.grpc.xds.Bootstrapper.ServerInfo;
import io.grpc.xds.ClusterImplLoadBalancerProvider.ClusterImplConfig;
import io.grpc.xds.ClusterResolverLoadBalancerProvider.ClusterResolverConfig;
import io.grpc.xds.ClusterResolverLoadBalancerProvider.ClusterResolverConfig.DiscoveryMechanism;
import io.grpc.xds.Endpoints.DropOverload;
import io.grpc.xds.Endpoints.LbEndpoint;
import io.grpc.xds.Endpoints.LocalityLbEndpoints;
import io.grpc.xds.EnvoyServerProtoData.FailurePercentageEjection;
import io.grpc.xds.EnvoyServerProtoData.OutlierDetection;
import io.grpc.xds.EnvoyServerProtoData.SuccessRateEjection;
import io.grpc.xds.EnvoyServerProtoData.UpstreamTlsContext;
import io.grpc.xds.PriorityLoadBalancerProvider.PriorityLbConfig;
import io.grpc.xds.PriorityLoadBalancerProvider.PriorityLbConfig.PriorityChildConfig;
Expand Down Expand Up @@ -176,7 +180,8 @@ public void handleResolvedAddresses(ResolvedAddresses resolvedAddresses) {
ClusterState state;
if (instance.type == DiscoveryMechanism.Type.EDS) {
state = new EdsClusterState(instance.cluster, instance.edsServiceName,
instance.lrsServerInfo, instance.maxConcurrentRequests, instance.tlsContext);
instance.lrsServerInfo, instance.maxConcurrentRequests, instance.tlsContext,
instance.outlierDetection);
} else { // logical DNS
state = new LogicalDnsClusterState(instance.cluster, instance.dnsHostName,
instance.lrsServerInfo, instance.maxConcurrentRequests, instance.tlsContext);
Expand Down Expand Up @@ -316,6 +321,8 @@ private abstract class ClusterState {
protected final Long maxConcurrentRequests;
@Nullable
protected final UpstreamTlsContext tlsContext;
@Nullable
protected final OutlierDetection outlierDetection;
// Resolution status, may contain most recent error encountered.
protected Status status = Status.OK;
// True if has received resolution result.
Expand All @@ -327,11 +334,13 @@ private abstract class ClusterState {
protected boolean shutdown;

private ClusterState(String name, @Nullable ServerInfo lrsServerInfo,
@Nullable Long maxConcurrentRequests, @Nullable UpstreamTlsContext tlsContext) {
@Nullable Long maxConcurrentRequests, @Nullable UpstreamTlsContext tlsContext,
@Nullable OutlierDetection outlierDetection) {
this.name = name;
this.lrsServerInfo = lrsServerInfo;
this.maxConcurrentRequests = maxConcurrentRequests;
this.tlsContext = tlsContext;
this.outlierDetection = outlierDetection;
}

abstract void start();
Expand All @@ -349,8 +358,8 @@ private final class EdsClusterState extends ClusterState implements EdsResourceW

private EdsClusterState(String name, @Nullable String edsServiceName,
@Nullable ServerInfo lrsServerInfo, @Nullable Long maxConcurrentRequests,
@Nullable UpstreamTlsContext tlsContext) {
super(name, lrsServerInfo, maxConcurrentRequests, tlsContext);
@Nullable UpstreamTlsContext tlsContext, @Nullable OutlierDetection outlierDetection) {
super(name, lrsServerInfo, maxConcurrentRequests, tlsContext, outlierDetection);
this.edsServiceName = edsServiceName;
}

Expand Down Expand Up @@ -434,7 +443,8 @@ public void run() {
Map<String, PriorityChildConfig> priorityChildConfigs =
generateEdsBasedPriorityChildConfigs(
name, edsServiceName, lrsServerInfo, maxConcurrentRequests, tlsContext,
endpointLbPolicy, lbRegistry, prioritizedLocalityWeights, dropOverloads);
outlierDetection, endpointLbPolicy, lbRegistry, prioritizedLocalityWeights,
dropOverloads);
status = Status.OK;
resolved = true;
result = new ClusterResolutionResult(addresses, priorityChildConfigs,
Expand Down Expand Up @@ -530,7 +540,7 @@ private final class LogicalDnsClusterState extends ClusterState {
private LogicalDnsClusterState(String name, String dnsHostName,
@Nullable ServerInfo lrsServerInfo, @Nullable Long maxConcurrentRequests,
@Nullable UpstreamTlsContext tlsContext) {
super(name, lrsServerInfo, maxConcurrentRequests, tlsContext);
super(name, lrsServerInfo, maxConcurrentRequests, tlsContext, null);
this.dnsHostName = checkNotNull(dnsHostName, "dnsHostName");
nameResolverFactory =
checkNotNull(helper.getNameResolverRegistry().asFactory(), "nameResolverFactory");
Expand Down Expand Up @@ -730,25 +740,108 @@ private static PriorityChildConfig generateDnsBasedPriorityChildConfig(
private static Map<String, PriorityChildConfig> generateEdsBasedPriorityChildConfigs(
String cluster, @Nullable String edsServiceName, @Nullable ServerInfo lrsServerInfo,
@Nullable Long maxConcurrentRequests, @Nullable UpstreamTlsContext tlsContext,
PolicySelection endpointLbPolicy, LoadBalancerRegistry lbRegistry,
Map<String, Map<Locality, Integer>> prioritizedLocalityWeights,
List<DropOverload> dropOverloads) {
@Nullable OutlierDetection outlierDetection, PolicySelection endpointLbPolicy,
LoadBalancerRegistry lbRegistry, Map<String,
Map<Locality, Integer>> prioritizedLocalityWeights, List<DropOverload> dropOverloads) {
Map<String, PriorityChildConfig> configs = new HashMap<>();
for (String priority : prioritizedLocalityWeights.keySet()) {
ClusterImplConfig clusterImplConfig =
new ClusterImplConfig(cluster, edsServiceName, lrsServerInfo, maxConcurrentRequests,
dropOverloads, endpointLbPolicy, tlsContext);
LoadBalancerProvider clusterImplLbProvider =
lbRegistry.getProvider(XdsLbPolicies.CLUSTER_IMPL_POLICY_NAME);
PolicySelection clusterImplPolicy =
PolicySelection priorityChildPolicy =
new PolicySelection(clusterImplLbProvider, clusterImplConfig);

// If outlier detection has been configured we wrap the child policy in the outlier detection
// load balancer.
if (outlierDetection != null) {
LoadBalancerProvider outlierDetectionProvider = lbRegistry.getProvider(
"outlier_detection_experimental");
priorityChildPolicy = new PolicySelection(outlierDetectionProvider,
buildOutlierDetectionLbConfig(outlierDetection, priorityChildPolicy));
}

PriorityChildConfig priorityChildConfig =
new PriorityChildConfig(clusterImplPolicy, true /* ignoreReresolution */);
new PriorityChildConfig(priorityChildPolicy, true /* ignoreReresolution */);
configs.put(priority, priorityChildConfig);
}
return configs;
}

/**
 * Translates the xDS-level {@link OutlierDetection} settings into the {@link
 * OutlierDetectionLoadBalancerConfig} that the {@link io.grpc.util.OutlierDetectionLoadBalancer}
 * consumes.
 *
 * <p>Only values that are non-null in the xDS configuration are copied onto the builder; a value
 * that is absent is never passed to the corresponding setter. The success rate and failure
 * percentage sections are only attached when the xDS configuration carries them.
 *
 * @param outlierDetection the outlier detection settings taken from the xDS configuration
 * @param childPolicy the policy that the outlier detection load balancer should wrap
 * @return the config built from the supplied settings and child policy
 */
private static OutlierDetectionLoadBalancerConfig buildOutlierDetectionLbConfig(
    OutlierDetection outlierDetection, PolicySelection childPolicy) {
  OutlierDetectionLoadBalancerConfig.Builder lbConfig =
      new OutlierDetectionLoadBalancerConfig.Builder();
  lbConfig.setChildPolicy(childPolicy);

  // Top-level timing and ejection-limit settings.
  if (outlierDetection.intervalNanos() != null) {
    lbConfig.setIntervalNanos(outlierDetection.intervalNanos());
  }
  if (outlierDetection.baseEjectionTimeNanos() != null) {
    lbConfig.setBaseEjectionTimeNanos(outlierDetection.baseEjectionTimeNanos());
  }
  if (outlierDetection.maxEjectionTimeNanos() != null) {
    lbConfig.setMaxEjectionTimeNanos(outlierDetection.maxEjectionTimeNanos());
  }
  if (outlierDetection.maxEjectionPercent() != null) {
    lbConfig.setMaxEjectionPercent(outlierDetection.maxEjectionPercent());
  }

  // Optional ejection algorithms; each is converted by its own helper.
  SuccessRateEjection xdsSuccessRate = outlierDetection.successRateEjection();
  if (xdsSuccessRate != null) {
    lbConfig.setSuccessRateEjection(toSuccessRateEjectionConfig(xdsSuccessRate));
  }
  FailurePercentageEjection xdsFailurePercentage = outlierDetection.failurePercentageEjection();
  if (xdsFailurePercentage != null) {
    lbConfig.setFailurePercentageEjection(
        toFailurePercentageEjectionConfig(xdsFailurePercentage));
  }

  return lbConfig.build();
}

/** Copies the non-null fields of the xDS success rate settings into the LB config form. */
private static OutlierDetectionLoadBalancerConfig.SuccessRateEjection
    toSuccessRateEjectionConfig(SuccessRateEjection source) {
  OutlierDetectionLoadBalancerConfig.SuccessRateEjection.Builder target =
      new OutlierDetectionLoadBalancerConfig.SuccessRateEjection.Builder();
  if (source.stdevFactor() != null) {
    target.setStdevFactor(source.stdevFactor());
  }
  if (source.enforcementPercentage() != null) {
    target.setEnforcementPercentage(source.enforcementPercentage());
  }
  if (source.minimumHosts() != null) {
    target.setMinimumHosts(source.minimumHosts());
  }
  if (source.requestVolume() != null) {
    target.setRequestVolume(source.requestVolume());
  }
  return target.build();
}

/** Copies the non-null fields of the xDS failure percentage settings into the LB config form. */
private static OutlierDetectionLoadBalancerConfig.FailurePercentageEjection
    toFailurePercentageEjectionConfig(FailurePercentageEjection source) {
  OutlierDetectionLoadBalancerConfig.FailurePercentageEjection.Builder target =
      new OutlierDetectionLoadBalancerConfig.FailurePercentageEjection.Builder();
  if (source.threshold() != null) {
    target.setThreshold(source.threshold());
  }
  if (source.enforcementPercentage() != null) {
    target.setEnforcementPercentage(source.enforcementPercentage());
  }
  if (source.minimumHosts() != null) {
    target.setMinimumHosts(source.minimumHosts());
  }
  if (source.requestVolume() != null) {
    target.setRequestVolume(source.requestVolume());
  }
  return target.build();
}

/**
* Generates a string that represents the priority in the LB policy config. The string is unique
* across priorities in all clusters and priorityName(c, p1) < priorityName(c, p2) iff p1 < p2.
Expand Down
Expand Up @@ -26,6 +26,7 @@
import io.grpc.NameResolver.ConfigOrError;
import io.grpc.internal.ServiceConfigUtil.PolicySelection;
import io.grpc.xds.Bootstrapper.ServerInfo;
import io.grpc.xds.EnvoyServerProtoData.OutlierDetection;
import io.grpc.xds.EnvoyServerProtoData.UpstreamTlsContext;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -124,6 +125,8 @@ static final class DiscoveryMechanism {
// Hostname for resolving endpoints via DNS. Only valid for LOGICAL_DNS clusters.
@Nullable
final String dnsHostName;
@Nullable
final OutlierDetection outlierDetection;

enum Type {
EDS,
Expand All @@ -132,28 +135,31 @@ enum Type {

private DiscoveryMechanism(String cluster, Type type, @Nullable String edsServiceName,
@Nullable String dnsHostName, @Nullable ServerInfo lrsServerInfo,
@Nullable Long maxConcurrentRequests, @Nullable UpstreamTlsContext tlsContext) {
@Nullable Long maxConcurrentRequests, @Nullable UpstreamTlsContext tlsContext,
@Nullable OutlierDetection outlierDetection) {
this.cluster = checkNotNull(cluster, "cluster");
this.type = checkNotNull(type, "type");
this.edsServiceName = edsServiceName;
this.dnsHostName = dnsHostName;
this.lrsServerInfo = lrsServerInfo;
this.maxConcurrentRequests = maxConcurrentRequests;
this.tlsContext = tlsContext;
this.outlierDetection = outlierDetection;
}

static DiscoveryMechanism forEds(String cluster, @Nullable String edsServiceName,
@Nullable ServerInfo lrsServerInfo, @Nullable Long maxConcurrentRequests,
@Nullable UpstreamTlsContext tlsContext) {
@Nullable UpstreamTlsContext tlsContext,
OutlierDetection outlierDetection) {
return new DiscoveryMechanism(cluster, Type.EDS, edsServiceName, null, lrsServerInfo,
maxConcurrentRequests, tlsContext);
maxConcurrentRequests, tlsContext, outlierDetection);
}

static DiscoveryMechanism forLogicalDns(String cluster, String dnsHostName,
@Nullable ServerInfo lrsServerInfo, @Nullable Long maxConcurrentRequests,
@Nullable UpstreamTlsContext tlsContext) {
return new DiscoveryMechanism(cluster, Type.LOGICAL_DNS, null, dnsHostName,
lrsServerInfo, maxConcurrentRequests, tlsContext);
lrsServerInfo, maxConcurrentRequests, tlsContext, null);
}

@Override
Expand Down

0 comments on commit b0f4107

Please sign in to comment.