Skip to content

Commit

Permalink
xds: Configure outlier detection. (#9456)
Browse files Browse the repository at this point in the history
Enables the new OutlierDetectionLoadBalancer when outlier detection is enabled
in the xDS cluster configuration.
  • Loading branch information
temawi committed Aug 18, 2022
1 parent 128688a commit 81abb21
Show file tree
Hide file tree
Showing 13 changed files with 789 additions and 117 deletions.
34 changes: 18 additions & 16 deletions core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java
Expand Up @@ -31,6 +31,7 @@
import io.grpc.ConnectivityState;
import io.grpc.ConnectivityStateInfo;
import io.grpc.EquivalentAddressGroup;
import io.grpc.Internal;
import io.grpc.LoadBalancer;
import io.grpc.Metadata;
import io.grpc.Status;
Expand Down Expand Up @@ -58,7 +59,8 @@
* <p>This implements the outlier detection gRFC:
* https://github.com/grpc/proposal/blob/master/A50-xds-outlier-detection.md
*/
public class OutlierDetectionLoadBalancer extends LoadBalancer {
@Internal
public final class OutlierDetectionLoadBalancer extends LoadBalancer {

@VisibleForTesting
final AddressTrackerMap trackerMap;
Expand Down Expand Up @@ -837,13 +839,13 @@ private static boolean hasSingleAddress(List<EquivalentAddressGroup> addressGrou
*/
public static final class OutlierDetectionLoadBalancerConfig {

final Long intervalNanos;
final Long baseEjectionTimeNanos;
final Long maxEjectionTimeNanos;
final Integer maxEjectionPercent;
final SuccessRateEjection successRateEjection;
final FailurePercentageEjection failurePercentageEjection;
final PolicySelection childPolicy;
public final Long intervalNanos;
public final Long baseEjectionTimeNanos;
public final Long maxEjectionTimeNanos;
public final Integer maxEjectionPercent;
public final SuccessRateEjection successRateEjection;
public final FailurePercentageEjection failurePercentageEjection;
public final PolicySelection childPolicy;

private OutlierDetectionLoadBalancerConfig(Long intervalNanos,
Long baseEjectionTimeNanos,
Expand Down Expand Up @@ -932,10 +934,10 @@ public OutlierDetectionLoadBalancerConfig build() {
/** The configuration for success rate ejection. */
public static class SuccessRateEjection {

final Integer stdevFactor;
final Integer enforcementPercentage;
final Integer minimumHosts;
final Integer requestVolume;
public final Integer stdevFactor;
public final Integer enforcementPercentage;
public final Integer minimumHosts;
public final Integer requestVolume;

SuccessRateEjection(Integer stdevFactor, Integer enforcementPercentage, Integer minimumHosts,
Integer requestVolume) {
Expand Down Expand Up @@ -996,10 +998,10 @@ public SuccessRateEjection build() {

/** The configuration for failure percentage ejection. */
public static class FailurePercentageEjection {
final Integer threshold;
final Integer enforcementPercentage;
final Integer minimumHosts;
final Integer requestVolume;
public final Integer threshold;
public final Integer enforcementPercentage;
public final Integer minimumHosts;
public final Integer requestVolume;

FailurePercentageEjection(Integer threshold, Integer enforcementPercentage,
Integer minimumHosts, Integer requestVolume) {
Expand Down
Expand Up @@ -16,6 +16,7 @@

package io.grpc.util;

import io.grpc.Internal;
import io.grpc.LoadBalancer;
import io.grpc.LoadBalancer.Helper;
import io.grpc.LoadBalancerProvider;
Expand All @@ -33,6 +34,7 @@
import java.util.List;
import java.util.Map;

@Internal
public final class OutlierDetectionLoadBalancerProvider extends LoadBalancerProvider {

@Override
Expand Down
2 changes: 1 addition & 1 deletion xds/src/main/java/io/grpc/xds/CdsLoadBalancer2.java
Expand Up @@ -159,7 +159,7 @@ private void handleClusterDiscovered() {
instance = DiscoveryMechanism.forEds(
clusterState.name, clusterState.result.edsServiceName(),
clusterState.result.lrsServerInfo(), clusterState.result.maxConcurrentRequests(),
clusterState.result.upstreamTlsContext());
clusterState.result.upstreamTlsContext(), clusterState.result.outlierDetection());
} else { // logical DNS
instance = DiscoveryMechanism.forLogicalDns(
clusterState.name, clusterState.result.dnsHostName(),
Expand Down
78 changes: 77 additions & 1 deletion xds/src/main/java/io/grpc/xds/ClientXdsClient.java
Expand Up @@ -90,6 +90,7 @@
import io.grpc.xds.EnvoyServerProtoData.ConnectionSourceType;
import io.grpc.xds.EnvoyServerProtoData.FilterChain;
import io.grpc.xds.EnvoyServerProtoData.FilterChainMatch;
import io.grpc.xds.EnvoyServerProtoData.OutlierDetection;
import io.grpc.xds.EnvoyServerProtoData.UpstreamTlsContext;
import io.grpc.xds.Filter.ClientInterceptorBuilder;
import io.grpc.xds.Filter.FilterConfig;
Expand Down Expand Up @@ -166,6 +167,10 @@ final class ClientXdsClient extends XdsClient implements XdsResponseHandler, Res
static boolean enableCustomLbConfig =
Strings.isNullOrEmpty(System.getenv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG"))
|| Boolean.parseBoolean(System.getenv("GRPC_EXPERIMENTAL_XDS_CUSTOM_LB_CONFIG"));
// Outlier detection is opt-in: it is enabled only when the environment variable is set to a
// value that Boolean.parseBoolean accepts as true (case-insensitive "true"). The two conditions
// must be AND-ed; OR-ing them would turn the feature on for any non-empty value, including
// "false".
@VisibleForTesting
static boolean enableOutlierDetection =
    !Strings.isNullOrEmpty(System.getenv("GRPC_EXPERIMENTAL_XDS_OUTLIER_DETECTION"))
        && Boolean.parseBoolean(System.getenv("GRPC_EXPERIMENTAL_XDS_OUTLIER_DETECTION"));
private static final String TYPE_URL_HTTP_CONNECTION_MANAGER_V2 =
"type.googleapis.com/envoy.config.filter.network.http_connection_manager.v2"
+ ".HttpConnectionManager";
Expand Down Expand Up @@ -632,6 +637,65 @@ static void validateCommonTlsContext(
}
}

/**
 * Validates the duration and percentage fields of an Envoy outlier detection message.
 *
 * <p>Only fields that are present on the message are checked. The {@code interval},
 * {@code base_ejection_time} and {@code max_ejection_time} durations must be valid and
 * non-negative. The {@code max_ejection_percent}, {@code enforcing_success_rate},
 * {@code failure_percentage_threshold} and {@code enforcing_failure_percentage} values must not
 * exceed 100.
 *
 * @param outlierDetection the Envoy outlier detection message to check
 * @return the same {@code outlierDetection} instance that was passed in
 * @throws ResourceInvalidException if any present field fails its check
 */
static io.envoyproxy.envoy.config.cluster.v3.OutlierDetection validateOutlierDetection(
    io.envoyproxy.envoy.config.cluster.v3.OutlierDetection outlierDetection)
    throws ResourceInvalidException {
  if (outlierDetection.hasInterval()) {
    validateOutlierDetectionDuration("interval", outlierDetection.getInterval());
  }
  if (outlierDetection.hasBaseEjectionTime()) {
    validateOutlierDetectionDuration(
        "base_ejection_time", outlierDetection.getBaseEjectionTime());
  }
  if (outlierDetection.hasMaxEjectionTime()) {
    validateOutlierDetectionDuration(
        "max_ejection_time", outlierDetection.getMaxEjectionTime());
  }
  if (outlierDetection.hasMaxEjectionPercent()) {
    validateOutlierDetectionPercentage(
        "max_ejection_percent", outlierDetection.getMaxEjectionPercent().getValue());
  }
  if (outlierDetection.hasEnforcingSuccessRate()) {
    validateOutlierDetectionPercentage(
        "enforcing_success_rate", outlierDetection.getEnforcingSuccessRate().getValue());
  }
  if (outlierDetection.hasFailurePercentageThreshold()) {
    validateOutlierDetectionPercentage(
        "failure_percentage_threshold",
        outlierDetection.getFailurePercentageThreshold().getValue());
  }
  if (outlierDetection.hasEnforcingFailurePercentage()) {
    validateOutlierDetectionPercentage(
        "enforcing_failure_percentage",
        outlierDetection.getEnforcingFailurePercentage().getValue());
  }

  return outlierDetection;
}

/** Rejects an outlier detection duration that is malformed or has a negative component. */
private static void validateOutlierDetectionDuration(String fieldName, Duration duration)
    throws ResourceInvalidException {
  if (!Durations.isValid(duration)) {
    throw new ResourceInvalidException(
        "outlier_detection " + fieldName + " is not a valid Duration");
  }
  if (hasNegativeValues(duration)) {
    throw new ResourceInvalidException(
        "outlier_detection " + fieldName + " has a negative value");
  }
}

/** Rejects an outlier detection percentage that lies outside the range 0..100. */
private static void validateOutlierDetectionPercentage(String fieldName, int value)
    throws ResourceInvalidException {
  // These fields are unsigned 32-bit wrappers in the Envoy proto, but Java exposes the value as
  // a signed int. A wire value of 2^31 or more therefore reads as negative and would slip past
  // a plain "> 100" comparison, so negative readings are treated as out of range as well.
  if (value < 0 || value > 100) {
    throw new ResourceInvalidException("outlier_detection " + fieldName + " is > 100");
  }
}

/**
 * Reports whether either component of the given duration is below zero.
 *
 * @param duration the duration whose seconds and nanos components are inspected
 * @return {@code true} if the seconds or the nanos component is negative
 */
static boolean hasNegativeValues(Duration duration) {
  boolean negativeSeconds = duration.getSeconds() < 0;
  boolean negativeNanos = duration.getNanos() < 0;
  return negativeSeconds || negativeNanos;
}

private static String getIdentityCertInstanceName(CommonTlsContext commonTlsContext) {
if (commonTlsContext.hasTlsCertificateProviderInstance()) {
return commonTlsContext.getTlsCertificateProviderInstance().getInstanceName();
Expand Down Expand Up @@ -1704,6 +1768,7 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
ServerInfo lrsServerInfo = null;
Long maxConcurrentRequests = null;
UpstreamTlsContext upstreamTlsContext = null;
OutlierDetection outlierDetection = null;
if (cluster.hasLrsServer()) {
if (!cluster.getLrsServer().hasSelf()) {
return StructOrError.fromError(
Expand Down Expand Up @@ -1743,6 +1808,16 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
"Cluster " + clusterName + ": malformed UpstreamTlsContext: " + e);
}
}
if (cluster.hasOutlierDetection() && enableOutlierDetection) {
try {
outlierDetection = OutlierDetection.fromEnvoyOutlierDetection(
validateOutlierDetection(cluster.getOutlierDetection()));
} catch (ResourceInvalidException e) {
return StructOrError.fromError(
"Cluster " + clusterName + ": malformed outlier_detection: " + e);
}
}


DiscoveryType type = cluster.getType();
if (type == DiscoveryType.EDS) {
Expand All @@ -1763,7 +1838,8 @@ private static StructOrError<CdsUpdate.Builder> parseNonAggregateCluster(
edsResources.add(clusterName);
}
return StructOrError.fromStruct(CdsUpdate.forEds(
clusterName, edsServiceName, lrsServerInfo, maxConcurrentRequests, upstreamTlsContext));
clusterName, edsServiceName, lrsServerInfo, maxConcurrentRequests, upstreamTlsContext,
outlierDetection));
} else if (type.equals(DiscoveryType.LOGICAL_DNS)) {
if (!cluster.hasLoadAssignment()) {
return StructOrError.fromError(
Expand Down

0 comments on commit 81abb21

Please sign in to comment.