Skip to content

Commit

Permalink
failure percentage algo tested
Browse files Browse the repository at this point in the history
  • Loading branch information
temawi committed Aug 13, 2022
1 parent ece4f8b commit ee4e748
Show file tree
Hide file tree
Showing 2 changed files with 154 additions and 52 deletions.
44 changes: 18 additions & 26 deletions core/src/main/java/io/grpc/util/OutlierDetectionLoadBalancer.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
import javax.annotation.Nullable;

/**
* Wraps a child {@code LoadBalancer} while monitoring for outliers backends and removing them from
* use by the child LB.
* Wraps a child {@code LoadBalancer} while monitoring for outlier backends and removing them from
* the use of the child LB.
*
* <p>This implements the outlier detection gRFC:
* https://github.com/grpc/proposal/blob/master/A50-xds-outlier-detection.md
Expand Down Expand Up @@ -462,6 +462,10 @@ long volume() {
return ((double) inactiveCallCounter.successCount.get()) / volume();
}

/**
 * Returns the failure count recorded in the inactive call counter divided by {@link #volume()}.
 */
double failureRate() {
  // Widen to double before dividing so the quotient is not truncated by integer division.
  double failures = inactiveCallCounter.failureCount.get();
  return failures / volume();
}

void clearCallCounters() {
activeCallCounter.successCount.set(0);
activeCallCounter.failureCount.set(0);
Expand Down Expand Up @@ -622,7 +626,7 @@ List<AddressTracker> trackersWithVolume(OutlierDetectionLoadBalancerConfig confi
ejectedAddresses++;
}
}
return ((double)ejectedAddresses + 1 / totalAddresses) * 100;
return ((double)(ejectedAddresses + 1) / totalAddresses) * 100;
}
}

Expand Down Expand Up @@ -650,6 +654,11 @@ static OutlierEjectionAlgorithm forConfig(OutlierDetectionLoadBalancerConfig con
}
}

/**
* This algorithm ejects addresses that don't maintain a required rate of successful calls. The
* required rate is not fixed, but is based on the mean and standard deviation of the success
* rates of all of the addresses.
*/
static class SuccessRateOutlierEjectionAlgorithm implements OutlierEjectionAlgorithm {

private final OutlierDetectionLoadBalancerConfig config;
Expand Down Expand Up @@ -737,30 +746,19 @@ public void ejectOutliers(AddressTrackerMap trackerMap, long ejectionTimeMillis)

// If this address does not have enough volume to be considered, skip to the next one.
for (AddressTracker tracker : trackerMap.values()) {
// If we have already ejected addresses past the max percentage, stop here.
int totalAddresses = 0;
int ejectedAddresses = 0;
for (AddressTracker t : trackerMap.values()) {
totalAddresses++;
if (t.subchannelsEjected()) {
ejectedAddresses++;
}
}
double ejectedPercentage = (ejectedAddresses / totalAddresses) * 100;

if (ejectedPercentage > config.maxEjectionPercent) {
// If an ejection now would take us past the max configured ejection percentage, stop here.
if (trackerMap.nextEjectionPercentage() > config.maxEjectionPercent) {
return;
}

if (tracker.volume() < config.failurePercentageEjection.requestVolume) {
continue;
}

// If the failure percentage is above the threshold.
long failurePercentage =
(tracker.activeCallCounter.failureCount.get() / tracker.volume()) * 100;
if (failurePercentage > config.failurePercentageEjection.threshold) {
// Only eject some addresses based on the enforcement percentage.
// If the failure rate is above the threshold, we should eject...
double maxFailureRate = ((double)config.failurePercentageEjection.threshold) / 100;
if (tracker.failureRate() > maxFailureRate) {
// ...but only enforce this based on the enforcement percentage.
if (new Random().nextInt(100) < config.failurePercentageEjection.enforcementPercentage) {
tracker.ejectSubchannels(ejectionTimeMillis);
}
Expand Down Expand Up @@ -969,10 +967,4 @@ boolean outlierDetectionEnabled() {
return successRateEjection != null || failurePercentageEjection != null;
}
}

/**
 * Math needed in outlier detection.
 *
 * <p>This class currently declares no members.
 */
static class OutlierDetectionMath {


}
}
162 changes: 136 additions & 26 deletions core/src/test/java/io/grpc/util/OutlierDetectionLoadBalancerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import io.grpc.ClientStreamTracer;
Expand All @@ -50,9 +51,10 @@
import io.grpc.internal.FakeClock.ScheduledTask;
import io.grpc.internal.ServiceConfigUtil.PolicySelection;
import io.grpc.internal.TestUtils.StandardLoadBalancerProvider;
import io.grpc.util.OutlierDetectionLoadBalancer.AddressTracker;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig.FailurePercentageEjection;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionLoadBalancerConfig.SuccessRateEjection;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionMath;
import io.grpc.util.OutlierDetectionLoadBalancer.OutlierDetectionSubchannel;
import io.grpc.util.OutlierDetectionLoadBalancer.SuccessRateOutlierEjectionAlgorithm;
import io.grpc.util.RoundRobinLoadBalancer.ReadyPicker;
Expand All @@ -62,6 +64,8 @@
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.junit.Before;
import org.junit.Rule;
Expand Down Expand Up @@ -350,8 +354,9 @@ public void delegatePick() throws Exception {
*/
@Test
public void successRateNoOutliers() {
OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder().setMaxEjectionPercent(
50).setSuccessRateEjection(
OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
.setMaxEjectionPercent(50)
.setSuccessRateEjection(
new SuccessRateEjection.Builder().setMinimumHosts(3).setRequestVolume(10).build())
.setChildPolicy(new PolicySelection(roundRobinLbProvider, null)).build();

Expand All @@ -362,11 +367,8 @@ public void successRateNoOutliers() {
// Move forward in time to a point where the detection timer has fired.
fakeClock.forwardTime(config.intervalSecs + 1, TimeUnit.SECONDS);

assertThat(loadBalancer.trackerMap.get(servers.get(0)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(1)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(2)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(3)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(4)).subchannelsEjected()).isFalse();
// No outliers, no ejections.
assertEjectedChannels(ImmutableSet.of());
}

/**
Expand All @@ -388,11 +390,7 @@ public void successRateOneOutlier() {
fakeClock.forwardTime(config.intervalSecs + 1, TimeUnit.SECONDS);

// The one subchannel that was returning errors should be ejected.
assertThat(loadBalancer.trackerMap.get(servers.get(0)).subchannelsEjected()).isTrue();
assertThat(loadBalancer.trackerMap.get(servers.get(1)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(2)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(3)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(4)).subchannelsEjected()).isFalse();
assertEjectedChannels(ImmutableSet.of(servers.get(0)));
}

/**
Expand All @@ -413,12 +411,8 @@ public void successRateOneOutlier_notEnoughVolume() {
// Move forward in time to a point where the detection timer has fired.
fakeClock.forwardTime(config.intervalSecs + 1, TimeUnit.SECONDS);

// The one subchannel that was returning errors should be ejected.
assertThat(loadBalancer.trackerMap.get(servers.get(0)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(1)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(2)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(3)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(4)).subchannelsEjected()).isFalse();
// The address should not have been ejected.
assertEjectedChannels(ImmutableSet.of());
}

/**
Expand All @@ -440,11 +434,31 @@ public void successRateOneOutlier_notEnoughAddressesWithVolume() {
fakeClock.forwardTime(config.intervalSecs + 1, TimeUnit.SECONDS);

// No subchannels should have been ejected.
assertThat(loadBalancer.trackerMap.get(servers.get(0)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(1)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(2)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(3)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(4)).subchannelsEjected()).isFalse();
assertEjectedChannels(ImmutableSet.of());
}

/**
 * With an enforcement percentage of zero, the success rate algorithm must not eject an outlier.
 */
@Test
public void successRateOneOutlier_enforcementPercentage() {
  // Success rate ejection is configured, but with enforcement set to 0%.
  SuccessRateEjection successRateEjection = new SuccessRateEjection.Builder()
      .setMinimumHosts(3)
      .setRequestVolume(10)
      .setEnforcementPercentage(0)
      .build();
  OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
      .setMaxEjectionPercent(50)
      .setSuccessRateEjection(successRateEjection)
      .setChildPolicy(new PolicySelection(roundRobinLbProvider, null))
      .build();

  // Calls on subchannel1 fail with DEADLINE_EXCEEDED.
  generateLoad(config, ImmutableMap.of(subchannel1, Status.DEADLINE_EXCEEDED));

  // Advance the fake clock just past the detection interval so the detection timer fires.
  long secondsPastInterval = config.intervalSecs + 1;
  fakeClock.forwardTime(secondsPastInterval, TimeUnit.SECONDS);

  // An outlier exists, yet a 0% enforcement percentage means no address may be ejected.
  Set<EquivalentAddressGroup> expectedEjections = ImmutableSet.of();
  assertEjectedChannels(expectedEjections);
}

/**
Expand Down Expand Up @@ -474,6 +488,8 @@ public void successRateTwoOutliers() {
assertThat(loadBalancer.trackerMap.get(servers.get(2)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(3)).subchannelsEjected()).isFalse();
assertThat(loadBalancer.trackerMap.get(servers.get(4)).subchannelsEjected()).isFalse();

//assertEjectedChannels(ImmutableSet.of(servers.get(0), servers.get(1)));
}

/**
Expand Down Expand Up @@ -508,13 +524,99 @@ public void successRateTwoOutliers_maxEjectionPercentage() {
(double) config.maxEjectionPercent / 100);
}


/**
* The failure percentage algorithm leaves a healthy set of addresses alone.
* The failure percentage algorithm leaves a healthy set of addresses alone.
*/
@Test
public void failurePercentageHealthy() {
public void failurePercentageNoOutliers() {
OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
.setMaxEjectionPercent(50)
.setFailurePercentageEjection(
new FailurePercentageEjection.Builder()
.setMinimumHosts(3)
.setRequestVolume(10).build())
.setChildPolicy(new PolicySelection(roundRobinLbProvider, null)).build();

loadBalancer.handleResolvedAddresses(buildResolvedAddress(config, servers));

// By default all calls will return OK.
generateLoad(config, ImmutableMap.of());

// Move forward in time to a point where the detection timer has fired.
fakeClock.forwardTime(config.intervalSecs + 1, TimeUnit.SECONDS);

// No outliers, no ejections.
assertEjectedChannels(ImmutableSet.of());
}

/**
 * The failure percentage algorithm ejects the outlier.
 */
@Test
public void failurePercentageOneOutlier() {
  // Failure percentage ejection with enforcement left at the builder's default.
  FailurePercentageEjection failurePercentageEjection = new FailurePercentageEjection.Builder()
      .setMinimumHosts(3)
      .setRequestVolume(10)
      .build();
  OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
      .setMaxEjectionPercent(50)
      .setFailurePercentageEjection(failurePercentageEjection)
      .setChildPolicy(new PolicySelection(roundRobinLbProvider, null))
      .build();

  // Calls on subchannel1 fail with DEADLINE_EXCEEDED.
  generateLoad(config, ImmutableMap.of(subchannel1, Status.DEADLINE_EXCEEDED));

  // Advance the fake clock just past the detection interval so the detection timer fires.
  long secondsPastInterval = config.intervalSecs + 1;
  fakeClock.forwardTime(secondsPastInterval, TimeUnit.SECONDS);

  // Only the address whose subchannel was returning errors should be ejected.
  Set<EquivalentAddressGroup> expectedEjections = ImmutableSet.of(servers.get(0));
  assertEjectedChannels(expectedEjections);
}

/**
 * The failure percentage algorithm ignores addresses without enough volume.
 */
@Test
public void failurePercentageOneOutlier_notEnoughVolume() {
  // The required request volume is set higher than the load this test generates.
  FailurePercentageEjection failurePercentageEjection = new FailurePercentageEjection.Builder()
      .setMinimumHosts(3)
      .setRequestVolume(100) // We won't produce this much volume...
      .build();
  OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
      .setMaxEjectionPercent(50)
      .setFailurePercentageEjection(failurePercentageEjection)
      .setChildPolicy(new PolicySelection(roundRobinLbProvider, null))
      .build();

  // Calls on subchannel1 fail with DEADLINE_EXCEEDED.
  generateLoad(config, ImmutableMap.of(subchannel1, Status.DEADLINE_EXCEEDED));

  // Advance the fake clock just past the detection interval so the detection timer fires.
  long secondsPastInterval = config.intervalSecs + 1;
  fakeClock.forwardTime(secondsPastInterval, TimeUnit.SECONDS);

  // Without enough volume there should be no ejections at all.
  Set<EquivalentAddressGroup> expectedEjections = ImmutableSet.of();
  assertEjectedChannels(expectedEjections);
}

/**
 * With an enforcement percentage of zero, the failure percentage algorithm must not eject an
 * outlier.
 */
@Test
public void failurePercentageOneOutlier_enforcementPercentage() {
  // Failure percentage ejection is configured, but with enforcement set to 0%.
  FailurePercentageEjection failurePercentageEjection = new FailurePercentageEjection.Builder()
      .setMinimumHosts(3)
      .setRequestVolume(10)
      .setEnforcementPercentage(0)
      .build();
  OutlierDetectionLoadBalancerConfig config = new OutlierDetectionLoadBalancerConfig.Builder()
      .setMaxEjectionPercent(50)
      .setFailurePercentageEjection(failurePercentageEjection)
      .setChildPolicy(new PolicySelection(roundRobinLbProvider, null))
      .build();

  // Calls on subchannel1 fail with DEADLINE_EXCEEDED.
  generateLoad(config, ImmutableMap.of(subchannel1, Status.DEADLINE_EXCEEDED));

  // Advance the fake clock just past the detection interval so the detection timer fires.
  long secondsPastInterval = config.intervalSecs + 1;
  fakeClock.forwardTime(secondsPastInterval, TimeUnit.SECONDS);

  // An outlier exists, yet a 0% enforcement percentage means no address may be ejected.
  Set<EquivalentAddressGroup> expectedEjections = ImmutableSet.of();
  assertEjectedChannels(expectedEjections);
}

@Test
public void mathChecksOut() {
Expand Down Expand Up @@ -588,4 +690,12 @@ private void generateLoad(OutlierDetectionLoadBalancerConfig config,
statusMap.containsKey(subchannel) ? statusMap.get(subchannel) : Status.OK);
}
}

/**
 * Asserts that exactly the given addresses are ejected and every other tracked address is not.
 *
 * <p>The set of addresses whose trackers report ejected subchannels is collected first and then
 * compared with the expected set in a single assertion. This way an expected address that is
 * missing from the tracker map fails the assertion instead of being silently skipped, and a
 * failure reports both sets.
 *
 * @param addresses the addresses that are expected to be ejected
 */
void assertEjectedChannels(Set<EquivalentAddressGroup> addresses) {
  ImmutableSet.Builder<EquivalentAddressGroup> ejected = ImmutableSet.builder();
  for (Entry<EquivalentAddressGroup, AddressTracker> entry : loadBalancer.trackerMap.entrySet()) {
    if (entry.getValue().subchannelsEjected()) {
      ejected.add(entry.getKey());
    }
  }
  assertThat(ejected.build()).isEqualTo(addresses);
}
}

0 comments on commit ee4e748

Please sign in to comment.