Skip to content

Commit

Permalink
xds: Fail RPCs with error details when resources are deleted
Browse files Browse the repository at this point in the history
Previously if LDS/RDS were missing or improperly configured RPCs would
fail with "UNAVAILABLE: NameResolver returned no usable address errors".
That is very confusing and not helpful for debugging.

Ideally we'd also include the node id in this error message, but that's
a bit more involved and this is a huge improvement even without it.

b/237539851
  • Loading branch information
ejona86 committed Jul 6, 2022
1 parent 2fc7ac4 commit 3e09ea0
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 21 deletions.
48 changes: 34 additions & 14 deletions xds/src/main/java/io/grpc/xds/XdsNameResolver.java
Expand Up @@ -669,14 +669,22 @@ private static String prefixedClusterSpecifierPluginName(String pluginName) {
return "cluster_specifier_plugin:" + pluginName;
}

private static final class FailingConfigSelector extends InternalConfigSelector {
private final Result result;

public FailingConfigSelector(Status error) {
this.result = Result.forError(error);
}

@Override
public Result selectConfig(PickSubchannelArgs args) {
return result;
}
}

private class ResolveState implements LdsResourceWatcher {
private final ConfigOrError emptyServiceConfig =
serviceConfigParser.parseServiceConfig(Collections.<String, Object>emptyMap());
private final ResolutionResult emptyResult =
ResolutionResult.newBuilder()
.setServiceConfig(emptyServiceConfig)
// let channel take action for no config selector
.build();
private final String ldsResourceName;
private boolean stopped;
@Nullable
Expand Down Expand Up @@ -738,9 +746,10 @@ public void run() {
if (stopped) {
return;
}
logger.log(XdsLogLevel.INFO, "LDS resource {0} unavailable", resourceName);
String error = "LDS resource does not exist: " + resourceName;
logger.log(XdsLogLevel.INFO, error);
cleanUpRouteDiscoveryState();
cleanUpRoutes();
cleanUpRoutes(error);
}
});
}
Expand All @@ -762,9 +771,9 @@ private void updateRoutes(List<VirtualHost> virtualHosts, long httpMaxStreamDura
@Nullable List<NamedFilterConfig> filterConfigs) {
VirtualHost virtualHost = findVirtualHostForHostName(virtualHosts, ldsResourceName);
if (virtualHost == null) {
logger.log(XdsLogLevel.WARNING,
"Failed to find virtual host matching hostname {0}", ldsResourceName);
cleanUpRoutes();
String error = "Failed to find virtual host matching hostname: " + ldsResourceName;
logger.log(XdsLogLevel.WARNING, error);
cleanUpRoutes(error);
return;
}

Expand Down Expand Up @@ -860,7 +869,7 @@ private void updateRoutes(List<VirtualHost> virtualHosts, long httpMaxStreamDura
}
}

private void cleanUpRoutes() {
private void cleanUpRoutes(String error) {
if (existingClusters != null) {
for (String cluster : existingClusters) {
int count = clusterRefs.get(cluster).refCount.decrementAndGet();
Expand All @@ -871,7 +880,17 @@ private void cleanUpRoutes() {
existingClusters = null;
}
routingConfig = RoutingConfig.empty;
listener.onResult(emptyResult);
// Without addresses the default LB (normally pick_first) should become TRANSIENT_FAILURE, and
// the config selector handles the error message itself. Once the LB API allows providing
// failure information for addresses yet still providing a service config, the config seector
// could be avoided.
listener.onResult(ResolutionResult.newBuilder()
.setAttributes(Attributes.newBuilder()
.set(InternalConfigSelector.KEY,
new FailingConfigSelector(Status.UNAVAILABLE.withDescription(error)))
.build())
.setServiceConfig(emptyServiceConfig)
.build());
receivedConfig = true;
}

Expand Down Expand Up @@ -938,8 +957,9 @@ public void run() {
if (RouteDiscoveryState.this != routeDiscoveryState) {
return;
}
logger.log(XdsLogLevel.INFO, "RDS resource {0} unavailable", resourceName);
cleanUpRoutes();
String error = "RDS resource does not exist: " + resourceName;
logger.log(XdsLogLevel.INFO, error);
cleanUpRoutes(error);
}
});
}
Expand Down
19 changes: 12 additions & 7 deletions xds/src/test/java/io/grpc/xds/XdsNameResolverTest.java
Expand Up @@ -288,7 +288,7 @@ public void resolving_ldsResourceNotFound() {
resolver.start(mockListener);
FakeXdsClient xdsClient = (FakeXdsClient) resolver.getXdsClient();
xdsClient.deliverLdsResourceNotFound();
assertEmptyResolutionResult();
assertEmptyResolutionResult(expectedLdsResourceName);
}

@SuppressWarnings("unchecked")
Expand Down Expand Up @@ -346,7 +346,7 @@ public void resolving_rdsResourceNotFound() {
FakeXdsClient xdsClient = (FakeXdsClient) resolver.getXdsClient();
xdsClient.deliverLdsUpdateForRdsName(RDS_RESOURCE_NAME);
xdsClient.deliverRdsResourceNotFound(RDS_RESOURCE_NAME);
assertEmptyResolutionResult();
assertEmptyResolutionResult(RDS_RESOURCE_NAME);
}

@SuppressWarnings("unchecked")
Expand Down Expand Up @@ -374,7 +374,7 @@ public void resolving_ldsResourceRevokedAndAddedBack() {
reset(mockListener);
xdsClient.deliverLdsResourceNotFound(); // revoke LDS resource
assertThat(xdsClient.rdsResource).isNull(); // stop subscribing to stale RDS resource
assertEmptyResolutionResult();
assertEmptyResolutionResult(expectedLdsResourceName);

reset(mockListener);
xdsClient.deliverLdsUpdateForRdsName(RDS_RESOURCE_NAME);
Expand Down Expand Up @@ -412,7 +412,7 @@ public void resolving_rdsResourceRevokedAndAddedBack() {

reset(mockListener);
xdsClient.deliverRdsResourceNotFound(RDS_RESOURCE_NAME); // revoke RDS resource
assertEmptyResolutionResult();
assertEmptyResolutionResult(RDS_RESOURCE_NAME);

// Simulate management server adds back the previously used RDS resource.
reset(mockListener);
Expand Down Expand Up @@ -470,7 +470,7 @@ public void resolving_matchingVirtualHostNotFoundInLdsResource() {
resolver.start(mockListener);
FakeXdsClient xdsClient = (FakeXdsClient) resolver.getXdsClient();
xdsClient.deliverLdsUpdate(0L, buildUnmatchedVirtualHosts());
assertEmptyResolutionResult();
assertEmptyResolutionResult(expectedLdsResourceName);
}

@Test
Expand All @@ -479,7 +479,7 @@ public void resolving_matchingVirtualHostNotFoundInRdsResource() {
FakeXdsClient xdsClient = (FakeXdsClient) resolver.getXdsClient();
xdsClient.deliverLdsUpdateForRdsName(RDS_RESOURCE_NAME);
xdsClient.deliverRdsUpdate(RDS_RESOURCE_NAME, buildUnmatchedVirtualHosts());
assertEmptyResolutionResult();
assertEmptyResolutionResult(expectedLdsResourceName);
}

private List<VirtualHost> buildUnmatchedVirtualHosts() {
Expand Down Expand Up @@ -1056,11 +1056,16 @@ public void resolved_simpleCallSucceeds_routeToRls() {
}

@SuppressWarnings("unchecked")
private void assertEmptyResolutionResult() {
private void assertEmptyResolutionResult(String resource) {
verify(mockListener).onResult(resolutionResultCaptor.capture());
ResolutionResult result = resolutionResultCaptor.getValue();
assertThat(result.getAddresses()).isEmpty();
assertThat((Map<String, ?>) result.getServiceConfig().getConfig()).isEmpty();
InternalConfigSelector configSelector = result.getAttributes().get(InternalConfigSelector.KEY);
Result configResult = configSelector.selectConfig(
new PickSubchannelArgsImpl(call1.methodDescriptor, new Metadata(), CallOptions.DEFAULT));
assertThat(configResult.getStatus().getCode()).isEqualTo(Status.Code.UNAVAILABLE);
assertThat(configResult.getStatus().getDescription()).contains(resource);
}

private void assertCallSelectClusterResult(
Expand Down

0 comments on commit 3e09ea0

Please sign in to comment.