Skip to content

Commit

Permalink
[grid] Default to Node healthcheck instead of initial status endpoint…
Browse files Browse the repository at this point in the history
… check

This will allow a Relay Node to boot and check periodically
for the status endpoint, and wait indefinitely for it to come
up.

If the Relay Node reports itself as DOWN because the endpoint
is not responding with 200, then the Node will eventually be
removed by the Distributor. However, when the endpoint responds
with 200 again, the heartbeat event will have UP as a status and
the Node will be registered again.

Fixes #10908
  • Loading branch information
diemol committed Aug 3, 2022
1 parent a9a526e commit 14242af
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 16 deletions.
14 changes: 7 additions & 7 deletions java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java
Expand Up @@ -21,9 +21,6 @@
import com.google.common.collect.ImmutableSet;
import com.google.common.net.MediaType;

import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;

import org.openqa.selenium.BuildInfo;
import org.openqa.selenium.cli.CliCommand;
import org.openqa.selenium.events.EventBus;
Expand Down Expand Up @@ -54,6 +51,9 @@
import org.openqa.selenium.remote.http.Route;
import org.openqa.selenium.remote.tracing.Tracer;

import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;

import java.util.Collections;
import java.util.Set;
import java.util.concurrent.Executors;
Expand Down Expand Up @@ -85,7 +85,7 @@ public String getName() {

@Override
public String getDescription() {
return "Adds this server as a node in the selenium grid.";
return "Adds this server as a Node in the Selenium Grid.";
}

@Override
Expand Down Expand Up @@ -203,16 +203,16 @@ public NettyServer start() {
Executors.newSingleThreadExecutor().submit(() -> {
Failsafe.with(registrationPolicy).run(
() -> {
if (nodeRegistered.get()) {
throw new InterruptedException("Stopping registration thread.");
}
HealthCheck.Result check = node.getHealthCheck().check();
if (DOWN.equals(check.getAvailability())) {
LOG.severe("Node is not alive: " + check.getMessage());
// Throw an exception to force another check sooner.
throw new UnsupportedOperationException("Node cannot be registered");
}
bus.fire(new NodeStatusEvent(node.getStatus()));
if (nodeRegistered.get()) {
throw new InterruptedException("Stopping registration thread.");
}
LOG.info("Sending registration event...");
}
);
Expand Down
10 changes: 2 additions & 8 deletions java/src/org/openqa/selenium/grid/node/relay/RelayOptions.java
Expand Up @@ -27,7 +27,6 @@
import org.openqa.selenium.grid.node.SessionFactory;
import org.openqa.selenium.internal.Require;
import org.openqa.selenium.json.Json;
import org.openqa.selenium.remote.http.ClientConfig;
import org.openqa.selenium.remote.http.HttpClient;
import org.openqa.selenium.remote.http.HttpRequest;
import org.openqa.selenium.remote.http.HttpResponse;
Expand Down Expand Up @@ -105,6 +104,8 @@ public URI getServiceStatusUri() {
}
}

// Method being used in SessionSlot
@SuppressWarnings("unused")
private boolean isServiceUp(HttpClient client) {
URI serviceStatusUri = getServiceStatusUri();
if (serviceStatusUri == null) {
Expand All @@ -125,13 +126,6 @@ public Map<Capabilities, Collection<SessionFactory>> getSessionFactories(
HttpClient.Factory clientFactory,
Duration sessionTimeout) {

HttpClient client = clientFactory
.createClient(ClientConfig.defaultConfig().baseUri(getServiceUri()));

if (!isServiceUp(client)) {
throw new ConfigException("Unable to reach the service at " + getServiceUri());
}

List<String> allConfigs = config.getAll(RELAY_SECTION, "configs")
.orElseThrow(() -> new ConfigException("Unable to find configs for " + getServiceUri()));

Expand Down
Expand Up @@ -215,7 +215,12 @@ public boolean isServiceUp() {
LOG.log(Debug.getDebugLogLevel(), Contents.string(response));
return response.getStatus() == 200;
} catch (Exception e) {
LOG.log(Level.WARNING, "Error checking service status " + serviceStatusUrl, e);
LOG.log(
Level.WARNING,
() -> String.format("Error checking service status %s. %s",
serviceStatusUrl,
e.getMessage()));
LOG.log(Debug.getDebugLogLevel(), "Error checking service status " + serviceStatusUrl, e);
}
return false;
}
Expand Down

0 comments on commit 14242af

Please sign in to comment.