From 14242af3d40453ce4bb3008a80057eaf76e6f11b Mon Sep 17 00:00:00 2001 From: Diego Molina Date: Wed, 3 Aug 2022 05:50:40 +0200 Subject: [PATCH] [grid] Default to Node healthcheck instead of initial status endpoint check This will allow a Relay Node to boot and check periodically for the status endpoint, and wait indefinitely for it to come up. If the Relay Node reports itself as DOWN because the endpoint is not responding with 200, then the Node will eventually be removed by the Distributor. However, when the endpoint responds with 200 again, the heartbeat event will have UP as a status and the Node will be registered again. Fixes #10908 --- .../selenium/grid/node/httpd/NodeServer.java | 14 +++++++------- .../selenium/grid/node/relay/RelayOptions.java | 10 ++-------- .../grid/node/relay/RelaySessionFactory.java | 7 ++++++- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java b/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java index 5d58ed3e4769c..044b5944ba1ee 100644 --- a/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java +++ b/java/src/org/openqa/selenium/grid/node/httpd/NodeServer.java @@ -21,9 +21,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.net.MediaType; -import dev.failsafe.Failsafe; -import dev.failsafe.RetryPolicy; - import org.openqa.selenium.BuildInfo; import org.openqa.selenium.cli.CliCommand; import org.openqa.selenium.events.EventBus; @@ -54,6 +51,9 @@ import org.openqa.selenium.remote.http.Route; import org.openqa.selenium.remote.tracing.Tracer; +import dev.failsafe.Failsafe; +import dev.failsafe.RetryPolicy; + import java.util.Collections; import java.util.Set; import java.util.concurrent.Executors; @@ -85,7 +85,7 @@ public String getName() { @Override public String getDescription() { - return "Adds this server as a node in the selenium grid."; + return "Adds this server as a Node in the Selenium Grid."; } @Override @@ -203,6 +203,9 @@ public NettyServer start() { Executors.newSingleThreadExecutor().submit(() -> { Failsafe.with(registrationPolicy).run( () -> { + if (nodeRegistered.get()) { + throw new InterruptedException("Stopping registration thread."); + } HealthCheck.Result check = node.getHealthCheck().check(); if (DOWN.equals(check.getAvailability())) { LOG.severe("Node is not alive: " + check.getMessage()); @@ -210,9 +213,6 @@ public NettyServer start() { throw new UnsupportedOperationException("Node cannot be registered"); } bus.fire(new NodeStatusEvent(node.getStatus())); - if (nodeRegistered.get()) { - throw new InterruptedException("Stopping registration thread."); - } LOG.info("Sending registration event..."); } ); diff --git a/java/src/org/openqa/selenium/grid/node/relay/RelayOptions.java b/java/src/org/openqa/selenium/grid/node/relay/RelayOptions.java index 28f9d2a35c431..fdfff64009576 100644 --- a/java/src/org/openqa/selenium/grid/node/relay/RelayOptions.java +++ b/java/src/org/openqa/selenium/grid/node/relay/RelayOptions.java @@ -27,7 +27,6 @@ import org.openqa.selenium.grid.node.SessionFactory; import org.openqa.selenium.internal.Require; import org.openqa.selenium.json.Json; -import org.openqa.selenium.remote.http.ClientConfig; import org.openqa.selenium.remote.http.HttpClient; import org.openqa.selenium.remote.http.HttpRequest; import org.openqa.selenium.remote.http.HttpResponse; @@ -105,6 +104,8 @@ public URI getServiceStatusUri() { } } + // Method being used in SessionSlot + @SuppressWarnings("unused") private boolean isServiceUp(HttpClient client) { URI serviceStatusUri = getServiceStatusUri(); if (serviceStatusUri == null) { @@ -125,13 +126,6 @@ public Map> getSessionFactories( HttpClient.Factory clientFactory, Duration sessionTimeout) { - HttpClient client = clientFactory - .createClient(ClientConfig.defaultConfig().baseUri(getServiceUri())); - - if (!isServiceUp(client)) { - throw new ConfigException("Unable to reach the service at " + getServiceUri()); - } - List allConfigs = config.getAll(RELAY_SECTION, "configs") .orElseThrow(() -> new ConfigException("Unable to find configs for " + getServiceUri())); diff --git a/java/src/org/openqa/selenium/grid/node/relay/RelaySessionFactory.java b/java/src/org/openqa/selenium/grid/node/relay/RelaySessionFactory.java index 51bd7c700c43b..7e766c9fa0cd6 100644 --- a/java/src/org/openqa/selenium/grid/node/relay/RelaySessionFactory.java +++ b/java/src/org/openqa/selenium/grid/node/relay/RelaySessionFactory.java @@ -215,7 +215,12 @@ public boolean isServiceUp() { LOG.log(Debug.getDebugLogLevel(), Contents.string(response)); return response.getStatus() == 200; } catch (Exception e) { - LOG.log(Level.WARNING, "Error checking service status " + serviceStatusUrl, e); + LOG.log( + Level.WARNING, + () -> String.format("Error checking service status %s. %s", + serviceStatusUrl, + e.getMessage())); + LOG.log(Debug.getDebugLogLevel(), "Error checking service status " + serviceStatusUrl, e); } return false; }