Skip to content

Commit

Permalink
Log running tasks in EsqlDisruptionIT (#108440) (#108467)
Browse files Browse the repository at this point in the history
This PR logs tasks that are running after the disruption is cleared, 
allowing us to investigate why the disruption tests failed in #107347.

Relates #107347
  • Loading branch information
dnhatn committed May 9, 2024
1 parent ee58956 commit 1d99311
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 1 deletion.
Expand Up @@ -121,7 +121,6 @@ public void testRow() {
}
}

/**
 * Exercises the grouped-average query with an explicit sort on the grouping column
 * and a limit of two rows, delegating all checks to {@code testFromStatsGroupingAvgImpl}.
 * NOTE(review): the second and third arguments look like the grouping column and the
 * aggregate column names - confirm against the helper's signature.
 */
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/107347")
public void testFromStatsGroupingAvgWithSort() {
final String sortedGroupingQuery = "from test | stats avg(count) by data | sort data | limit 2";
testFromStatsGroupingAvgImpl(sortedGroupingQuery, "data", "avg(count)");
}
Expand Down
Expand Up @@ -8,12 +8,14 @@
package org.elasticsearch.xpack.esql.action;

import org.elasticsearch.action.ActionFuture;
import org.elasticsearch.action.admin.cluster.node.tasks.list.TransportListTasksAction;
import org.elasticsearch.cluster.coordination.Coordinator;
import org.elasticsearch.cluster.coordination.FollowersChecker;
import org.elasticsearch.cluster.coordination.LeaderChecker;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.compute.operator.exchange.ExchangeService;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.health.node.selection.HealthNode;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.disruption.NetworkDisruption;
Expand Down Expand Up @@ -91,6 +93,21 @@ private EsqlQueryResponse runQueryWithDisruption(EsqlQueryRequest request) {
try {
return future.actionGet(2, TimeUnit.MINUTES);
} catch (Exception e) {
logger.info(
"running tasks: {}",
client().admin()
.cluster()
.prepareListTasks()
.get()
.getTasks()
.stream()
.filter(
// Skip the tasks that would get in the way while debugging
t -> false == t.action().contains(TransportListTasksAction.TYPE.name())
&& false == t.action().contains(HealthNode.TASK_NAME)
)
.toList()
);
assertTrue("request must be failed or completed after clearing disruption", future.isDone());
ensureBlocksReleased();
logger.info("--> failed to execute esql query with disruption; retrying...", e);
Expand Down

0 comments on commit 1d99311

Please sign in to comment.