From 06a3f59778ed3d0a30f6d9c10f4d6a1e365e83b6 Mon Sep 17 00:00:00 2001
From: Jiaming Yuan
Date: Thu, 25 Apr 2024 05:00:30 +0800
Subject: [PATCH] [pyspark] Sort workers by task ID.

The PySpark interface uses the partition ID as the task ID. Unlike the
order in which workers connect to the tracker, partition IDs are
deterministic across runs.
---
 python-package/xgboost/spark/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/xgboost/spark/utils.py b/python-package/xgboost/spark/utils.py
index 7dbe290ae3e5..1403596c0c2a 100644
--- a/python-package/xgboost/spark/utils.py
+++ b/python-package/xgboost/spark/utils.py
@@ -55,7 +55,7 @@ def _start_tracker(context: BarrierTaskContext, n_workers: int) -> Dict[str, Any
     """Start Rabit tracker with n_workers"""
     env: Dict[str, Any] = {"DMLC_NUM_WORKER": n_workers}
     host = _get_host_ip(context)
-    rabit_context = RabitTracker(host_ip=host, n_workers=n_workers)
+    rabit_context = RabitTracker(host_ip=host, n_workers=n_workers, sortby="task")
     env.update(rabit_context.worker_envs())
     rabit_context.start(n_workers)
     thread = Thread(target=rabit_context.join)
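
Note: the following is a minimal, self-contained sketch of the idea behind
sortby="task" -- it uses hypothetical helper names, not the tracker's actual
internals. It shows why ranking workers by a stable key (the Spark partition
ID, used here as the task ID) yields the same rank assignment on every run,
whereas ranking by connection order does not, since workers may reach the
tracker in any order.

    from typing import List, Tuple

    def rank_by_task(workers: List[Tuple[str, int]]) -> List[str]:
        # Assign ranks by sorting on the task (partition) ID, which Spark
        # keeps stable across runs, instead of by arrival order, which
        # varies with scheduling and network timing.
        return [host for host, _task_id in sorted(workers, key=lambda w: w[1])]

    # Two runs in which workers reach the tracker in different orders still
    # produce the same rank -> host mapping, because the sort key is the
    # deterministic task ID rather than the nondeterministic arrival order.
    run_a = [("node-b", 1), ("node-a", 0), ("node-c", 2)]
    run_b = [("node-c", 2), ("node-a", 0), ("node-b", 1)]
    assert rank_by_task(run_a) == rank_by_task(run_b) == ["node-a", "node-b", "node-c"]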