From 3c9f122f2b5f8cb6ba9c168c6dbc3389e3a3497a Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Mon, 18 Jan 2021 10:44:01 +0800 Subject: [PATCH] fix potential TaskFailedListener's callback won't be called there is possibility that onJobStart of TaskFailedListener won't be called, if the job is submitted before the other thread adds addSparkListener. detail can be found at https://github.com/dmlc/xgboost/pull/6019#issuecomment-760937628 --- .../scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala index 62d5eb0b4b71..f838512af67e 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala @@ -613,8 +613,12 @@ object XGBoost extends Serializable { } } sparkJobThread.setUncaughtExceptionHandler(tracker) - sparkJobThread.start() - val trackerReturnVal = parallelismTracker.execute(tracker.waitFor(0L)) + + val trackerReturnVal = parallelismTracker.execute { + sparkJobThread.start() + tracker.waitFor(0L) + } + logger.info(s"Rabit returns with exit code $trackerReturnVal") val (booster, metrics) = postTrackerReturnProcessing(trackerReturnVal, boostersAndMetrics, sparkJobThread)