From 9d2832a3a3d17cfefa22f024b89228e3f0e16085 Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Thu, 21 Jan 2021 14:20:32 +0800 Subject: [PATCH] fix potential TaskFailedListener's callback won't be called (#6612) there is possibility that onJobStart of TaskFailedListener won't be called, if the job is submitted before the other thread adds addSparkListener. detail can be found at https://github.com/dmlc/xgboost/pull/6019#issuecomment-760937628 --- .../scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala index 62d5eb0b4b71..f838512af67e 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala @@ -613,8 +613,12 @@ object XGBoost extends Serializable { } } sparkJobThread.setUncaughtExceptionHandler(tracker) - sparkJobThread.start() - val trackerReturnVal = parallelismTracker.execute(tracker.waitFor(0L)) + + val trackerReturnVal = parallelismTracker.execute { + sparkJobThread.start() + tracker.waitFor(0L) + } + logger.info(s"Rabit returns with exit code $trackerReturnVal") val (booster, metrics) = postTrackerReturnProcessing(trackerReturnVal, boostersAndMetrics, sparkJobThread)