From ba0534ac6fd7a04bbb0ddb370a0ce1dd8980d759 Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Mon, 18 Jan 2021 10:44:01 +0800 Subject: [PATCH] fix potential TaskFailedListener's callback won't be called there is possibility that onJobStart of TaskFailedListener won't be called, if the job is submitted before the other thread adds addSparkListener. detail can be found at https://github.com/dmlc/xgboost/pull/6019#issuecomment-760937628 --- .../scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala index 62d5eb0b4b71..f838512af67e 100644 --- a/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala +++ b/jvm-packages/xgboost4j-spark/src/main/scala/ml/dmlc/xgboost4j/scala/spark/XGBoost.scala @@ -613,8 +613,12 @@ object XGBoost extends Serializable { } } sparkJobThread.setUncaughtExceptionHandler(tracker) - sparkJobThread.start() - val trackerReturnVal = parallelismTracker.execute(tracker.waitFor(0L)) + + val trackerReturnVal = parallelismTracker.execute { + sparkJobThread.start() + tracker.waitFor(0L) + } + logger.info(s"Rabit returns with exit code $trackerReturnVal") val (booster, metrics) = postTrackerReturnProcessing(trackerReturnVal, boostersAndMetrics, sparkJobThread)