From e0d49501a4a93d9b5ebda3210b9233e518e512dc Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Fri, 2 Sep 2022 12:05:09 +0800 Subject: [PATCH] [pyspark] Cleanup the comments --- python-package/setup.py | 2 +- python-package/xgboost/spark/core.py | 17 ++++++++++++++++- python-package/xgboost/spark/params.py | 5 ++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/python-package/setup.py b/python-package/setup.py index ee42a9f7e0a0..8a1b1b709514 100644 --- a/python-package/setup.py +++ b/python-package/setup.py @@ -322,7 +322,7 @@ def run(self) -> None: # - python setup.py bdist_wheel && pip install # When XGBoost is compiled directly with CMake: - # - pip install . -e + # - pip install -e . # - python setup.py develop # same as above logging.basicConfig(level=logging.INFO) diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index ca1acdeaebb6..edff40349676 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -713,6 +713,13 @@ def _fit(self, dataset): is_local = _is_local(_get_spark_session().sparkContext) + # Remove the parameters whose value is None + booster_params = {k: v for k, v in booster_params.items() if v is not None} + train_call_kwargs_params = { + k: v for k, v in train_call_kwargs_params.items() if v is not None + } + dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None} + def _train_booster(pandas_df_iter): """Takes in an RDD partition and outputs a booster for that partition after going through the Rabit Ring protocol @@ -737,6 +744,15 @@ def _train_booster(pandas_df_iter): _rabit_args = "" if context.partitionId() == 0: + get_logger("XGBoostPySpark").info( + "booster params: %s\n" + "train_call_kwargs_params: %s\n" + "dmatrix_kwargs: %s", + booster_params, + train_call_kwargs_params, + dmatrix_kwargs, + ) + _rabit_args = str(_get_rabit_args(context, num_workers)) messages = context.allGather(message=str(_rabit_args)) @@ -754,7 +770,6 @@ def _train_booster(pandas_df_iter): dval = [(dtrain, "training"), (dvalid, "validation")] else: dval = None - booster = worker_train( params=booster_params, dtrain=dtrain, diff --git a/python-package/xgboost/spark/params.py b/python-package/xgboost/spark/params.py index ed46ba20ec40..2053b43fce87 100644 --- a/python-package/xgboost/spark/params.py +++ b/python-package/xgboost/spark/params.py @@ -36,7 +36,7 @@ class HasBaseMarginCol(Params): class HasFeaturesCols(Params): """ - Mixin for param featuresCols: a list of feature column names. + Mixin for param features_cols: a list of feature column names. This parameter is taken effect only when use_gpu is enabled. """ @@ -76,8 +76,7 @@ def __init__(self): class HasQueryIdCol(Params): """ - Mixin for param featuresCols: a list of feature column names. - This parameter is taken effect only when use_gpu is enabled. + Mixin for param qid_col: query id column name. """ qid_col = Param(