Skip to content

Commit

Permalink
remove debug logging
Browse files Browse the repository at this point in the history
Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
  • Loading branch information
WeichenXu123 committed Sep 13, 2022
1 parent 9f8f64e commit 8596b89
Showing 1 changed file with 1 addition and 11 deletions.
12 changes: 1 addition & 11 deletions python-package/xgboost/spark/core.py
Expand Up @@ -684,16 +684,6 @@ def _fit(self, dataset):
num_workers,
)

def log_partition_rows(df, msg):
    """Log how many rows each partition of ``df`` holds.

    Debug helper. It runs ``df.rdd.mapPartitions(...).collect()`` to gather
    one row count per partition, then emits a single warning-level log
    record of the form ``debug-repartition: <msg>: [<counts>]``.

    The logger is named after ``self.__class__.__name__``; ``self`` is not a
    parameter and is taken from the enclosing scope.

    Parameters
    ----------
    df :
        Object exposing ``.rdd.mapPartitions`` (presumably a Spark
        DataFrame -- confirm against the caller).
    msg :
        Label interpolated into the log line to identify the call site.
    """

    def count_partition_rows(rows):
        # Count by streaming through the partition instead of building a
        # list of every row, so memory use stays constant per partition.
        yield sum(1 for _ in rows)

    result = df.rdd.mapPartitions(count_partition_rows).collect()
    get_logger(self.__class__.__name__).warning(
        f"debug-repartition: {msg}: {list(result)}\n"
    )

log_partition_rows(dataset, "before-repartition")
if self._repartition_needed(dataset) or (
self.isDefined(self.validationIndicatorCol)
and self.getOrDefault(self.validationIndicatorCol)
Expand All @@ -706,7 +696,7 @@ def count_partition_rows(iter):
# result unbalance. Directly using `.repartition(N)` might result in some
# empty partitions.
dataset = dataset.repartition(num_workers, rand(1))
log_partition_rows(dataset, "after-repartition")

train_params = self._get_distributed_train_params(dataset)
booster_params, train_call_kwargs_params = self._get_xgb_train_call_args(
train_params
Expand Down

0 comments on commit 8596b89

Please sign in to comment.