fixes.

dmlc · Jul 21, 2022 · 27d7cb8
1 parent cc9ae98
commit 27d7cb8
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 17 deletions.
diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py
@@ -131,7 +131,7 @@ def concat(value: Sequence[_T]) -> _T:  # pylint: disable=too-many-return-statem
 
         return CUDF_concat(value, axis=0)
     if lazy_isinstance(value[0], "cupy._core.core", "ndarray"):
-        import cupy
+        import cupy  # pylint: disable=import-error
 
         # pylint: disable=c-extension-no-member,no-member
         d = cupy.cuda.runtime.getDevice()

diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py
@@ -227,6 +227,7 @@ def __init__(self, args: List[bytes]) -> None:
 
 
 def dconcat(value: Sequence[_T]) -> _T:  # pylint: disable=too-many-return-statements
+    """Concatenate sequence of partitions."""
     try:
         return concat(value)
     except TypeError:

diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py
@@ -715,7 +715,7 @@ def _transform(self, dataset):
                 alias.margin
             )
 
-        @pandas_udf(FloatType())
+        @pandas_udf("double")
         def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
             model = xgb_sklearn_model
             for data in iterator:
@@ -779,17 +779,15 @@ def transform_margin(margins: np.ndarray):
                 classone_probs = expit(margins)
                 classzero_probs = 1.0 - classone_probs
                 raw_preds = np.vstack((-margins, margins)).transpose()
-                class_probs = np.vstack(
-                    (classzero_probs, classone_probs)
-                ).transpose()
+                class_probs = np.vstack((classzero_probs, classone_probs)).transpose()
             else:
                 # multinomial case
                 raw_preds = margins
                 class_probs = softmax(raw_preds, axis=1)
             return raw_preds, class_probs
 
         @pandas_udf(
-            "rawPrediction array<float>, prediction float, probability array<float>"
+            "rawPrediction array<double>, prediction double, probability array<double>"
         )
         def predict_udf(
             iterator: Iterator[Tuple[pd.Series, ...]]

diff --git a/python-package/xgboost/spark/data.py b/python-package/xgboost/spark/data.py
@@ -1,27 +1,23 @@
+"""Utilities for processing spark partitions."""
 from collections import defaultdict, namedtuple
 from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
 
 import numpy as np
 import pandas as pd
-
 from xgboost.compat import concat
+
 from xgboost import DMatrix
 
 
-def stack_series(df: pd.Series) -> np.ndarray:
-    array = df.to_numpy(copy=False)
+def stack_series(series: pd.Series) -> np.ndarray:
+    """Stack a series of arrays."""
+    array = series.to_numpy(copy=False)
     if array.ndim == 1:
         array = array.reshape(array.shape[0], 1)
     array = np.stack(array[:, 0])
     return array
 
 
-def concat_or_none(seq: Optional[Sequence[np.ndarray]]) -> Optional[np.ndarray]:
-    if seq:
-        return concat(seq)
-    return None
-
-
 # Global constant for defining column alias shared between estimator and data
 # processing procedures.
 Alias = namedtuple("Alias", ("data", "label", "weight", "margin", "valid"))
@@ -71,6 +67,11 @@ def make_blob(part: pd.DataFrame, is_valid: bool) -> None:
         if valid is not None:
             make_blob(valid, True)
 
+    def concat_or_none(seq: Optional[Sequence[np.ndarray]]) -> Optional[np.ndarray]:
+        if seq:
+            return concat(seq)
+        return None
+
     def make(values: Dict[str, List[np.ndarray]]) -> DMatrix:
         data = concat_or_none(values[alias.data])
         label = concat_or_none(values[alias.label])

diff --git a/tests/ci_build/lint_python.py b/tests/ci_build/lint_python.py
@@ -18,7 +18,7 @@ def run_formatter(rel_path: str):
     if isort_ret != 0 or black_ret != 0:
         msg = (
             "Please run the following command on your machine to address the format"
-            f" errors:\n isort --check --profile=black {rel_path}\n black {rel_path}\n"
+            f" errors:\n isort --profile=black {rel_path}\n black {rel_path}\n"
         )
         print(msg, file=sys.stdout)
         return False

diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py
@@ -780,7 +780,6 @@ def test_regressor_with_weight_eval(self):
             f"Expected best score: {self.reg_with_eval_best_score}, but ",
             f"get {model_with_eval._xgb_sklearn_model.best_score}",
         )
-        return
 
         pred_result_with_eval = model_with_eval.transform(
             self.reg_df_test_with_eval_weight