Skip to content

Commit

Permalink
fixes.
Browse files (browse the repository at this point in the history)
  • Loading branch information
trivialfis committed Jul 21, 2022
1 parent cc9ae98 commit 27d7cb8
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 17 deletions.
2 changes: 1 addition & 1 deletion python-package/xgboost/compat.py
Expand Up @@ -131,7 +131,7 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem

return CUDF_concat(value, axis=0)
if lazy_isinstance(value[0], "cupy._core.core", "ndarray"):
- import cupy
+ import cupy # pylint: disable=import-error

# pylint: disable=c-extension-no-member,no-member
d = cupy.cuda.runtime.getDevice()
Expand Down
1 change: 1 addition & 0 deletions python-package/xgboost/dask.py
Expand Up @@ -227,6 +227,7 @@ def __init__(self, args: List[bytes]) -> None:


def dconcat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statements
+ """Concatenate sequence of partitions."""
try:
return concat(value)
except TypeError:
Expand Down
8 changes: 3 additions & 5 deletions python-package/xgboost/spark/core.py
Expand Up @@ -715,7 +715,7 @@ def _transform(self, dataset):
alias.margin
)

- @pandas_udf(FloatType())
+ @pandas_udf("double")
def predict_udf(iterator: Iterator[pd.DataFrame]) -> Iterator[pd.Series]:
model = xgb_sklearn_model
for data in iterator:
Expand Down Expand Up @@ -779,17 +779,15 @@ def transform_margin(margins: np.ndarray):
classone_probs = expit(margins)
classzero_probs = 1.0 - classone_probs
raw_preds = np.vstack((-margins, margins)).transpose()
- class_probs = np.vstack(
- (classzero_probs, classone_probs)
- ).transpose()
+ class_probs = np.vstack((classzero_probs, classone_probs)).transpose()
else:
# multinomial case
raw_preds = margins
class_probs = softmax(raw_preds, axis=1)
return raw_preds, class_probs

@pandas_udf(
- "rawPrediction array<float>, prediction float, probability array<float>"
+ "rawPrediction array<double>, prediction double, probability array<double>"
)
def predict_udf(
iterator: Iterator[Tuple[pd.Series, ...]]
Expand Down
19 changes: 10 additions & 9 deletions python-package/xgboost/spark/data.py
@@ -1,27 +1,23 @@
"""Utilities for processing spark partitions."""
from collections import defaultdict, namedtuple
from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple

import numpy as np
import pandas as pd

from xgboost.compat import concat

from xgboost import DMatrix


- def stack_series(df: pd.Series) -> np.ndarray:
- array = df.to_numpy(copy=False)
+ def stack_series(series: pd.Series) -> np.ndarray:
+ """Stack a series of arrays."""
+ array = series.to_numpy(copy=False)
if array.ndim == 1:
array = array.reshape(array.shape[0], 1)
array = np.stack(array[:, 0])
return array


- def concat_or_none(seq: Optional[Sequence[np.ndarray]]) -> Optional[np.ndarray]:
- if seq:
- return concat(seq)
- return None


# Global constant for defining column alias shared between estimator and data
# processing procedures.
Alias = namedtuple("Alias", ("data", "label", "weight", "margin", "valid"))
Expand Down Expand Up @@ -71,6 +67,11 @@ def make_blob(part: pd.DataFrame, is_valid: bool) -> None:
if valid is not None:
make_blob(valid, True)

+ def concat_or_none(seq: Optional[Sequence[np.ndarray]]) -> Optional[np.ndarray]:
+ if seq:
+ return concat(seq)
+ return None

def make(values: Dict[str, List[np.ndarray]]) -> DMatrix:
data = concat_or_none(values[alias.data])
label = concat_or_none(values[alias.label])
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/lint_python.py
Expand Up @@ -18,7 +18,7 @@ def run_formatter(rel_path: str):
if isort_ret != 0 or black_ret != 0:
msg = (
"Please run the following command on your machine to address the format"
- f" errors:\n isort --check --profile=black {rel_path}\n black {rel_path}\n"
+ f" errors:\n isort --profile=black {rel_path}\n black {rel_path}\n"
)
print(msg, file=sys.stdout)
return False
Expand Down
1 change: 0 additions & 1 deletion tests/python/test_spark/test_spark_local.py
Expand Up @@ -780,7 +780,6 @@ def test_regressor_with_weight_eval(self):
f"Expected best score: {self.reg_with_eval_best_score}, but ",
f"get {model_with_eval._xgb_sklearn_model.best_score}",
)
- return

pred_result_with_eval = model_with_eval.transform(
self.reg_df_test_with_eval_weight
Expand Down

0 comments on commit 27d7cb8

Please sign in to comment.