Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into switch-to-communicator
Browse files Browse the repository at this point in the history
  • Loading branch information
rongou committed Sep 27, 2022
2 parents c89df47 + 6d14520 commit 01e19ab
Show file tree
Hide file tree
Showing 12 changed files with 100 additions and 111 deletions.
23 changes: 16 additions & 7 deletions python-package/xgboost/data.py
Expand Up @@ -231,13 +231,15 @@ def _is_modin_df(data: DataType) -> bool:
"Int16": "int",
"Int32": "int",
"Int64": "int",
"Float32": "float",
"Float64": "float",
"boolean": "i",
}


_ENABLE_CAT_ERR = (
"When categorical type is supplied, DMatrix parameter `enable_categorical` must "
"be set to `True`."
"When categorical type is supplied, The experimental DMatrix parameter"
"`enable_categorical` must be set to `True`."
)


Expand All @@ -246,7 +248,7 @@ def _invalid_dataframe_dtype(data: DataType) -> None:
# cudf series doesn't have `dtypes`.
if hasattr(data, "dtypes") and hasattr(data.dtypes, "__iter__"):
bad_fields = [
str(data.columns[i])
f"{data.columns[i]}: {dtype}"
for i, dtype in enumerate(data.dtypes)
if dtype.name not in _pandas_dtype_mapper
]
Expand Down Expand Up @@ -296,13 +298,20 @@ def _pandas_feature_info(

def is_nullable_dtype(dtype: PandasDType) -> bool:
"""Whether dtype is a pandas nullable type."""
from pandas.api.types import is_integer_dtype, is_bool_dtype
from pandas.api.types import (
is_integer_dtype,
is_bool_dtype,
is_float_dtype,
is_categorical_dtype,
)

# dtype: pd.core.arrays.numeric.NumericDtype
nullable_alias = {"Int16", "Int32", "Int64"}
nullable_alias = {"Int16", "Int32", "Int64", "Float32", "Float64", "category"}
is_int = is_integer_dtype(dtype) and dtype.name in nullable_alias
# np.bool has alias `bool`, while pd.BooleanDtype has `boolean`.
is_bool = is_bool_dtype(dtype) and dtype.name == "boolean"
return is_int or is_bool
is_float = is_float_dtype(dtype) and dtype.name in nullable_alias
return is_int or is_bool or is_float or is_categorical_dtype(dtype)


def _pandas_cat_null(data: DataFrame) -> DataFrame:
Expand Down Expand Up @@ -353,7 +362,7 @@ def _transform_pandas_df(
if not all(
dtype.name in _pandas_dtype_mapper
or is_sparse(dtype)
or is_nullable_dtype(dtype)
or (is_nullable_dtype(dtype) and not is_categorical_dtype(dtype))
or (is_categorical_dtype(dtype) and enable_categorical)
for dtype in data.dtypes
):
Expand Down
8 changes: 7 additions & 1 deletion src/common/common.cu
@@ -1,11 +1,17 @@
/*!
* Copyright 2018 XGBoost contributors
* Copyright 2018-2022 XGBoost contributors
*/
#include "common.h"

namespace xgboost {
namespace common {

/*!
 * \brief Pass a non-negative device ordinal to cudaSetDevice, checking the
 *        result through dh::safe_cuda. Negative ordinals are ignored.
 */
void SetDevice(std::int32_t device) {
  // Nothing to select for a negative ordinal: no CUDA call is issued.
  if (device < 0) {
    return;
  }
  dh::safe_cuda(cudaSetDevice(device));
}

int AllVisibleGPUs() {
int n_visgpus = 0;
try {
Expand Down
10 changes: 10 additions & 0 deletions src/common/common.h
Expand Up @@ -246,6 +246,16 @@ inline void AssertOneAPISupport() {
#endif // XGBOOST_USE_ONEAPI
}

void SetDevice(std::int32_t device);

#if !defined(XGBOOST_USE_CUDA)
// Variant compiled when XGBOOST_USE_CUDA is not defined: a non-negative device
// ordinal is routed to AssertGPUSupport(); negative ordinals do nothing.
inline void SetDevice(std::int32_t device) {
  if (device < 0) {
    return;
  }
  AssertGPUSupport();
}
#endif

template <typename Idx, typename Container,
typename V = typename Container::value_type,
typename Comp = std::less<V>>
Expand Down
2 changes: 2 additions & 0 deletions src/learner.cc
Expand Up @@ -328,6 +328,8 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
// Just set it to CPU, don't think about it.
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
#endif // defined(XGBOOST_USE_CUDA)

common::SetDevice(this->gpu_id);
}

int32_t GenericParameter::Threads() const {
Expand Down
2 changes: 1 addition & 1 deletion tests/buildkite/pipeline.yml
Expand Up @@ -78,7 +78,7 @@ steps:
command: "tests/buildkite/test-cpp-gpu.sh"
key: test-cpp-gpu
agents:
queue: linux-amd64-mgpu
queue: linux-amd64-gpu
- label: ":console: Run integration tests with JVM packages"
command: "tests/buildkite/test-integration-jvm-packages.sh"
key: test-integration-jvm-packages
Expand Down
20 changes: 5 additions & 15 deletions tests/cpp/common/test_host_device_vector.cu
Expand Up @@ -11,13 +11,14 @@

namespace xgboost {
namespace common {

void SetDevice(int device) {
namespace {
// Reduce `device` modulo the count reported by cudaGetDeviceCount and pass the
// result to cudaSetDevice. Both CUDA calls are checked with dh::safe_cuda.
void SetDeviceForTest(int device) {
  int visible_count;
  dh::safe_cuda(cudaGetDeviceCount(&visible_count));
  int const wrapped = device % visible_count;
  dh::safe_cuda(cudaSetDevice(wrapped));
}
} // namespace

struct HostDeviceVectorSetDeviceHandler {
template <typename Functor>
Expand Down Expand Up @@ -57,7 +58,7 @@ void InitHostDeviceVector(size_t n, int device, HostDeviceVector<int> *v) {

void PlusOne(HostDeviceVector<int> *v) {
int device = v->DeviceIdx();
SetDevice(device);
SetDeviceForTest(device);
thrust::transform(dh::tcbegin(*v), dh::tcend(*v), dh::tbegin(*v),
[=]__device__(unsigned int a){ return a + 1; });
ASSERT_TRUE(v->DeviceCanWrite());
Expand All @@ -68,7 +69,7 @@ void CheckDevice(HostDeviceVector<int>* v,
unsigned int first,
GPUAccess access) {
ASSERT_EQ(v->Size(), size);
SetDevice(v->DeviceIdx());
SetDeviceForTest(v->DeviceIdx());

ASSERT_TRUE(thrust::equal(dh::tcbegin(*v), dh::tcend(*v),
thrust::make_counting_iterator(first)));
Expand Down Expand Up @@ -182,16 +183,5 @@ TEST(HostDeviceVector, Empty) {
ASSERT_FALSE(another.Empty());
ASSERT_TRUE(vec.Empty());
}

// Calls TestHostDeviceVector with 1001 elements on device ordinal 1. Returns
// early with a warning when AllVisibleGPUs() reports fewer than two GPUs.
TEST(HostDeviceVector, MGPU_Basic) {  // NOLINT
  bool const has_multiple_gpus = AllVisibleGPUs() >= 2;
  if (!has_multiple_gpus) {
    LOG(WARNING) << "Not testing in multi-gpu environment.";
    return;
  }

  size_t n_elements = 1001;
  int second_device = 1;
  TestHostDeviceVector(n_elements, second_device);
}
} // namespace common
} // namespace xgboost
35 changes: 0 additions & 35 deletions tests/cpp/common/test_transform_range.cu

This file was deleted.

26 changes: 0 additions & 26 deletions tests/cpp/metric/test_multiclass_metric.cc
Expand Up @@ -84,29 +84,3 @@ TEST(Metric, DeclareUnifiedTest(MultiClassLogLoss)) {
TestMultiClassLogLoss(GPUIDX);
xgboost::CheckDeterministicMetricMultiClass(xgboost::StringView{"mlogloss"}, GPUIDX);
}

#if defined(__CUDACC__)
namespace xgboost {
namespace common {
// Calls TestMultiClassError and TestMultiClassLogLoss with arguments 0 and 1.
// Returns early with a warning when AllVisibleGPUs() reports fewer than two
// GPUs.
TEST(Metric, MGPU_MultiClassError) {
  bool const has_multiple_gpus = AllVisibleGPUs() >= 2;
  if (!has_multiple_gpus) {
    LOG(WARNING) << "Not testing in multi-gpu environment.";
    return;
  }

  // Same call order as before: both error checks first, then both log-loss checks.
  TestMultiClassError(0);
  TestMultiClassError(1);
  TestMultiClassLogLoss(0);
  TestMultiClassLogLoss(1);
}
} // namespace common
} // namespace xgboost
#endif // defined(__CUDACC__)
4 changes: 2 additions & 2 deletions tests/cpp/predictor/test_cpu_predictor.cc
Expand Up @@ -172,7 +172,7 @@ TEST(CpuPredictor, InplacePredict) {
std::string arr_str;
Json::Dump(array_interface, &arr_str);
x->SetArrayData(arr_str.data());
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, -1);
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId);
}

{
Expand All @@ -189,7 +189,7 @@ TEST(CpuPredictor, InplacePredict) {
Json::Dump(col_interface, &col_str);
std::shared_ptr<data::DMatrixProxy> x{new data::DMatrixProxy};
x->SetCSRData(rptr_str.data(), col_str.data(), data_str.data(), kCols, true);
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, -1);
TestInplacePrediction(x, "cpu_predictor", kRows, kCols, Context::kCpuId);
}
}

Expand Down
18 changes: 1 addition & 17 deletions tests/cpp/predictor/test_gpu_predictor.cu
Expand Up @@ -140,26 +140,10 @@ TEST(GPUPredictor, InplacePredictCuDF) {
TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0);
}

// Generates a kRows x kCols array interface with the generator set to device 1,
// hands it to a DMatrixProxy via SetCUDAArray, then expects
// TestInplacePrediction to pass when given 1 and to throw dmlc::Error when
// given 0. Skipped with a warning when at most one GPU is visible.
TEST(GPUPredictor, MGPU_InplacePredict) {  // NOLINT
  if (xgboost::common::AllVisibleGPUs() <= 1) {
    LOG(WARNING) << "GPUPredictor.MGPU_InplacePredict is skipped.";
    return;
  }

  size_t constexpr kRows{128};
  size_t constexpr kCols{64};

  RandomDataGenerator gen(kRows, kCols, 0.5);
  gen.Device(1);
  HostDeviceVector<float> storage;
  std::string interface_str = gen.GenerateArrayInterface(&storage);

  std::shared_ptr<DMatrix> p_fmat{new data::DMatrixProxy};
  auto* proxy = dynamic_cast<data::DMatrixProxy*>(p_fmat.get());
  proxy->SetCUDAArray(interface_str.c_str());

  TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 1);
  EXPECT_THROW(TestInplacePrediction(p_fmat, "gpu_predictor", kRows, kCols, 0), dmlc::Error);
}

// Runs the shared TestPredictionWithLesserFeatures check with "gpu_predictor".
TEST(GpuPredictor, LesserFeatures) {
TestPredictionWithLesserFeatures("gpu_predictor");
}

// Very basic test of empty model
TEST(GPUPredictor, ShapStump) {
cudaSetDevice(0);
Expand Down
31 changes: 24 additions & 7 deletions tests/python-gpu/test_gpu_prediction.py
Expand Up @@ -148,10 +148,9 @@ def run_inplace_base_margin(self, booster, dtrain, X, base_margin):
from_dmatrix = booster.predict(dtrain)
cp.testing.assert_allclose(from_inplace, from_dmatrix)

@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
def run_inplace_predict_cupy(self, device: int) -> None:
import cupy as cp
cp.cuda.runtime.setDevice(0)
cp.cuda.runtime.setDevice(device)
rows = 1000
cols = 10
missing = 11 # set to integer for testing
Expand All @@ -166,15 +165,17 @@ def test_inplace_predict_cupy(self):

dtrain = xgb.DMatrix(X, y)

booster = xgb.train({'tree_method': 'gpu_hist'}, dtrain, num_boost_round=10)
booster = xgb.train(
{'tree_method': 'gpu_hist', "gpu_id": device}, dtrain, num_boost_round=10
)

test = xgb.DMatrix(X[:10, ...], missing=missing)
predt_from_array = booster.inplace_predict(X[:10, ...], missing=missing)
predt_from_dmatrix = booster.predict(test)

cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)

def predict_dense(x):
cp.cuda.runtime.setDevice(device)
inplace_predt = booster.inplace_predict(x)
d = xgb.DMatrix(x)
copied_predt = cp.array(booster.predict(d))
Expand All @@ -183,7 +184,8 @@ def predict_dense(x):
# Don't do this on Windows, see issue #5793
if sys.platform.startswith("win"):
pytest.skip(
'Multi-threaded in-place prediction with cuPy is not working on Windows')
'Multi-threaded in-place prediction with cuPy is not working on Windows'
)
for i in range(10):
run_threaded_predict(X, rows, predict_dense)

Expand All @@ -196,13 +198,28 @@ def predict_dense(x):

missing_idx = [i for i in range(0, X.shape[1], 16)]
X[:, missing_idx] = missing
reg = xgb.XGBRegressor(tree_method="gpu_hist", n_estimators=8, missing=missing)
reg = xgb.XGBRegressor(
tree_method="gpu_hist", n_estimators=8, missing=missing, gpu_id=device
)
reg.fit(X, y)

gpu_predt = reg.predict(X)
reg.set_params(predictor="cpu_predictor")
cpu_predt = reg.predict(X)
np.testing.assert_allclose(gpu_predt, cpu_predt, atol=1e-6)
cp.cuda.runtime.setDevice(0)

@pytest.mark.skipif(**tm.no_cupy())
def test_inplace_predict_cupy(self):
# Run the shared cupy in-place prediction routine with device 0.
self.run_inplace_predict_cupy(0)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.mgpu
def test_inplace_predict_cupy_specified_device(self):
    """Run the cupy in-place prediction routine once per device ordinal.

    The ordinals are ``0 .. getDeviceCount() - 1`` as reported by the cupy
    CUDA runtime binding.
    """
    import cupy as cp

    for device in range(cp.cuda.runtime.getDeviceCount()):
        self.run_inplace_predict_cupy(device)

@pytest.mark.skipif(**tm.no_cupy())
@pytest.mark.skipif(**tm.no_cudf())
Expand Down
32 changes: 32 additions & 0 deletions tests/python/test_with_pandas.py
Expand Up @@ -330,3 +330,35 @@ def test_bool(dtype) -> bytes:
b0 = test_bool(pd.BooleanDtype())
b1 = test_bool(bool)
assert b0 != b1 # None is converted to False with np.bool

data = {"f0": [1.0, 2.0, None, 3.0], "f1": [3.0, 2.0, None, 1.0]}

arr = np.array([data["f0"], data["f1"]]).T
Xy = xgb.DMatrix(arr, y)
Xy.feature_types = None
Xy.feature_names = None
from_np = to_bytes(Xy)

def test_float(dtype) -> bytes:
    """Return ``to_bytes`` of a DMatrix built from ``data`` cast to ``dtype``.

    ``data`` and ``y`` come from the enclosing scope. Feature types and names
    are reset to ``None`` before the matrix is passed to ``to_bytes``.
    """
    frame = pd.DataFrame(data, dtype=dtype)
    dmat = xgb.DMatrix(frame, y)
    dmat.feature_types = None
    dmat.feature_names = None
    return to_bytes(dmat)

b0 = test_float(pd.Float64Dtype())
b1 = test_float(float)
assert b0 == b1 # both are converted to NaN
assert b0 == from_np

def test_cat(dtype) -> bytes:
    """Return ``to_bytes`` of a categorical-enabled DMatrix built from ``data``.

    When ``dtype`` is ``None`` the frame is built with default dtypes and then
    converted with ``astype("category")``; otherwise ``dtype`` is passed
    straight to the DataFrame constructor. ``data`` and ``y`` come from the
    enclosing scope. Only the feature types are reset before ``to_bytes``.
    """
    frame = pd.DataFrame(data, dtype=dtype)
    frame = frame.astype("category") if dtype is None else frame
    dmat = xgb.DMatrix(frame, y, enable_categorical=True)
    dmat.feature_types = None
    return to_bytes(dmat)

b0 = test_cat(pd.CategoricalDtype())
b1 = test_cat(None)
assert b0 == b1

0 comments on commit 01e19ab

Please sign in to comment.