Skip to content

Commit

Permalink
Optionally fail when gpu_id is set to an invalid value (#6342)
Browse files Browse the repository at this point in the history
  • Loading branch information
honzasterba committed Nov 28, 2020
1 parent 956beea commit b0036b3
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 1 deletion.
6 changes: 6 additions & 0 deletions include/xgboost/generic_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <string>

namespace xgboost {

struct GenericParameter : public XGBoostParameter<GenericParameter> {
// Constant representing the device ID of CPU.
static int32_t constexpr kCpuId = -1;
Expand All @@ -26,6 +27,8 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
int nthread;
// primary device, -1 means no gpu.
int gpu_id;
// fail when gpu_id is invalid
bool fail_on_invalid_gpu_id {false};
// gpu page size in external memory mode, 0 means using the default.
size_t gpu_page_size;
bool enable_experimental_json_serialization {true};
Expand Down Expand Up @@ -64,6 +67,9 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
.set_default(-1)
.set_lower_bound(-1)
.describe("The primary GPU device ordinal.");
DMLC_DECLARE_FIELD(fail_on_invalid_gpu_id)
.set_default(false)
.describe("Fail with error when gpu_id is invalid.");
DMLC_DECLARE_FIELD(gpu_page_size)
.set_default(0)
.set_lower_bound(0)
Expand Down
4 changes: 4 additions & 0 deletions src/learner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ void GenericParameter::ConfigureGpuId(bool require_gpu) {
LOG(WARNING) << "No visible GPU is found, setting `gpu_id` to -1";
}
this->UpdateAllowUnknown(Args{{"gpu_id", std::to_string(kCpuId)}});
} else if (fail_on_invalid_gpu_id) {
CHECK(gpu_id == kCpuId || gpu_id < n_gpus)
<< "Only " << n_gpus << " GPUs are visible, gpu_id "
<< gpu_id << " is invalid.";
} else if (gpu_id != kCpuId && gpu_id >= n_gpus) {
LOG(WARNING) << "Only " << n_gpus
<< " GPUs are visible, setting `gpu_id` to " << gpu_id % n_gpus;
Expand Down
2 changes: 1 addition & 1 deletion src/predictor/gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ class GPUPredictor : public xgboost::Predictor {
Predictor::Predictor{generic_param} {}

// Destructor: selects generic_param_->gpu_id as the current CUDA device via
// cudaSetDevice, but only when the guard condition below holds.
~GPUPredictor() override {
// NOTE(review): this hunk is rendered without +/- markers. The next line is the
// pre-change condition (the single deletion in this file); the line after it is
// its replacement (the single addition).
if (generic_param_->gpu_id >= 0) {
// Post-change guard: gpu_id must be non-negative AND below
// common::AllVisibleGPUs(). Presumably this keeps cudaSetDevice from being
// called with an ordinal that is not visible on this machine — confirm.
if (generic_param_->gpu_id >= 0 && generic_param_->gpu_id < common::AllVisibleGPUs()) {
dh::safe_cuda(cudaSetDevice(generic_param_->gpu_id));
}
}
Expand Down
14 changes: 14 additions & 0 deletions tests/python-gpu/test_gpu_basic_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,17 @@ def test_deterministic_gpu_hist(self):

model_0, model_1 = self.run_cls(X, y, False)
assert model_0 != model_1

def test_invalid_gpu_id(self):
    """Exercise both behaviours for an out-of-range ``gpu_id``.

    With the default settings, fitting with ``gpu_id=9999`` is expected to
    succeed. With ``fail_on_invalid_gpu_id=True`` the same fit is expected
    to raise ``xgb.core.XGBoostError`` whose message contains
    ``"gpu_id 9999 is invalid"``.
    """
    X = np.random.randn(10, 5) * 1e4
    y = np.random.randint(0, 2, size=10) * 1e4
    # should pass with invalid gpu id
    cls1 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999)
    cls1.fit(X, y)
    # should throw error with fail_on_invalid_gpu_id enabled
    cls2 = xgb.XGBClassifier(tree_method='gpu_hist', gpu_id=9999, fail_on_invalid_gpu_id=True)
    try:
        cls2.fit(X, y)
        # Reaching this line means no error was raised. AssertionError is not
        # an XGBoostError, so the except clause below will not swallow it.
        assert False, "Should have failed with fail_on_invalid_gpu_id enabled"
    except xgb.core.XGBoostError as err:
        assert "gpu_id 9999 is invalid" in str(err)

0 comments on commit b0036b3

Please sign in to comment.