/
device_adapter.cuh
216 lines (198 loc) · 7.08 KB
/
device_adapter.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
/*!
* Copyright (c) 2019 by Contributors
* \file device_adapter.cuh
*/
#ifndef XGBOOST_DATA_DEVICE_ADAPTER_H_
#define XGBOOST_DATA_DEVICE_ADAPTER_H_
#include <limits>
#include <memory>
#include <string>
#include "../common/device_helpers.cuh"
#include "../common/math.h"
#include "adapter.h"
#include "array_interface.h"
namespace xgboost {
namespace data {
class CudfAdapterBatch : public detail::NoMetaInfo {
friend class CudfAdapter;
public:
CudfAdapterBatch() = default;
CudfAdapterBatch(common::Span<ArrayInterface<1>> columns, size_t num_rows)
: columns_(columns),
num_rows_(num_rows) {}
size_t Size() const { return num_rows_ * columns_.size(); }
__device__ COOTuple GetElement(size_t idx) const {
size_t column_idx = idx % columns_.size();
size_t row_idx = idx / columns_.size();
auto const& column = columns_[column_idx];
float value = column.valid.Data() == nullptr || column.valid.Check(row_idx)
? column(row_idx)
: std::numeric_limits<float>::quiet_NaN();
return {row_idx, column_idx, value};
}
XGBOOST_DEVICE bst_row_t NumRows() const { return num_rows_; }
XGBOOST_DEVICE bst_row_t NumCols() const { return columns_.size(); }
private:
common::Span<ArrayInterface<1>> columns_;
size_t num_rows_{0};
};
/*!
* Please be careful that, in official specification, the only three required
* fields are `shape', `version' and `typestr'. Any other is optional,
* including `data'. But here we have one additional requirements for input
* data:
*
* - `data' field is required, passing in an empty dataset is not accepted, as
* most (if not all) of our algorithms don't have test for empty dataset. An
* error is better than a crash.
*
* What if invalid value from dataframe is 0 but I specify missing=NaN in
* XGBoost? Since validity mask is ignored, all 0s are preserved in XGBoost.
*
* FIXME(trivialfis): Put above into document after we have a consistent way for
* processing input data.
*
* Sample input:
* [
* {
* "shape": [
* 10
* ],
* "strides": [
* 4
* ],
* "data": [
* 30074864128,
* false
* ],
* "typestr": "<f4",
* "version": 1,
* "mask": {
* "shape": [
* 64
* ],
* "strides": [
* 1
* ],
* "data": [
* 30074864640,
* false
* ],
* "typestr": "|i1",
* "version": 1
* }
* }
* ]
*/
class CudfAdapter : public detail::SingleBatchDataIter<CudfAdapterBatch> {
public:
explicit CudfAdapter(StringView cuda_interfaces_str) {
Json interfaces = Json::Load(cuda_interfaces_str);
std::vector<Json> const& json_columns = get<Array>(interfaces);
size_t n_columns = json_columns.size();
CHECK_GT(n_columns, 0) << "Number of columns must not equal to 0.";
auto const& typestr = get<String const>(json_columns[0]["typestr"]);
CHECK_EQ(typestr.size(), 3) << ArrayInterfaceErrors::TypestrFormat();
std::vector<ArrayInterface<1>> columns;
auto first_column = ArrayInterface<1>(get<Object const>(json_columns[0]));
num_rows_ = first_column.Shape(0);
if (num_rows_ == 0) {
return;
}
device_idx_ = dh::CudaGetPointerDevice(first_column.data);
CHECK_NE(device_idx_, Context::kCpuId);
dh::safe_cuda(cudaSetDevice(device_idx_));
for (auto& json_col : json_columns) {
auto column = ArrayInterface<1>(get<Object const>(json_col));
columns.push_back(column);
num_rows_ = std::max(num_rows_, column.Shape(0));
CHECK_EQ(device_idx_, dh::CudaGetPointerDevice(column.data))
<< "All columns should use the same device.";
CHECK_EQ(num_rows_, column.Shape(0))
<< "All columns should have same number of rows.";
}
columns_ = columns;
batch_ = CudfAdapterBatch(dh::ToSpan(columns_), num_rows_);
}
explicit CudfAdapter(std::string cuda_interfaces_str)
: CudfAdapter{StringView{cuda_interfaces_str}} {}
const CudfAdapterBatch& Value() const override {
CHECK_EQ(batch_.columns_.data(), columns_.data().get());
return batch_;
}
size_t NumRows() const { return num_rows_; }
size_t NumColumns() const { return columns_.size(); }
int32_t DeviceIdx() const { return device_idx_; }
private:
CudfAdapterBatch batch_;
dh::device_vector<ArrayInterface<1>> columns_;
size_t num_rows_{0};
int32_t device_idx_{Context::kCpuId};
};
class CupyAdapterBatch : public detail::NoMetaInfo {
public:
CupyAdapterBatch() = default;
explicit CupyAdapterBatch(ArrayInterface<2> array_interface)
: array_interface_(std::move(array_interface)) {}
size_t Size() const {
return array_interface_.Shape(0) * array_interface_.Shape(1);
}
__device__ COOTuple GetElement(size_t idx) const {
size_t column_idx = idx % array_interface_.Shape(1);
size_t row_idx = idx / array_interface_.Shape(1);
float value = array_interface_(row_idx, column_idx);
return {row_idx, column_idx, value};
}
XGBOOST_DEVICE bst_row_t NumRows() const { return array_interface_.Shape(0); }
XGBOOST_DEVICE bst_row_t NumCols() const { return array_interface_.Shape(1); }
private:
ArrayInterface<2> array_interface_;
};
class CupyAdapter : public detail::SingleBatchDataIter<CupyAdapterBatch> {
public:
explicit CupyAdapter(StringView cuda_interface_str) {
Json json_array_interface = Json::Load(cuda_interface_str);
array_interface_ = ArrayInterface<2>(get<Object const>(json_array_interface));
batch_ = CupyAdapterBatch(array_interface_);
if (array_interface_.Shape(0) == 0) {
return;
}
device_idx_ = dh::CudaGetPointerDevice(array_interface_.data);
CHECK_NE(device_idx_, Context::kCpuId);
}
explicit CupyAdapter(std::string cuda_interface_str)
: CupyAdapter{StringView{cuda_interface_str}} {}
const CupyAdapterBatch& Value() const override { return batch_; }
size_t NumRows() const { return array_interface_.Shape(0); }
size_t NumColumns() const { return array_interface_.Shape(1); }
int32_t DeviceIdx() const { return device_idx_; }
private:
ArrayInterface<2> array_interface_;
CupyAdapterBatch batch_;
int32_t device_idx_ {Context::kCpuId};
};
// Returns maximum row length
template <typename AdapterBatchT>
size_t GetRowCounts(const AdapterBatchT batch, common::Span<size_t> offset,
int device_idx, float missing) {
dh::safe_cuda(cudaSetDevice(device_idx));
IsValidFunctor is_valid(missing);
// Count elements per row
dh::LaunchN(batch.Size(), [=] __device__(size_t idx) {
auto element = batch.GetElement(idx);
if (is_valid(element)) {
atomicAdd(reinterpret_cast<unsigned long long*>( // NOLINT
&offset[element.row_idx]),
static_cast<unsigned long long>(1)); // NOLINT
}
});
dh::XGBCachingDeviceAllocator<char> alloc;
size_t row_stride = dh::Reduce(
thrust::cuda::par(alloc), thrust::device_pointer_cast(offset.data()),
thrust::device_pointer_cast(offset.data()) + offset.size(), size_t(0),
thrust::maximum<size_t>());
return row_stride;
}
}; // namespace data
} // namespace xgboost
#endif // XGBOOST_DATA_DEVICE_ADAPTER_H_