Skip to content

Commit

Permalink
Display rank too.
Browse files (browse the repository at this point in the history)
  • Loading branch information
trivialfis committed Oct 9, 2020
1 parent cbceb99 commit a02cd2c
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
16 changes: 8 additions & 8 deletions src/common/device_helpers.cuh
Expand Up @@ -389,10 +389,11 @@ template <typename T>
using XGBBaseDeviceAllocator = thrust::device_malloc_allocator<T>;
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1

inline void ThrowOOMError(std::exception const& e, size_t bytes) {
inline void ThrowOOMError(std::string const& err, size_t bytes) {
auto device = CurrentDevice();
auto rank = rabit::GetRank();
std::stringstream ss;
ss << "Memory allocation error: " << e.what() << "\n"
ss << "Memory allocation error on worker " << rank << ": " << err << "\n"
<< "- Free memory: " << AvailableMemory(device) << "\n"
<< "- Requested memory: " << bytes << std::endl;
LOG(FATAL) << ss.str();
Expand All @@ -415,7 +416,7 @@ struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
try {
ptr = SuperT::allocate(n);
} catch (const std::exception& e) {
ThrowOOMError(e, n * sizeof(T));
ThrowOOMError(e.what(), n * sizeof(T));
}
GlobalMemoryLogger().RegisterAllocation(ptr.get(), n * sizeof(T));
return ptr;
Expand Down Expand Up @@ -451,11 +452,10 @@ struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
}
pointer allocate(size_t n) { // NOLINT
T* ptr;
try {
GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
n * sizeof(T));
} catch (const std::exception& e) {
ThrowOOMError(e, n * sizeof(T));
auto errc = GetGlobalCachingAllocator().DeviceAllocate(reinterpret_cast<void **>(&ptr),
n * sizeof(T));
if (errc != cudaSuccess) {
ThrowOOMError("Caching allocator", n * sizeof(T));
}
pointer thrust_ptr{ ptr };
GlobalMemoryLogger().RegisterAllocation(thrust_ptr.get(), n * sizeof(T));
Expand Down
6 changes: 3 additions & 3 deletions tests/cpp/common/test_device_helpers.cu
Expand Up @@ -158,9 +158,9 @@ TEST(SegmentedUnique, Regression) {
}

TEST(Allocator, OOM) {
auto size = dh::AvailableMemory() * 2;
ASSERT_THROW({dh::caching_device_vector<char> vec(size)}, dmlc::Error);
ASSERT_THROW({dh::device_vector<char> vec(size)}, dmlc::Error);
auto size = dh::AvailableMemory(0) * 4;
ASSERT_THROW({dh::caching_device_vector<char> vec(size);}, dmlc::Error);
ASSERT_THROW({dh::device_vector<char> vec(size);}, dmlc::Error);
}
} // namespace common
} // namespace xgboost

0 comments on commit a02cd2c

Please sign in to comment.