Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Build custom argsort for GPU quantile sketching. #9194

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
2 changes: 2 additions & 0 deletions CMakeLists.txt
Expand Up @@ -149,6 +149,8 @@ if (USE_CUDA)
set(GEN_CODE "")
format_gencode_flags("${GPU_COMPUTE_VER}" GEN_CODE)
add_subdirectory(${PROJECT_SOURCE_DIR}/gputreeshap)

find_package(CUDAToolkit REQUIRED)
endif (USE_CUDA)

if (FORCE_COLORED_OUTPUT AND (CMAKE_GENERATOR STREQUAL "Ninja") AND
Expand Down
16 changes: 6 additions & 10 deletions cmake/Utils.cmake
Expand Up @@ -124,13 +124,6 @@ function(format_gencode_flags flags out)
endif (CMAKE_VERSION VERSION_GREATER_EQUAL "3.18")
endfunction(format_gencode_flags flags)

macro(enable_nvtx target)
find_package(NVTX REQUIRED)
target_include_directories(${target} PRIVATE "${NVTX_INCLUDE_DIR}")
target_link_libraries(${target} PRIVATE "${NVTX_LIBRARY}")
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
endmacro()

# Set CUDA related flags to target. Must be used after code `format_gencode_flags`.
function(xgboost_set_cuda_flags target)
target_compile_options(${target} PRIVATE
Expand Down Expand Up @@ -162,11 +155,14 @@ function(xgboost_set_cuda_flags target)
endif (USE_DEVICE_DEBUG)

if (USE_NVTX)
enable_nvtx(${target})
target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_NVTX=1)
endif (USE_NVTX)

target_compile_definitions(${target} PRIVATE -DXGBOOST_USE_CUDA=1)
target_include_directories(${target} PRIVATE ${xgboost_SOURCE_DIR}/gputreeshap)
target_include_directories(
${target} PRIVATE
${xgboost_SOURCE_DIR}/gputreeshap
${CUDAToolkit_INCLUDE_DIRS})

if (MSVC)
target_compile_options(${target} PRIVATE
Expand Down Expand Up @@ -289,7 +285,7 @@ macro(xgboost_target_link_libraries target)
endif (USE_NCCL)

if (USE_NVTX)
enable_nvtx(${target})
target_link_libraries(${target} PRIVATE CUDA::nvToolsExt)
endif (USE_NVTX)

if (RABIT_BUILD_MPI)
Expand Down
26 changes: 0 additions & 26 deletions cmake/modules/FindNVTX.cmake

This file was deleted.

47 changes: 42 additions & 5 deletions include/xgboost/span.h
@@ -1,5 +1,5 @@
/*!
* Copyright 2018 XGBoost contributors
/**
* Copyright 2018-2023, XGBoost contributors
* \brief span class based on ISO++20 span
*
* About NOLINTs in this file:
Expand Down Expand Up @@ -32,11 +32,12 @@
#include <xgboost/base.h>
#include <xgboost/logging.h>

#include <cinttypes> // size_t
#include <limits> // numeric_limits
#include <cinttypes> // size_t
#include <cstdio>
#include <iterator>
#include <limits> // numeric_limits
#include <type_traits>
#include <cstdio>
#include <utility> // for move

#if defined(__CUDACC__)
#include <cuda_runtime.h>
Expand Down Expand Up @@ -668,6 +669,42 @@ XGBOOST_DEVICE auto as_writable_bytes(Span<T, E> s) __span_noexcept -> // NOLIN
Span<byte, detail::ExtentAsBytesValue<T, E>::value> {
return {reinterpret_cast<byte*>(s.data()), s.size_bytes()};
}

/**
 * \brief A simple custom Span type that uses a general iterator instead of a pointer.
 *
 * Unlike \ref Span, the element handle is the iterator type `It` itself, so this
 * works with fancy iterators (counting/transform iterators, device iterators) as
 * long as they support random access (`it[i]` and `it + n` are used below).
 */
template <typename It>
class IterSpan {
 public:
  using element_type = typename std::iterator_traits<It>::value_type;  // NOLINT
  using index_type = std::size_t;                                      // NOLINT
  using iterator = It;                                                 // NOLINT

 private:
  It it_;                 // first element
  index_type size_{0};    // number of elements reachable from it_

 public:
  IterSpan() = default;
  XGBOOST_DEVICE IterSpan(It it, index_type size) : it_{std::move(it)}, size_{size} {}
  // NOTE(review): this takes a Span *of iterators* and stores a pointer to its first
  // iterator as `it_`, so it only compiles when `It` is constructible from `It*` —
  // presumably intended for the `It == element pointer` case; confirm against callers.
  XGBOOST_DEVICE explicit IterSpan(common::Span<It, dynamic_extent> span)
      : it_{span.data()}, size_{span.size()} {}

  XGBOOST_DEVICE index_type size() const { return size_; }  // NOLINT
  XGBOOST_DEVICE decltype(auto) operator[](index_type i) const { return it_[i]; }
  XGBOOST_DEVICE decltype(auto) operator[](index_type i) { return it_[i]; }
  XGBOOST_DEVICE bool empty() const { return size() == 0; }  // NOLINT
  XGBOOST_DEVICE It data() const { return it_; }             // NOLINT
  /**
   * \brief View of `_count` elements starting at `_offset` (to the end when
   *        `_count == dynamic_extent`). Bounds-checked via SPAN_CHECK.
   */
  XGBOOST_DEVICE IterSpan<It> subspan(  // NOLINT
      index_type _offset, index_type _count = dynamic_extent) const {
    SPAN_CHECK((_count == dynamic_extent) ? (_offset <= size()) : (_offset + _count <= size()));
    return {data() + _offset, _count == dynamic_extent ? size() - _offset : _count};
  }
  // begin()/end() hand back the underlying iterator directly.  The previous
  // `return {this, 0};` bodies were copied from Span, whose iterator type is
  // constructed from a (span pointer, offset) pair; for a general `It` (e.g. a
  // raw pointer) that brace-init does not compile.
  XGBOOST_DEVICE constexpr iterator begin() const __span_noexcept {  // NOLINT
    return it_;
  }
  XGBOOST_DEVICE constexpr iterator end() const __span_noexcept {  // NOLINT
    return it_ + size();
  }
};
} // namespace common
} // namespace xgboost

Expand Down
2 changes: 1 addition & 1 deletion python-package/xgboost/data.py
Expand Up @@ -882,7 +882,7 @@ def _transform_cupy_array(data: DataType) -> CupyT:

if not hasattr(data, "__cuda_array_interface__") and hasattr(data, "__array__"):
data = cupy.array(data, copy=False)
if data.dtype.hasobject or data.dtype in [cupy.float16, cupy.bool_]:
if data.dtype.hasobject or data.dtype in [cupy.bool_]:
data = data.astype(cupy.float32, copy=False)
return data

Expand Down