Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup data generator. #8094

Merged
merged 1 commit into from Jul 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions tests/cpp/data/test_iterative_dmatrix.cu
Expand Up @@ -27,8 +27,8 @@ void TestEquivalent(float sparsity) {
offset += num_elements;
}
auto from_iter = page_concatenated->GetDeviceAccessor(0);
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::kCols);
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::kRows);
ASSERT_EQ(m.Info().num_col_, CudaArrayIterForTest::Cols());
ASSERT_EQ(m.Info().num_row_, CudaArrayIterForTest::Rows());

std::string interface_str = iter.AsArray();
auto adapter = CupyAdapter(interface_str);
Expand Down Expand Up @@ -98,8 +98,8 @@ TEST(IterativeDeviceDMatrix, RowMajor) {
auto impl = ellpack.Impl();
common::CompressedIterator<uint32_t> iterator(
impl->gidx_buffer.HostVector().data(), impl->NumSymbols());
auto cols = CudaArrayIterForTest::kCols;
auto rows = CudaArrayIterForTest::kRows;
auto cols = CudaArrayIterForTest::Cols();
auto rows = CudaArrayIterForTest::Rows();

auto j_interface =
Json::Load({interface_str.c_str(), interface_str.size()});
Expand Down
58 changes: 36 additions & 22 deletions tests/cpp/helpers.cc
@@ -1,25 +1,27 @@
/*!
* Copyright 2016-2022 by XGBoost contributors
*/
#include "helpers.h"

#include <dmlc/filesystem.h>
#include <xgboost/logging.h>
#include <xgboost/objective.h>
#include <xgboost/metric.h>
#include <xgboost/learner.h>
#include <gtest/gtest.h>
#include <xgboost/gbm.h>
#include <xgboost/json.h>
#include <gtest/gtest.h>
#include <xgboost/learner.h>
#include <xgboost/logging.h>
#include <xgboost/metric.h>
#include <xgboost/objective.h>

#include <algorithm>
#include <random>
#include <cinttypes>
#include <random>

#include "helpers.h"
#include "xgboost/c_api.h"
#include "../../src/data/adapter.h"
#include "../../src/data/iterative_dmatrix.h"
#include "../../src/data/simple_dmatrix.h"
#include "../../src/data/sparse_page_dmatrix.h"
#include "../../src/gbm/gbtree_model.h"
#include "xgboost/c_api.h"
#include "xgboost/predictor.h"

#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
Expand Down Expand Up @@ -379,6 +381,30 @@ RandomDataGenerator::GenerateDMatrix(bool with_label, bool float_label,
return out;
}

std::shared_ptr<DMatrix> RandomDataGenerator::GenerateQuantileDMatrix() {
NumpyArrayIterForTest iter{this->sparsity_, this->rows_, this->cols_, 1};
auto m = std::make_shared<data::IterativeDMatrix>(
&iter, iter.Proxy(), Reset, Next, std::numeric_limits<float>::quiet_NaN(), 0, bins_);
return m;
}

// Construct the iterator: forward all four arguments to the ArrayIterForTest base, then
// populate batches_ and interface_ from rng_ and rewind the iterator.
NumpyArrayIterForTest::NumpyArrayIterForTest(float sparsity, size_t rows, size_t cols,
size_t batches)
: ArrayIterForTest{sparsity, rows, cols, batches} {
// Point the generator at the CPU id before any data is produced.
rng_->Device(Context::kCpuId);
// The generator's result is unpacked into the per-batch strings (batches_) and the single
// string later returned by AsArray() (interface_); data_ is passed in by pointer.
// NOTE(review): presumably data_ receives the backing storage for the batches -- confirm.
std::tie(batches_, interface_) = rng_->GenerateArrayInterfaceBatch(&data_, n_batches_);
// Start iteration from the first batch.
this->Reset();
}

/*!
 * \brief Feed the current batch to the proxy and step the cursor forward.
 *
 * \return 0 when iter_ has already reached n_batches_ (nothing is fed), otherwise 1 after
 *         the batch string at iter_ has been set on proxy_ and iter_ has been incremented.
 */
int NumpyArrayIterForTest::Next() {
  bool const exhausted = (iter_ == n_batches_);
  if (!exhausted) {
    XGProxyDMatrixSetDataDense(proxy_, batches_[iter_].c_str());
    ++iter_;
  }
  return exhausted ? 0 : 1;
}

std::shared_ptr<DMatrix>
GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
data::DenseAdapter adapter(x.data(), num_rows, num_columns);
Expand All @@ -389,7 +415,7 @@ GetDMatrixFromData(const std::vector<float> &x, int num_rows, int num_columns){
std::unique_ptr<DMatrix> CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features,
size_t n_batches, std::string prefix) {
CHECK_GE(n_samples, n_batches);
ArrayIterForTest iter(0, n_samples, n_features, n_batches);
NumpyArrayIterForTest iter(0, n_samples, n_features, n_batches);

std::unique_ptr<DMatrix> dmat{
DMatrix::Create(static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
Expand All @@ -416,7 +442,7 @@ std::unique_ptr<DMatrix> CreateSparsePageDMatrix(size_t n_entries,
std::string prefix) {
size_t n_columns = 3;
size_t n_rows = n_entries / n_columns;
ArrayIterForTest iter(0, n_rows, n_columns, 2);
NumpyArrayIterForTest iter(0, n_rows, n_columns, 2);

std::unique_ptr<DMatrix> dmat{DMatrix::Create(
static_cast<DataIterHandle>(&iter), iter.Proxy(), Reset, Next,
Expand Down Expand Up @@ -563,18 +589,6 @@ ArrayIterForTest::ArrayIterForTest(float sparsity, size_t rows, size_t cols,

// Frees the proxy handle stored in proxy_ when the iterator is destroyed.
ArrayIterForTest::~ArrayIterForTest() {
  XGDMatrixFree(proxy_);
}

/*!
 * \brief Hand the batch at the current position to the proxy, then advance.
 *
 * \return 1 if a batch was set on proxy_, 0 if iter_ already equals n_batches_.
 */
int ArrayIterForTest::Next() {
  if (iter_ != n_batches_) {
    auto const &current_batch = batches_[iter_];
    XGProxyDMatrixSetDataDense(proxy_, current_batch.c_str());
    iter_++;
    return 1;
  }
  return 0;
}

size_t constexpr ArrayIterForTest::kRows;
size_t constexpr ArrayIterForTest::kCols;

void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
std::vector<size_t> *p_row_ptr,
std::vector<bst_feature_t> *p_cids) {
Expand Down
4 changes: 0 additions & 4 deletions tests/cpp/helpers.cu
Expand Up @@ -15,10 +15,6 @@ CudaArrayIterForTest::CudaArrayIterForTest(float sparsity, size_t rows,
this->Reset();
}

size_t constexpr CudaArrayIterForTest::kRows;
size_t constexpr CudaArrayIterForTest::kCols;
size_t constexpr CudaArrayIterForTest::kBatches;

int CudaArrayIterForTest::Next() {
if (iter_ == n_batches_) {
return 0;
Expand Down
37 changes: 19 additions & 18 deletions tests/cpp/helpers.h
Expand Up @@ -298,6 +298,7 @@ class RandomDataGenerator {
#if defined(XGBOOST_USE_CUDA)
std::shared_ptr<DMatrix> GenerateDeviceDMatrix();
#endif
std::shared_ptr<DMatrix> GenerateQuantileDMatrix();
};

inline std::vector<float>
Expand Down Expand Up @@ -401,38 +402,38 @@ class ArrayIterForTest {
size_t n_batches_;

public:
size_t static constexpr kRows { 1000 };
size_t static constexpr kBatches { 100 };
size_t static constexpr kCols { 13 };
size_t static constexpr Rows() { return 1024; }
size_t static constexpr Batches() { return 100; }
size_t static constexpr Cols() { return 13; }

std::string AsArray() const {
return interface_;
}
public:
std::string AsArray() const { return interface_; }

virtual int Next();
virtual void Reset() {
iter_ = 0;
}
virtual int Next() = 0;
virtual void Reset() { iter_ = 0; }
size_t Iter() const { return iter_; }
auto Proxy() -> decltype(proxy_) { return proxy_; }

explicit ArrayIterForTest(float sparsity, size_t rows = kRows,
size_t cols = kCols, size_t batches = kBatches);
explicit ArrayIterForTest(float sparsity, size_t rows, size_t cols, size_t batches);
virtual ~ArrayIterForTest();
};

// Test iterator derived from ArrayIterForTest that provides its own Next() override.
// NOTE(review): this span is taken from a diff view, so the static-constant members and the
// kRows/kCols/kBatches-defaulted constructor appear next to the declaration that defaults to
// Rows()/Cols()/Batches(); confirm which set belongs to the revision being edited.
class CudaArrayIterForTest : public ArrayIterForTest {
public:
// Class-level default dimensions: 1000 rows, 100 batches, 13 columns.
size_t static constexpr kRows{1000};
size_t static constexpr kBatches{100};
size_t static constexpr kCols{13};

// Constructor whose shape defaults are the static constants above.
explicit CudaArrayIterForTest(float sparsity, size_t rows = kRows,
size_t cols = kCols, size_t batches = kBatches);
// Constructor whose shape defaults are the Rows()/Cols()/Batches() helper functions.
explicit CudaArrayIterForTest(float sparsity, size_t rows = Rows(), size_t cols = Cols(),
size_t batches = Batches());
// Overrides the base-class Next().
int Next() override;
~CudaArrayIterForTest() override = default;
};

/*!
 * \brief ArrayIterForTest subclass that supplies a concrete Next().
 *
 * Only sparsity is mandatory at construction; the remaining shape arguments default to the
 * Rows(), Cols() and Batches() helpers.
 */
class NumpyArrayIterForTest : public ArrayIterForTest {
 public:
  /*!
   * \param sparsity Passed through to the ArrayIterForTest constructor.
   * \param rows     Defaults to Rows().
   * \param cols     Defaults to Cols().
   * \param batches  Defaults to Batches().
   */
  explicit NumpyArrayIterForTest(float sparsity,
                                 size_t rows = Rows(),
                                 size_t cols = Cols(),
                                 size_t batches = Batches());

  /*! \brief Overrides ArrayIterForTest::Next(). */
  int Next() override;

  ~NumpyArrayIterForTest() override = default;
};

void DMatrixToCSR(DMatrix *dmat, std::vector<float> *p_data,
std::vector<size_t> *p_row_ptr,
std::vector<bst_feature_t> *p_cids);
Expand Down