Skip to content

Commit

Permalink
Fix more tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
trivialfis committed Jul 1, 2021
1 parent 2617120 commit d9ccd05
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 43 deletions.
47 changes: 12 additions & 35 deletions tests/cpp/data/test_sparse_page_dmatrix.cc
Expand Up @@ -20,8 +20,6 @@ TEST(SparsePageDMatrix, MetaInfo) {

xgboost::DMatrix *dmat = xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", false, false);
std::cout << tmp_file << std::endl;
EXPECT_TRUE(FileExists(tmp_file + ".cache"));

// Test the metadata that was parsed
EXPECT_EQ(dmat->Info().num_row_, 8ul);
Expand Down Expand Up @@ -62,7 +60,7 @@ TEST(SparsePageDMatrix, ColAccess) {
if (iter == 1) {
ASSERT_EQ(col_page[0][0].fvalue, 0.f);
ASSERT_EQ(col_page[3][0].fvalue, 30.f);
ASSERT_EQ(col_page[3][0].index, 0);
ASSERT_EQ(col_page[3][0].index, 1);
ASSERT_EQ(col_page[3].size(), 1);
} else {
ASSERT_EQ(col_page[1][0].fvalue, 10.0f);
Expand All @@ -85,31 +83,7 @@ TEST(SparsePageDMatrix, ColAccess) {
}
iter++;
}

EXPECT_TRUE(FileExists(tmp_file + ".cache"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.col.page"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.sorted.col.page"));

delete dmat;

EXPECT_FALSE(FileExists(tmp_file + ".cache"));
EXPECT_FALSE(FileExists(tmp_file + ".cache.row.page"));
EXPECT_FALSE(FileExists(tmp_file + ".cache.col.page"));
EXPECT_FALSE(FileExists(tmp_file + ".cache.sorted.col.page"));
}

// Checks that a second CreateSparsePageDMatrix call with the same arguments
// (same filename in particular) throws while the DMatrix returned by the
// first call is still alive.
// NOTE(review): going by the test name, the throw presumably comes from the
// cache file left by the first DMatrix -- confirm against
// CreateSparsePageDMatrix, whose definition is not visible here.
TEST(SparsePageDMatrix, ExistingCacheFile) {
dmlc::TemporaryDirectory tmpdir;
// Path inside the temporary directory that is handed to both calls below.
std::string filename = tmpdir.path + "/big.libsvm";
size_t constexpr kPageSize = 64, kEntriesPerCol = 3;
// 64 * 3 * 2 = 384; presumably sized so the data spans more than one page --
// TODO confirm how the helper interprets its page-size argument.
size_t constexpr kEntries = kPageSize * kEntriesPerCol * 2;
// First DMatrix; it stays alive until the end of the test.
std::unique_ptr<xgboost::DMatrix> dmat =
xgboost::CreateSparsePageDMatrix(kEntries, kPageSize, filename);
// The identical second call is the operation expected to throw.
EXPECT_ANY_THROW({
std::unique_ptr<xgboost::DMatrix> dmat2 =
xgboost::CreateSparsePageDMatrix(kEntries, kPageSize, filename);
});
}

TEST(SparsePageDMatrix, ThreadSafetyException) {
Expand Down Expand Up @@ -166,32 +140,35 @@ TEST(SparsePageDMatrix, ColAccessBatches) {
omp_set_num_threads(n_threads);
}

auto TestSparsePageDMatrixDeterminism(int32_t threads, std::string const& filename) {
auto TestSparsePageDMatrixDeterminism(int32_t threads) {
omp_set_num_threads(threads);
std::vector<float> sparse_data;
std::vector<size_t> sparse_rptr;
std::vector<bst_feature_t> sparse_cids;
dmlc::TemporaryDirectory tempdir;
const std::string tmp_file = tempdir.path + "/simple.libsvm";
data::FileIterator iter(tmp_file, 0, 1, "auto",
std::string filename = tempdir.path + "/simple.libsvm";
CreateBigTestData(filename, 1 << 16);

data::FileIterator iter(filename, 0, 1, "auto",
std::numeric_limits<float>::quiet_NaN());
std::unique_ptr<DMatrix> sparse{new data::SparsePageDMatrix{
&iter, iter.Proxy(), data::fileiter::Reset, data::fileiter::Next,
std::numeric_limits<float>::quiet_NaN(), 1, ""}};
std::numeric_limits<float>::quiet_NaN(), 1, filename}};

DMatrixToCSR(sparse.get(), &sparse_data, &sparse_rptr, &sparse_cids);

std::string cache_name = tmp_file + ".row.page";
auto cache_name =
data::MakeId(filename,
dynamic_cast<data::SparsePageDMatrix *>(sparse.get())) +
".row.page";
std::string cache = common::LoadSequentialFile(cache_name);
return cache;
}

TEST(SparsePageDMatrix, Determinism) {
std::string filename = "test.libsvm";
CreateBigTestData(filename, 1 << 16);
std::vector<std::string> caches;
for (size_t i = 1; i < 18; i += 2) {
caches.emplace_back(TestSparsePageDMatrixDeterminism(i, filename));
caches.emplace_back(TestSparsePageDMatrixDeterminism(i));
}

for (size_t i = 1; i < caches.size(); ++i) {
Expand Down
37 changes: 30 additions & 7 deletions tests/cpp/data/test_sparse_page_dmatrix.cu
Expand Up @@ -4,6 +4,7 @@
#include "../helpers.h"
#include "../../../src/common/compressed_iterator.h"
#include "../../../src/data/ellpack_page.cuh"
#include "../../../src/data/sparse_page_dmatrix.h"

namespace xgboost {

Expand All @@ -14,13 +15,22 @@ TEST(SparsePageDMatrix, EllpackPage) {
DMatrix* dmat = DMatrix::Load(tmp_file + "#" + tmp_file + ".cache", true, false);

// Loop over the batches and assert the data is as expected
size_t n = 0;
for (const auto& batch : dmat->GetBatches<EllpackPage>({0, 256, 64})) {
EXPECT_EQ(batch.Size(), dmat->Info().num_row_);
n += batch.Size();
}

EXPECT_TRUE(FileExists(tmp_file + ".cache"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
EXPECT_TRUE(FileExists(tmp_file + ".cache.ellpack.page"));
EXPECT_EQ(n, dmat->Info().num_row_);

auto path =
data::MakeId(tmp_file + ".cache",
dynamic_cast<data::SparsePageDMatrix *>(dmat)) +
".row.page";
EXPECT_TRUE(FileExists(path));
path =
data::MakeId(tmp_file + ".cache",
dynamic_cast<data::SparsePageDMatrix *>(dmat)) +
".ellpack.page";
EXPECT_TRUE(FileExists(path));

delete dmat;
}
Expand All @@ -43,7 +53,10 @@ TEST(SparsePageDMatrix, MultipleEllpackPages) {
EXPECT_GE(batch_count, 2);
EXPECT_EQ(row_count, dmat->Info().num_row_);

EXPECT_TRUE(FileExists(filename + ".cache.ellpack.page"));
auto path =
data::MakeId(filename,
dynamic_cast<data::SparsePageDMatrix *>(dmat.get())) +
".ellpack.page";
}

TEST(SparsePageDMatrix, EllpackPageContent) {
Expand All @@ -67,7 +80,17 @@ TEST(SparsePageDMatrix, EllpackPageContent) {
EXPECT_EQ(impl->row_stride, 2);
EXPECT_EQ(impl->Cuts().TotalBins(), 4);

auto impl_ext = (*dmat_ext->GetBatches<EllpackPage>(param).begin()).Impl();
std::unique_ptr<EllpackPageImpl> impl_ext;
size_t offset = 0;
for (auto& batch : dmat_ext->GetBatches<EllpackPage>(param)) {
if (!impl_ext) {
impl_ext.reset(new EllpackPageImpl(
batch.Impl()->gidx_buffer.DeviceIdx(), batch.Impl()->Cuts(),
batch.Impl()->is_dense, batch.Impl()->row_stride, kRows));
}
auto n_elems = impl_ext->Copy(0, batch.Impl(), offset);
offset += n_elems;
}
EXPECT_EQ(impl_ext->base_rowid, 0);
EXPECT_EQ(impl_ext->n_rows, kRows);
EXPECT_FALSE(impl_ext->is_dense);
Expand Down
1 change: 0 additions & 1 deletion tests/cpp/test_learner.cc
Expand Up @@ -99,7 +99,6 @@ TEST(Learner, SLOW_CheckMultiBatch) { // NOLINT
CreateBigTestData(tmp_file, 50000);
std::shared_ptr<DMatrix> dmat(xgboost::DMatrix::Load(
tmp_file + "#" + tmp_file + ".cache", true, false, "auto", 100));
EXPECT_TRUE(FileExists(tmp_file + ".cache.row.page"));
EXPECT_FALSE(dmat->SingleColBlock());
size_t num_row = dmat->Info().num_row_;
std::vector<bst_float> labels(num_row);
Expand Down
4 changes: 4 additions & 0 deletions tests/cpp/tree/test_gpu_hist.cu
Expand Up @@ -291,9 +291,13 @@ void TestHistogramIndexImpl() {
// Extract the device maker from the histogram makers and from that its compressed
// histogram index
const auto &maker = hist_maker.maker;
auto grad = GenerateRandomGradients(kNRows);
grad.SetDevice(0);
maker->Reset(&grad, hist_maker_dmat.get(), kNCols);
std::vector<common::CompressedByteT> h_gidx_buffer(maker->page->gidx_buffer.HostVector());

const auto &maker_ext = hist_maker_ext.maker;
maker_ext->Reset(&grad, hist_maker_ext_dmat.get(), kNCols);
std::vector<common::CompressedByteT> h_gidx_buffer_ext(maker_ext->page->gidx_buffer.HostVector());

ASSERT_EQ(maker->page->Cuts().TotalBins(), maker_ext->page->Cuts().TotalBins());
Expand Down

0 comments on commit d9ccd05

Please sign in to comment.