forked from dmlc/xgboost
/
sparse_page_raw_format.cc
78 lines (68 loc) · 2.23 KB
/
sparse_page_raw_format.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
/*!
* Copyright (c) 2015-2021 by Contributors
* \file sparse_page_raw_format.cc
* Raw binary format of sparse page.
*/
#include <xgboost/data.h>
#include <dmlc/registry.h>
#include "xgboost/logging.h"
#include "./sparse_page_writer.h"
namespace xgboost {
namespace data {
DMLC_REGISTRY_FILE_TAG(sparse_page_raw_format);
template<typename T>
class SparsePageRawFormat : public SparsePageFormat<T> {
public:
bool Read(T* page, dmlc::SeekStream* fi) override {
auto& offset_vec = page->offset.HostVector();
if (!fi->Read(&offset_vec)) {
return false;
}
auto& data_vec = page->data.HostVector();
CHECK_NE(page->offset.Size(), 0U) << "Invalid SparsePage file";
data_vec.resize(offset_vec.back());
if (page->data.Size() != 0) {
size_t n_bytes = fi->Read(dmlc::BeginPtr(data_vec),
(page->data).Size() * sizeof(Entry));
CHECK_EQ(n_bytes, (page->data).Size() * sizeof(Entry))
<< "Invalid SparsePage file";
}
fi->Read(&page->base_rowid, sizeof(page->base_rowid));
return true;
}
size_t Write(const T& page, dmlc::Stream* fo) override {
const auto& offset_vec = page.offset.HostVector();
const auto& data_vec = page.data.HostVector();
CHECK(page.offset.Size() != 0 && offset_vec[0] == 0);
CHECK_EQ(offset_vec.back(), page.data.Size());
fo->Write(offset_vec);
auto bytes = page.MemCostBytes();
bytes += sizeof(uint64_t);
if (page.data.Size() != 0) {
fo->Write(dmlc::BeginPtr(data_vec), page.data.Size() * sizeof(Entry));
}
fo->Write(&page.base_rowid, sizeof(page.base_rowid));
bytes += sizeof(page.base_rowid);
return bytes;
}
private:
/*! \brief external memory column offset */
std::vector<size_t> disk_offset_;
};
XGBOOST_REGISTER_SPARSE_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat<SparsePage>();
});
XGBOOST_REGISTER_CSC_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat<CSCPage>();
});
XGBOOST_REGISTER_SORTED_CSC_PAGE_FORMAT(raw)
.describe("Raw binary data format.")
.set_body([]() {
return new SparsePageRawFormat<SortedCSCPage>();
});
} // namespace data
} // namespace xgboost