forked from dmlc/xgboost
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_config.cc
171 lines (156 loc) · 6.51 KB
/
test_config.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*!
* Copyright 2019 by Contributors
*/
#include <gtest/gtest.h>
#include <fstream>
#include <string>
#include "../../../src/common/config.h"
#include "../filesystem.h" // dmlc::TemporaryDirectory
#include "../helpers.h"
namespace xgboost {
namespace common {
TEST(ConfigParser, NormalizeConfigEOL) {
  // Checks that config text written with legacy line endings comes back with
  // '\n' line endings after LoadConfigFile() + NormalizeConfigEOL().
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/my.conf";

  struct EolCase {
    std::string raw;       // bytes written to the config file
    std::string expected;  // content expected after normalization
  };
  const EolCase eol_cases[] = {
      /* Old Mac OS uses \r for line ending */
      {"foo\rbar\rdog\r", "foo\nbar\ndog\n"},
      /* Windows uses \r\n for line ending; the expected text has two \n per
       * original line ending */
      {"foo\r\nbar\r\ndog\r\n", "foo\n\nbar\n\ndog\n\n"},
  };

  for (const EolCase& eol_case : eol_cases) {
    {
      // Binary mode keeps the stream from translating line endings on write;
      // the inner scope closes the file before it is read back.
      std::ofstream fp(
          tmp_file,
          std::ios_base::out | std::ios_base::trunc | std::ios_base::binary);
      fp << eol_case.raw;
    }
    ConfigParser parser(tmp_file);
    auto content = parser.LoadConfigFile(tmp_file);
    content = parser.NormalizeConfigEOL(content);
    ASSERT_EQ(content, eol_case.expected);
  }
}
TEST(ConfigParser, TrimWhitespace) {
  // Each entry pairs an input with the text expected back from
  // ConfigParser::TrimWhitespace(): leading/trailing spaces, tabs and '\r'
  // are removed while the interior space is kept.
  struct TrimCase {
    const char* input;
    const char* expected;
  };
  const TrimCase trim_cases[] = {
      {"foo bar", "foo bar"},
      {" foo bar", "foo bar"},
      {"foo bar ", "foo bar"},
      {"foo bar\t", "foo bar"},
      {" foo bar ", "foo bar"},
      {"\t\t foo bar \t", "foo bar"},
      {"\tabc\t", "abc"},
      {"\r abc\t", "abc"},
  };

  for (const TrimCase& trim_case : trim_cases) {
    ASSERT_EQ(ConfigParser::TrimWhitespace(trim_case.input),
              trim_case.expected);
  }
}
TEST(ConfigParser, ParseKeyValuePair) {
  // The parser is constructed from a file path, so create an empty
  // placeholder configuration file for it.
  dmlc::TemporaryDirectory tempdir;
  const std::string tmp_file = tempdir.path + "/my.conf";
  {
    std::ofstream fp(tmp_file);
    fp << "";
  }
  ConfigParser parser(tmp_file);
  // Output slots shared by every call below; the cases run in a fixed order.
  std::string key;
  std::string value;

  // 1. Empty lines or comments: the parser reports "no pair found"
  const char* const skipped_lines[] = {
      "# Mary had a little lamb",
      "#tree_method = gpu_hist",
      "# minimum sum of instance weight(hessian) needed in a child",
      "",
  };
  for (const char* line : skipped_lines) {
    ASSERT_FALSE(parser.ParseKeyValuePair(line, &key, &value));
  }

  // Lines that must parse successfully, with the key and value they yield.
  struct ParsedLine {
    const char* line;
    const char* key;
    const char* value;
  };
  const ParsedLine accepted_lines[] = {
      // 2. Key-value pairs
      {"booster = gbtree", "booster", "gbtree"},
      {"gpu_id = 2", "gpu_id", "2"},
      {"monotone_constraints = (1,0,-1)", "monotone_constraints", "(1,0,-1)"},
      // whitespace should not matter
      {" objective=binary:logistic", "objective", "binary:logistic"},
      {"tree_method\t=\thist ", "tree_method", "hist"},
      // 3. Use of forward and backward slashes in value
      {"test:data = test/data.libsvm", "test:data", "test/data.libsvm"},
      {"data = C:\\data.libsvm", "data", "C:\\data.libsvm"},
      // 4. One-line comment
      {"learning_rate = 0.3 # small step", "learning_rate", "0.3"},
      // Note: '#' in path won't be accepted correctly unless the whole path
      // is wrapped with quotes. This is important for external memory.
      // Here the cache suffix is silently ignored.
      {"data = dmatrix.libsvm#dtrain.cache", "data", "dmatrix.libsvm"},
      // 5. Wrapping key/value with quotes
      // Any key or value containing '#' needs to be wrapped with quotes;
      // the cache suffix is now kept.
      {"data = \"dmatrix.libsvm#dtrain.cache\"", "data",
       "dmatrix.libsvm#dtrain.cache"},
      {"data = \"C:\\Administrator\\train_file.txt#trainbincache\"", "data",
       "C:\\Administrator\\train_file.txt#trainbincache"},
      {"\'month#day\' = \"November#2019\"", "month#day", "November#2019"},
      // Likewise, key or value containing a space needs to be quoted
      {"\"my data\" = \' so precious! \'", "my data", " so precious! "},
      {"interaction_constraints = \"[[0, 2], [1, 3, 4], [5, 6]]\"",
       "interaction_constraints", "[[0, 2], [1, 3, 4], [5, 6]]"},
      // 6. Unicode
      {"클래스상속 = 类继承", "클래스상속", "类继承"},
  };
  for (const ParsedLine& accepted : accepted_lines) {
    ASSERT_TRUE(parser.ParseKeyValuePair(accepted.line, &key, &value));
    ASSERT_EQ(key, accepted.key);
    ASSERT_EQ(value, accepted.value);
  }

  // 7. Ill-formed data should throw exception
  const char* const malformed_lines[] = {
      "data = C:\\My Documents\\cat.csv",
      "cow=",
      "C# = 100%",
      "= woof ",
      "interaction_constraints = [[0, 2], [1]]",
      "data = \"train.txt#cache",
      "data = \'train.txt#cache",
      "foo = \'bar\"",
  };
  for (const char* line : malformed_lines) {
    ASSERT_THROW(parser.ParseKeyValuePair(line, &key, &value), dmlc::Error);
  }
}
} // namespace common
} // namespace xgboost