forked from dmlc/xgboost
/
test_linear.py
82 lines (73 loc) · 3.3 KB
/
test_linear.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import testing as tm
from hypothesis import strategies, given, settings, note
import xgboost as xgb
# Hypothesis strategy for the base parameter dict used by the tests below:
# the booster is pinned to 'gblinear'; the other values are drawn from the
# closed ranges listed here.
parameter_strategy = strategies.fixed_dictionaries(
    {
        'booster': strategies.just('gblinear'),
        'eta': strategies.floats(min_value=0.01, max_value=0.25),
        'tolerance': strategies.floats(min_value=1e-5, max_value=1e-2),
        'nthread': strategies.integers(min_value=1, max_value=4),
    }
)
# Hypothesis strategy for the extra parameters that the coordinate-descent
# tests merge into the base parameter dict.
coord_strategy = strategies.fixed_dictionaries(
    {
        'feature_selector': strategies.sampled_from(
            ['cyclic', 'shuffle', 'greedy', 'thrifty']
        ),
        'top_k': strategies.integers(min_value=1, max_value=10),
    }
)
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` and return the recorded evaluation history.

    ``dmat`` is also registered as the only evaluation set, under the name
    ``'train'``, so the returned dict holds whatever ``xgb.train`` stored in
    ``evals_result`` for that set.

    Args:
        param: Parameter dict forwarded to ``xgb.train``.
        dmat: Training data; also used as the evaluation data.
        num_rounds: Number of boosting rounds to run.

    Returns:
        The dict filled in by ``xgb.train`` through ``evals_result``.
    """
    history = {}
    # Pass everything after the training data by keyword: newer xgboost
    # releases deprecate positional use of ``evals``, while the keyword form
    # is accepted by older releases as well.
    xgb.train(
        param,
        dmat,
        num_boost_round=num_rounds,
        evals=[(dmat, 'train')],
        verbose_eval=False,
        evals_result=history,
    )
    return history
class TestLinear:
    """Property-based checks on the training metric of the linear updaters."""

    @given(parameter_strategy, strategies.integers(min_value=10, max_value=50),
           tm.dataset_strategy, coord_strategy)
    @settings(deadline=None)
    def test_coordinate(self, param, num_rounds, dataset, coord_param):
        """Check the full metric history of the 'coord_descent' updater.

        The history is handed to ``tm.non_increasing`` together with 5e-4
        (presumably a tolerance -- confirm against the ``testing`` module).
        """
        param.update({'updater': 'coord_descent', **coord_param})
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        result = history['train'][dataset.metric]
        note(result)
        assert tm.non_increasing(result, 5e-4)

    @given(parameter_strategy, strategies.integers(min_value=10, max_value=50),
           tm.dataset_strategy, coord_strategy,
           strategies.floats(min_value=1e-5, max_value=2.0),
           strategies.floats(min_value=1e-5, max_value=2.0))
    @settings(deadline=None)
    def test_coordinate_regularised(self, param, num_rounds, dataset, coord_param, alpha, lambd):
        """Check 'coord_descent' with 'alpha' and 'lambda' set.

        Loss is not guaranteed to always decrease because of the
        regularisation parameters, so a weaker condition is tested: the loss
        has not increased between the first and the last iteration.
        """
        param.update({
            'updater': 'coord_descent',
            'alpha': alpha,
            'lambda': lambd,
            **coord_param,
        })
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        result = history['train'][dataset.metric]
        note(result)
        first, last = result[0], result[-1]
        assert tm.non_increasing([first, last])

    @given(parameter_strategy, strategies.integers(min_value=10, max_value=50),
           tm.dataset_strategy)
    @settings(deadline=None)
    def test_shotgun(self, param, num_rounds, dataset):
        """Check the 'shotgun' updater using the first and last iteration.

        Shotgun is non-deterministic, so the test is relaxed to compare only
        the first and the last entry when more than two are recorded.
        """
        param.update({'updater': 'shotgun'})
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        result = history['train'][dataset.metric]
        note(result)
        sampled_result = (result[0], result[-1]) if len(result) > 2 else result
        assert tm.non_increasing(sampled_result)

    @given(parameter_strategy, strategies.integers(min_value=10, max_value=50),
           tm.dataset_strategy,
           strategies.floats(min_value=1e-5, max_value=2.0),
           strategies.floats(min_value=1e-5, max_value=2.0))
    @settings(deadline=None)
    def test_shotgun_regularised(self, param, num_rounds, dataset, alpha, lambd):
        """Check 'shotgun' with 'alpha' and 'lambda' set.

        Only the first and the last recorded metric values are compared.
        """
        param.update({'updater': 'shotgun', 'alpha': alpha, 'lambda': lambd})
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        result = history['train'][dataset.metric]
        note(result)
        first, last = result[0], result[-1]
        assert tm.non_increasing([first, last])