/
params.py
88 lines (70 loc) · 2.66 KB
/
params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# type: ignore
"""Xgboost pyspark integration submodule for params."""
# pylint: disable=too-few-public-methods
from pyspark.ml.param import TypeConverters
from pyspark.ml.param.shared import Param, Params
class HasArbitraryParamsDict(Params):
    """
    This is a Params based class that is extended by _SparkXGBParams
    and holds the variable to store the **kwargs parts of the XGBoost
    input.
    """

    # Catch-all Param: keyword arguments that have no dedicated Spark
    # estimator Param but are still understood by the underlying XGBoost
    # library are collected here as a plain dict.
    # Fix: doc string previously read "as the the XGBoost" (duplicated word).
    arbitrary_params_dict = Param(
        Params._dummy(),
        "arbitrary_params_dict",
        "arbitrary_params_dict This parameter holds all of the additional parameters which are "
        "not exposed as the XGBoost Spark estimator params but can be recognized by "
        "underlying XGBoost library. It is stored as a dictionary.",
    )
class HasBaseMarginCol(Params):
    """
    Params mixin picked up by _SparkXGBParams; contributes the Spark Param
    that names the base-margin column of the input dataset.
    """

    # Column name holding the per-row base margin passed through to XGBoost.
    base_margin_col = Param(
        parent=Params._dummy(),
        name="base_margin_col",
        doc="This stores the name for the column of the base margin",
    )
class HasFeaturesCols(Params):
    """
    Params mixin contributing ``features_cols``: a list of feature column
    names. This parameter is taken effect only when use_gpu is enabled.
    """

    # List-of-strings Param naming the feature columns to use.
    features_cols = Param(
        parent=Params._dummy(),
        name="features_cols",
        doc="feature column names.",
        typeConverter=TypeConverters.toListString,
    )

    def __init__(self):
        super().__init__()
        # No feature columns selected unless the caller sets them.
        self._setDefault(features_cols=[])
class HasEnableSparseDataOptim(Params):
    """
    Params mixin picked up by _SparkXGBParams; contributes the boolean flag
    that switches on sparse-data optimization when building the DMatrix.
    """

    # Boolean Param; when true the DMatrix is built from a sparse matrix.
    enable_sparse_data_optim = Param(
        parent=Params._dummy(),
        name="enable_sparse_data_optim",
        doc="This stores the boolean config of enabling sparse data optimization, if enabled, "
        "Xgboost DMatrix object will be constructed from sparse matrix instead of "
        "dense matrix. This config is disabled by default. If most of examples in your "
        "training dataset contains sparse features, we suggest to enable this config.",
        typeConverter=TypeConverters.toBoolean,
    )

    def __init__(self):
        super().__init__()
        # Optimization is opt-in: off by default.
        self._setDefault(enable_sparse_data_optim=False)
class HasQueryIdCol(Params):
    """
    This is a Params based class that is extended by _SparkXGBParams
    and holds the variable to store the query id column part of XGBoost.

    Fix: the previous docstring was copy-pasted from HasFeaturesCols and
    described ``featuresCols`` instead of ``qid_col``.
    """

    # String Param naming the column that carries per-row query ids
    # (used to group rows, e.g. for ranking objectives).
    qid_col = Param(
        Params._dummy(),
        "qid_col",
        "query id column name",
        typeConverter=TypeConverters.toString,
    )