Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Move feature weight to skl parameters. #9506

Draft
wants to merge 3 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
104 changes: 36 additions & 68 deletions python-package/xgboost/sklearn.py
Expand Up @@ -290,6 +290,12 @@ def task(i: int) -> float:
Used for specifying feature types without constructing a dataframe. See
:py:class:`DMatrix` for details.

feature_weights : Optional[ArrayLike]

Weight for each feature, defines the probability of each feature being selected
when colsample is being used. All values must be greater than 0, otherwise a
`ValueError` is thrown.

max_cat_to_onehot : Optional[int]

.. versionadded:: 1.6.0
Expand Down Expand Up @@ -505,7 +511,7 @@ def _wrap_evaluation_matrices(
qid: Optional[Any],
sample_weight: Optional[Any],
base_margin: Optional[Any],
feature_weights: Optional[Any],
feature_weights: Optional[ArrayLike],
eval_set: Optional[Sequence[Tuple[Any, Any]]],
sample_weight_eval_set: Optional[Sequence[Any]],
base_margin_eval_set: Optional[Sequence[Any]],
Expand Down Expand Up @@ -649,6 +655,7 @@ def __init__(
validate_parameters: Optional[bool] = None,
enable_categorical: bool = False,
feature_types: Optional[FeatureTypes] = None,
feature_weights: Optional[ArrayLike] = None,
max_cat_to_onehot: Optional[int] = None,
max_cat_threshold: Optional[int] = None,
multi_strategy: Optional[str] = None,
Expand Down Expand Up @@ -695,6 +702,7 @@ def __init__(
self.validate_parameters = validate_parameters
self.enable_categorical = enable_categorical
self.feature_types = feature_types
self.feature_weights = feature_weights
self.max_cat_to_onehot = max_cat_to_onehot
self.max_cat_threshold = max_cat_threshold
self.multi_strategy = multi_strategy
Expand Down Expand Up @@ -872,16 +880,13 @@ def _load_model_attributes(self, config: dict) -> None:
def _configure_fit(
self,
booster: Optional[Union[Booster, "XGBModel", str]],
eval_metric: Optional[Union[Callable, str, Sequence[str]]],
params: Dict[str, Any],
early_stopping_rounds: Optional[int],
callbacks: Optional[Sequence[TrainingCallback]],
feature_weights: Optional[ArrayLike],
) -> Tuple[
Optional[Union[Booster, str, "XGBModel"]],
Optional[Metric],
Dict[str, Any],
Optional[int],
Optional[Sequence[TrainingCallback]],
Optional[ArrayLike],
]:
"""Configure parameters for :py:meth:`fit`."""
if isinstance(booster, XGBModel):
Expand All @@ -904,56 +909,35 @@ def _duplicated(parameter: str) -> None:
)

# Configure evaluation metric.
if eval_metric is not None:
_deprecated("eval_metric")
if self.eval_metric is not None and eval_metric is not None:
_duplicated("eval_metric")
# - track where does the evaluation metric come from
if self.eval_metric is not None:
from_fit = False
eval_metric = self.eval_metric
else:
from_fit = True
eval_metric = self.eval_metric
# - configure callable evaluation metric
metric: Optional[Metric] = None
if eval_metric is not None:
if callable(eval_metric) and from_fit:
# No need to wrap the evaluation function for old parameter.
metric = eval_metric
elif callable(eval_metric):
# Parameter from constructor or set_params
if callable(eval_metric):
if self._get_type() == "ranker":
metric = ltr_metric_decorator(eval_metric, self.n_jobs)
else:
metric = _metric_decorator(eval_metric)
else:
params.update({"eval_metric": eval_metric})

# Configure early_stopping_rounds
if early_stopping_rounds is not None:
_deprecated("early_stopping_rounds")
if early_stopping_rounds is not None and self.early_stopping_rounds is not None:
_duplicated("early_stopping_rounds")
early_stopping_rounds = (
self.early_stopping_rounds
if self.early_stopping_rounds is not None
else early_stopping_rounds
if feature_weights is not None:
_deprecated("feature_weights")
        if feature_weights is not None and self.feature_weights is not None:
_duplicated("feature_weights")
feature_weights = (
self.feature_weights
if self.feature_weights is not None
else feature_weights
)

# Configure callbacks
if callbacks is not None:
_deprecated("callbacks")
if callbacks is not None and self.callbacks is not None:
_duplicated("callbacks")
callbacks = self.callbacks if self.callbacks is not None else callbacks

tree_method = params.get("tree_method", None)
if self.enable_categorical and tree_method == "exact":
raise ValueError(
"Experimental support for categorical data is not implemented for"
" current tree method yet."
)
return model, metric, params, early_stopping_rounds, callbacks
return model, metric, params, feature_weights

def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
# Use `QuantileDMatrix` to save memory.
Expand Down Expand Up @@ -1045,16 +1029,23 @@ def fit(
A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like
object storing base margin for the i-th validation set.
feature_weights :
Weight for each feature, defines the probability of each feature being
selected when colsample is being used. All values must be greater than 0,
otherwise a `ValueError` is thrown.
            .. deprecated:: 1.6.0

                Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`
                instead.

callbacks :
.. deprecated:: 1.6.0
Use `callbacks` in :py:meth:`__init__` or :py:meth:`set_params` instead.

"""
with config_context(verbosity=self.verbosity):
params = self.get_xgb_params()

model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

evals_result: TrainingCallback.EvalsLog = {}
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
Expand All @@ -1074,23 +1065,13 @@ def fit(
enable_categorical=self.enable_categorical,
feature_types=self.feature_types,
)
params = self.get_xgb_params()

if callable(self.objective):
obj: Optional[Objective] = _objective_decorator(self.objective)
params["objective"] = "reg:squarederror"
else:
obj = None

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
)
self._Booster = train(
params,
train_dmatrix,
Expand Down Expand Up @@ -1492,14 +1473,8 @@ def fit(
params["objective"] = "multi:softprob"
params["num_class"] = self.n_classes_

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)
train_dmatrix, evals = _wrap_evaluation_matrices(
missing=self.missing,
Expand Down Expand Up @@ -2024,16 +1999,9 @@ def fit(
evals_result: TrainingCallback.EvalsLog = {}
params = self.get_xgb_params()

(
model,
metric,
params,
early_stopping_rounds,
callbacks,
) = self._configure_fit(
xgb_model, eval_metric, params, early_stopping_rounds, callbacks
model, metric, params, feature_weights = self._configure_fit(
xgb_model, params, feature_weights
)

self._Booster = train(
params,
train_dmatrix,
Expand Down
8 changes: 6 additions & 2 deletions python-package/xgboost/testing/shared.py
Expand Up @@ -61,9 +61,13 @@ def get_feature_weights(
"""Get feature weights using the demo parser."""
with tempfile.TemporaryDirectory() as tmpdir:
colsample_bynode = 0.5
reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
reg = model(
tree_method=tree_method,
colsample_bynode=colsample_bynode,
feature_weights=fw,
)

reg.fit(X, y, feature_weights=fw)
reg.fit(X, y)
model_path = os.path.join(tmpdir, "model.json")
reg.save_model(model_path)
with open(model_path, "r", encoding="utf-8") as fd:
Expand Down