diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py
index 8c3a96784af6..2e0f7f9cfa07 100644
--- a/python-package/xgboost/sklearn.py
+++ b/python-package/xgboost/sklearn.py
@@ -290,6 +290,12 @@ def task(i: int) -> float:
         Used for specifying feature types without constructing a dataframe. See
         :py:class:`DMatrix` for details.

+    feature_weights : Optional[ArrayLike]
+
+        Weight for each feature, defines the probability of each feature being selected
+        when colsample is being used. All values must be greater than 0, otherwise a
+        `ValueError` is thrown.
+
     max_cat_to_onehot : Optional[int]

         .. versionadded:: 1.6.0
@@ -505,7 +511,7 @@ def _wrap_evaluation_matrices(
     qid: Optional[Any],
     sample_weight: Optional[Any],
     base_margin: Optional[Any],
-    feature_weights: Optional[Any],
+    feature_weights: Optional[ArrayLike],
     eval_set: Optional[Sequence[Tuple[Any, Any]]],
     sample_weight_eval_set: Optional[Sequence[Any]],
     base_margin_eval_set: Optional[Sequence[Any]],
@@ -649,6 +655,7 @@ def __init__(
         validate_parameters: Optional[bool] = None,
         enable_categorical: bool = False,
         feature_types: Optional[FeatureTypes] = None,
+        feature_weights: Optional[ArrayLike] = None,
         max_cat_to_onehot: Optional[int] = None,
         max_cat_threshold: Optional[int] = None,
         multi_strategy: Optional[str] = None,
@@ -695,6 +702,7 @@ def __init__(
         self.validate_parameters = validate_parameters
         self.enable_categorical = enable_categorical
         self.feature_types = feature_types
+        self.feature_weights = feature_weights
         self.max_cat_to_onehot = max_cat_to_onehot
         self.max_cat_threshold = max_cat_threshold
         self.multi_strategy = multi_strategy
@@ -872,16 +880,13 @@ def _load_model_attributes(self, config: dict) -> None:
     def _configure_fit(
         self,
         booster: Optional[Union[Booster, "XGBModel", str]],
-        eval_metric: Optional[Union[Callable, str, Sequence[str]]],
         params: Dict[str, Any],
-        early_stopping_rounds: Optional[int],
-        callbacks: Optional[Sequence[TrainingCallback]],
+        feature_weights: Optional[ArrayLike],
     ) -> Tuple[
         Optional[Union[Booster, str, "XGBModel"]],
         Optional[Metric],
         Dict[str, Any],
-        Optional[int],
-        Optional[Sequence[TrainingCallback]],
+        Optional[ArrayLike],
     ]:
         """Configure parameters for :py:meth:`fit`."""
         if isinstance(booster, XGBModel):
@@ -904,24 +909,11 @@ def _duplicated(parameter: str) -> None:
             )

         # Configure evaluation metric.
-        if eval_metric is not None:
-            _deprecated("eval_metric")
-        if self.eval_metric is not None and eval_metric is not None:
-            _duplicated("eval_metric")
-        # - track where does the evaluation metric come from
-        if self.eval_metric is not None:
-            from_fit = False
-            eval_metric = self.eval_metric
-        else:
-            from_fit = True
+        eval_metric = self.eval_metric
         # - configure callable evaluation metric
         metric: Optional[Metric] = None
         if eval_metric is not None:
-            if callable(eval_metric) and from_fit:
-                # No need to wrap the evaluation function for old parameter.
-                metric = eval_metric
-            elif callable(eval_metric):
-                # Parameter from constructor or set_params
+            if callable(eval_metric):
                 if self._get_type() == "ranker":
                     metric = ltr_metric_decorator(eval_metric, self.n_jobs)
                 else:
@@ -929,31 +921,23 @@ def _duplicated(parameter: str) -> None:
             else:
                 params.update({"eval_metric": eval_metric})

-        # Configure early_stopping_rounds
-        if early_stopping_rounds is not None:
-            _deprecated("early_stopping_rounds")
-        if early_stopping_rounds is not None and self.early_stopping_rounds is not None:
-            _duplicated("early_stopping_rounds")
-        early_stopping_rounds = (
-            self.early_stopping_rounds
-            if self.early_stopping_rounds is not None
-            else early_stopping_rounds
+        if feature_weights is not None:
+            _deprecated("feature_weights")
+        if feature_weights is not None and self.feature_weights is not None:
+            _duplicated("feature_weights")
+        feature_weights = (
+            self.feature_weights
+            if self.feature_weights is not None
+            else feature_weights
         )

-        # Configure callbacks
-        if callbacks is not None:
-            _deprecated("callbacks")
-        if callbacks is not None and self.callbacks is not None:
-            _duplicated("callbacks")
-        callbacks = self.callbacks if self.callbacks is not None else callbacks
-
         tree_method = params.get("tree_method", None)
         if self.enable_categorical and tree_method == "exact":
             raise ValueError(
                 "Experimental support for categorical data is not implemented for"
                 " current tree method yet."
             )
-        return model, metric, params, early_stopping_rounds, callbacks
+        return model, metric, params, feature_weights

     def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix:
         # Use `QuantileDMatrix` to save memory.
@@ -1045,9 +1029,10 @@ def fit(
             A list of the form [M_1, M_2, ..., M_n], where each M_i is an array like
             object storing base margin for the i-th validation set.
         feature_weights :
-            Weight for each feature, defines the probability of each feature being
-            selected when colsample is being used. All values must be greater than 0,
-            otherwise a `ValueError` is thrown.
+            .. deprecated:: 1.6.0
+
+            Use `feature_weights` in :py:meth:`__init__` or :py:meth:`set_params`
+            instead.
         callbacks :
             .. deprecated:: 1.6.0

@@ -1055,6 +1040,12 @@ def fit(

         """
         with config_context(verbosity=self.verbosity):
+            params = self.get_xgb_params()
+
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
+            )
+
             evals_result: TrainingCallback.EvalsLog = {}
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
@@ -1074,7 +1065,6 @@ def fit(
                 enable_categorical=self.enable_categorical,
                 feature_types=self.feature_types,
             )
-            params = self.get_xgb_params()

             if callable(self.objective):
                 obj: Optional[Objective] = _objective_decorator(self.objective)
@@ -1082,15 +1072,6 @@ def fit(
             else:
                 obj = None

-            (
-                model,
-                metric,
-                params,
-                early_stopping_rounds,
-                callbacks,
-            ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
-            )
             self._Booster = train(
                 params,
                 train_dmatrix,
@@ -1492,14 +1473,8 @@ def fit(
                     params["objective"] = "multi:softprob"
                 params["num_class"] = self.n_classes_

-            (
-                model,
-                metric,
-                params,
-                early_stopping_rounds,
-                callbacks,
-            ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
             )
             train_dmatrix, evals = _wrap_evaluation_matrices(
                 missing=self.missing,
@@ -2024,16 +1999,9 @@ def fit(
             evals_result: TrainingCallback.EvalsLog = {}
             params = self.get_xgb_params()

-            (
-                model,
-                metric,
-                params,
-                early_stopping_rounds,
-                callbacks,
-            ) = self._configure_fit(
-                xgb_model, eval_metric, params, early_stopping_rounds, callbacks
+            model, metric, params, feature_weights = self._configure_fit(
+                xgb_model, params, feature_weights
             )
-
             self._Booster = train(
                 params,
                 train_dmatrix,
diff --git a/python-package/xgboost/testing/shared.py b/python-package/xgboost/testing/shared.py
index 930873163dbc..5a03565f52cc 100644
--- a/python-package/xgboost/testing/shared.py
+++ b/python-package/xgboost/testing/shared.py
@@ -61,9 +61,13 @@ def get_feature_weights(
     """Get feature weights using the demo parser."""
     with tempfile.TemporaryDirectory() as tmpdir:
         colsample_bynode = 0.5
-        reg = model(tree_method=tree_method, colsample_bynode=colsample_bynode)
+        reg = model(
+            tree_method=tree_method,
+            colsample_bynode=colsample_bynode,
+            feature_weights=fw,
+        )

-        reg.fit(X, y, feature_weights=fw)
+        reg.fit(X, y)
         model_path = os.path.join(tmpdir, "model.json")
         reg.save_model(model_path)
         with open(model_path, "r", encoding="utf-8") as fd:
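Below is a minimal usage sketch of the behavior this patch introduces. It is not part of the diff; it assumes an XGBoost build that includes this change, and the data and weight values are made up for illustration. With the patch, `feature_weights` is supplied to the estimator constructor (or via `set_params`) rather than to `fit`, and it only influences column sampling, so a `colsample_*` parameter is set alongside it.

```python
import numpy as np
import xgboost as xgb

rng = np.random.default_rng(0)
X = rng.normal(size=(256, 4))
y = rng.normal(size=256)

# One strictly positive weight per column; a higher weight makes the feature
# more likely to be drawn when colsample_bynode < 1.0.
fw = np.array([1.0, 2.0, 4.0, 8.0])

# New style introduced by this patch: pass feature_weights to the constructor.
reg = xgb.XGBRegressor(colsample_bynode=0.5, feature_weights=fw)
reg.fit(X, y)

# Old style, now deprecated: reg.fit(X, y, feature_weights=fw)
```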