New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Make QuantileDMatrix
default to sklearn estimators.
#8220
Changes from all commits
d6781b3
7654b88
cc90c2b
23faf65
7004573
dd44ac9
bc81831
571da23
9d6e47d
4e388eb
376bdd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# .readthedocs.yaml | ||
# Read the Docs configuration file | ||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details | ||
|
||
# Required | ||
version: 2 | ||
|
||
# Set the version of Python and other tools you might need | ||
build: | ||
os: ubuntu-22.04 | ||
tools: | ||
python: "3.8" | ||
apt_packages: | ||
- graphviz | ||
|
||
# Build documentation in the docs/ directory with Sphinx | ||
sphinx: | ||
configuration: doc/conf.py | ||
|
||
# If using Sphinx, optionally build your docs in additional formats such as PDF | ||
formats: | ||
|
||
# Optionally declare the Python requirements required to build your docs | ||
python: | ||
install: | ||
- requirements: doc/requirements.txt | ||
system_packages: true |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
Booster, | ||
DMatrix, | ||
Metric, | ||
QuantileDMatrix, | ||
XGBoostError, | ||
_convert_ntree_limit, | ||
_deprecate_positional_args, | ||
|
@@ -430,7 +431,8 @@ def _wrap_evaluation_matrices( | |
enable_categorical: bool, | ||
feature_types: Optional[FeatureTypes], | ||
) -> Tuple[Any, List[Tuple[Any, str]]]: | ||
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the way.""" | ||
"""Convert array_like evaluation matrices into DMatrix. Perform validation on the | ||
way.""" | ||
train_dmatrix = create_dmatrix( | ||
data=X, | ||
label=y, | ||
|
@@ -442,6 +444,7 @@ def _wrap_evaluation_matrices( | |
missing=missing, | ||
enable_categorical=enable_categorical, | ||
feature_types=feature_types, | ||
ref=None, | ||
) | ||
|
||
n_validation = 0 if eval_set is None else len(eval_set) | ||
|
@@ -491,6 +494,7 @@ def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence: | |
missing=missing, | ||
enable_categorical=enable_categorical, | ||
feature_types=feature_types, | ||
ref=train_dmatrix, | ||
) | ||
evals.append(m) | ||
nevals = len(evals) | ||
|
@@ -904,6 +908,17 @@ def _duplicated(parameter: str) -> None: | |
|
||
return model, metric, params, early_stopping_rounds, callbacks | ||
|
||
def _create_dmatrix(self, ref: Optional[DMatrix], **kwargs: Any) -> DMatrix: | ||
# Use `QuantileDMatrix` to save memory. | ||
if self.tree_method in ("hist", "gpu_hist"): | ||
try: | ||
return QuantileDMatrix( | ||
**kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin | ||
) | ||
except TypeError: # `QuantileDMatrix` supports lesser types than DMatrix | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Which cases are not supported, out of curiosity? Just wondering if this is going to run into the exception regularly in some cases.

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

datatable, CSC, and arrow. Also, the `DMatrix` has a dispatcher to convert unknown types to scipy CSR. |
||
pass | ||
return DMatrix(**kwargs, nthread=self.n_jobs) | ||
|
||
def _set_evaluation_result(self, evals_result: TrainingCallback.EvalsLog) -> None: | ||
if evals_result: | ||
self.evals_result_ = cast(Dict[str, Dict[str, List[float]]], evals_result) | ||
|
@@ -996,7 +1011,7 @@ def fit( | |
base_margin_eval_set=base_margin_eval_set, | ||
eval_group=None, | ||
eval_qid=None, | ||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs), | ||
create_dmatrix=self._create_dmatrix, | ||
enable_categorical=self.enable_categorical, | ||
feature_types=self.feature_types, | ||
) | ||
|
@@ -1479,7 +1494,7 @@ def fit( | |
base_margin_eval_set=base_margin_eval_set, | ||
eval_group=None, | ||
eval_qid=None, | ||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs), | ||
create_dmatrix=self._create_dmatrix, | ||
enable_categorical=self.enable_categorical, | ||
feature_types=self.feature_types, | ||
) | ||
|
@@ -1930,7 +1945,7 @@ def fit( | |
base_margin_eval_set=base_margin_eval_set, | ||
eval_group=eval_group, | ||
eval_qid=eval_qid, | ||
create_dmatrix=lambda **kwargs: DMatrix(nthread=self.n_jobs, **kwargs), | ||
create_dmatrix=self._create_dmatrix, | ||
enable_categorical=self.enable_categorical, | ||
feature_types=self.feature_types, | ||
) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It appears that `tree_method` and `max_bin` will no longer be in the `kwargs` dictionary. Can you make sure this will not cause undesirable behavior? For example, is `kwargs` passed to `xgb.train`?

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.

Training parameters are obtained via `params = self.get_xgb_params()`, which you can find in the `async def _fit_async` function.