Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[R] Remove enable_categorical as parameter #10018

Merged
merged 1 commit into from
Jan 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 9 additions & 37 deletions R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#' \item `matrix` objects, with types `numeric`, `integer`, or `logical`.
#' \item `data.frame` objects, with columns of types `numeric`, `integer`, `logical`, or `factor`.
#'
#' If passing `enable_categorical=TRUE`, columns with `factor` type will be treated as categorical.
#' Otherwise, if passing `enable_categorical=FALSE` and the data contains `factor` columns, an error
#' will be thrown.
#'
#' Note that xgboost uses base-0 encoding for categorical types, hence `factor` types (which use base-1
#' encoding) will be converted inside the function call. Be aware that the encoding used for `factor`
#' types is not kept as part of the model, so in subsequent calls to `predict`, it is the user's
Expand Down Expand Up @@ -59,7 +55,7 @@
#' must be the same as in the DMatrix construction, regardless of the column names.
#' @param feature_types Set types for features.
#'
#' If `data` is a `data.frame` and passing `enable_categorical=TRUE`, the types will be deduced
#' If `data` is a `data.frame` and `feature_types` is not supplied, feature types will be deduced
#' automatically from the column types.
#'
#' Otherwise, one can pass a character vector with the same length as number of columns in `data`,
Expand All @@ -79,18 +75,6 @@
#' @param label_lower_bound Lower bound for survival training.
#' @param label_upper_bound Upper bound for survival training.
#' @param feature_weights Set feature weights for column sampling.
#' @param enable_categorical Experimental support of specializing for categorical features.
#'
#' If passing 'TRUE' and 'data' is a data frame,
#' columns of categorical types will automatically
#' be set to be of categorical type (feature_type='c') in the resulting DMatrix.
#'
#' If passing 'FALSE' and 'data' is a data frame with categorical columns,
#' it will result in an error being thrown.
#'
#' If 'data' is not a data frame, this argument is ignored.
#'
#' JSON/UBJSON serialization format is required for this.
#' @return An 'xgb.DMatrix' object. If calling 'xgb.QuantileDMatrix', it will have additional
#' subclass 'xgb.QuantileDMatrix'.
#'
Expand Down Expand Up @@ -127,8 +111,7 @@ xgb.DMatrix <- function(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE
feature_weights = NULL
) {
if (!is.null(group) && !is.null(qid)) {
stop("Either one of 'group' or 'qid' should be NULL")
Expand Down Expand Up @@ -180,7 +163,7 @@ xgb.DMatrix <- function(
nthread
)
} else if (is.data.frame(data)) {
tmp <- .process.df.for.dmatrix(data, enable_categorical, feature_types)
tmp <- .process.df.for.dmatrix(data, feature_types)
feature_types <- tmp$feature_types
handle <- .Call(
XGDMatrixCreateFromDF_R, tmp$lst, missing, nthread
Expand Down Expand Up @@ -212,7 +195,7 @@ xgb.DMatrix <- function(
return(dmat)
}

.process.df.for.dmatrix <- function(df, enable_categorical, feature_types) {
.process.df.for.dmatrix <- function(df, feature_types) {
if (!nrow(df) || !ncol(df)) {
stop("'data' is an empty data.frame.")
}
Expand All @@ -225,12 +208,6 @@ xgb.DMatrix <- function(
} else {
feature_types <- sapply(df, function(col) {
if (is.factor(col)) {
if (!enable_categorical) {
stop(
"When factor type is used, the parameter `enable_categorical`",
" must be set to TRUE."
)
}
return("c")
} else if (is.integer(col)) {
return("int")
Expand Down Expand Up @@ -326,7 +303,6 @@ xgb.QuantileDMatrix <- function(
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE,
ref = NULL,
max_bin = NULL
) {
Expand Down Expand Up @@ -357,8 +333,7 @@ xgb.QuantileDMatrix <- function(
qid = qid,
label_lower_bound = label_lower_bound,
label_upper_bound = label_upper_bound,
feature_weights = feature_weights,
enable_categorical = enable_categorical
feature_weights = feature_weights
)
)
data_iterator <- .single.data.iterator(iterator_env)
Expand Down Expand Up @@ -470,8 +445,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
qid = env[["qid"]],
label_lower_bound = env[["label_lower_bound"]],
label_upper_bound = env[["label_upper_bound"]],
feature_weights = env[["feature_weights"]],
enable_categorical = env[["enable_categorical"]]
feature_weights = env[["feature_weights"]]
)
)
}
Expand Down Expand Up @@ -540,8 +514,7 @@ xgb.ProxyDMatrix <- function(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE
feature_weights = NULL
) {
stopifnot(inherits(data, c("matrix", "data.frame", "dgRMatrix")))
out <- list(
Expand All @@ -555,8 +528,7 @@ xgb.ProxyDMatrix <- function(
qid = qid,
label_lower_bound = label_lower_bound,
label_upper_bound = label_upper_bound,
feature_weights = feature_weights,
enable_categorical = enable_categorical
feature_weights = feature_weights
)
class(out) <- "xgb.ProxyDMatrix"
return(out)
Expand All @@ -575,7 +547,7 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
stop("Either one of 'group' or 'qid' should be NULL")
}
if (is.data.frame(lst$data)) {
tmp <- .process.df.for.dmatrix(lst$data, lst$enable_categorical, lst$feature_types)
tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types)
lst$feature_types <- tmp$feature_types
.Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)
rm(tmp)
Expand Down
24 changes: 2 additions & 22 deletions R-package/man/xgb.DMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 2 additions & 17 deletions R-package/man/xgb.ProxyDMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions R-package/tests/testthat/test_dmatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,19 +338,18 @@ test_that("xgb.DMatrix: data.frame", {
stringsAsFactors = TRUE
)

m <- xgb.DMatrix(df, enable_categorical = TRUE)
m <- xgb.DMatrix(df)
expect_equal(colnames(m), colnames(df))
expect_equal(
getinfo(m, "feature_type"), c("float", "float", "int", "i", "c", "c")
)
expect_error(xgb.DMatrix(df, enable_categorical = FALSE))

df <- data.frame(
missing = c("a", "b", "d", NA),
valid = c("a", "b", "d", "c"),
stringsAsFactors = TRUE
)
m <- xgb.DMatrix(df, enable_categorical = TRUE)
m <- xgb.DMatrix(df)
expect_equal(getinfo(m, "feature_type"), c("c", "c"))
})

Expand Down