Skip to content

Commit

Permalink
[R] Remove enable_categorical parameter (#10018)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-cortes committed Jan 30, 2024
1 parent 3abbbe4 commit df7cf74
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 79 deletions.
46 changes: 9 additions & 37 deletions R-package/R/xgb.DMatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
#' \item `matrix` objects, with types `numeric`, `integer`, or `logical`.
#' \item `data.frame` objects, with columns of types `numeric`, `integer`, `logical`, or `factor`.
#'
#' If passing `enable_categorical=TRUE`, columns with `factor` type will be treated as categorical.
#' Otherwise, if passing `enable_categorical=FALSE` and the data contains `factor` columns, an error
#' will be thrown.
#'
#' Note that xgboost uses base-0 encoding for categorical types, hence `factor` types (which use base-1
#' encoding) will be converted inside the function call. Be aware that the encoding used for `factor`
#' types is not kept as part of the model, so in subsequent calls to `predict`, it is the user's
Expand Down Expand Up @@ -59,7 +55,7 @@
#' must be the same as in the DMatrix construction, regardless of the column names.
#' @param feature_types Set types for features.
#'
#' If `data` is a `data.frame` and passing `enable_categorical=TRUE`, the types will be deduced
#' If `data` is a `data.frame` and `feature_types` is not supplied, feature types will be deduced
#' automatically from the column types.
#'
#' Otherwise, one can pass a character vector with the same length as number of columns in `data`,
Expand All @@ -79,18 +75,6 @@
#' @param label_lower_bound Lower bound for survival training.
#' @param label_upper_bound Upper bound for survival training.
#' @param feature_weights Set feature weights for column sampling.
#' @param enable_categorical Experimental support of specializing for categorical features.
#'
#' If passing 'TRUE' and 'data' is a data frame,
#' columns of categorical types will automatically
#' be set to be of categorical type (feature_type='c') in the resulting DMatrix.
#'
#' If passing 'FALSE' and 'data' is a data frame with categorical columns,
#' it will result in an error being thrown.
#'
#' If 'data' is not a data frame, this argument is ignored.
#'
#' JSON/UBJSON serialization format is required for this.
#' @return An 'xgb.DMatrix' object. If calling 'xgb.QuantileDMatrix', it will have additional
#' subclass 'xgb.QuantileDMatrix'.
#'
Expand Down Expand Up @@ -127,8 +111,7 @@ xgb.DMatrix <- function(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE
feature_weights = NULL
) {
if (!is.null(group) && !is.null(qid)) {
stop("Either one of 'group' or 'qid' should be NULL")
Expand Down Expand Up @@ -180,7 +163,7 @@ xgb.DMatrix <- function(
nthread
)
} else if (is.data.frame(data)) {
tmp <- .process.df.for.dmatrix(data, enable_categorical, feature_types)
tmp <- .process.df.for.dmatrix(data, feature_types)
feature_types <- tmp$feature_types
handle <- .Call(
XGDMatrixCreateFromDF_R, tmp$lst, missing, nthread
Expand Down Expand Up @@ -212,7 +195,7 @@ xgb.DMatrix <- function(
return(dmat)
}

.process.df.for.dmatrix <- function(df, enable_categorical, feature_types) {
.process.df.for.dmatrix <- function(df, feature_types) {
if (!nrow(df) || !ncol(df)) {
stop("'data' is an empty data.frame.")
}
Expand All @@ -225,12 +208,6 @@ xgb.DMatrix <- function(
} else {
feature_types <- sapply(df, function(col) {
if (is.factor(col)) {
if (!enable_categorical) {
stop(
"When factor type is used, the parameter `enable_categorical`",
" must be set to TRUE."
)
}
return("c")
} else if (is.integer(col)) {
return("int")
Expand Down Expand Up @@ -326,7 +303,6 @@ xgb.QuantileDMatrix <- function(
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE,
ref = NULL,
max_bin = NULL
) {
Expand Down Expand Up @@ -357,8 +333,7 @@ xgb.QuantileDMatrix <- function(
qid = qid,
label_lower_bound = label_lower_bound,
label_upper_bound = label_upper_bound,
feature_weights = feature_weights,
enable_categorical = enable_categorical
feature_weights = feature_weights
)
)
data_iterator <- .single.data.iterator(iterator_env)
Expand Down Expand Up @@ -470,8 +445,7 @@ xgb.DataIter <- function(env = new.env(), f_next, f_reset) {
qid = env[["qid"]],
label_lower_bound = env[["label_lower_bound"]],
label_upper_bound = env[["label_upper_bound"]],
feature_weights = env[["feature_weights"]],
enable_categorical = env[["enable_categorical"]]
feature_weights = env[["feature_weights"]]
)
)
}
Expand Down Expand Up @@ -540,8 +514,7 @@ xgb.ProxyDMatrix <- function(
qid = NULL,
label_lower_bound = NULL,
label_upper_bound = NULL,
feature_weights = NULL,
enable_categorical = FALSE
feature_weights = NULL
) {
stopifnot(inherits(data, c("matrix", "data.frame", "dgRMatrix")))
out <- list(
Expand All @@ -555,8 +528,7 @@ xgb.ProxyDMatrix <- function(
qid = qid,
label_lower_bound = label_lower_bound,
label_upper_bound = label_upper_bound,
feature_weights = feature_weights,
enable_categorical = enable_categorical
feature_weights = feature_weights
)
class(out) <- "xgb.ProxyDMatrix"
return(out)
Expand All @@ -575,7 +547,7 @@ xgb.ProxyDMatrix.internal <- function(proxy_handle, data_iterator) {
stop("Either one of 'group' or 'qid' should be NULL")
}
if (is.data.frame(lst$data)) {
tmp <- .process.df.for.dmatrix(lst$data, lst$enable_categorical, lst$feature_types)
tmp <- .process.df.for.dmatrix(lst$data, lst$feature_types)
lst$feature_types <- tmp$feature_types
.Call(XGProxyDMatrixSetDataColumnar_R, proxy_handle, tmp$lst)
rm(tmp)
Expand Down
24 changes: 2 additions & 22 deletions R-package/man/xgb.DMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 2 additions & 17 deletions R-package/man/xgb.ProxyDMatrix.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions R-package/tests/testthat/test_dmatrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,19 +338,18 @@ test_that("xgb.DMatrix: data.frame", {
stringsAsFactors = TRUE
)

m <- xgb.DMatrix(df, enable_categorical = TRUE)
m <- xgb.DMatrix(df)
expect_equal(colnames(m), colnames(df))
expect_equal(
getinfo(m, "feature_type"), c("float", "float", "int", "i", "c", "c")
)
expect_error(xgb.DMatrix(df, enable_categorical = FALSE))

df <- data.frame(
missing = c("a", "b", "d", NA),
valid = c("a", "b", "d", "c"),
stringsAsFactors = TRUE
)
m <- xgb.DMatrix(df, enable_categorical = TRUE)
m <- xgb.DMatrix(df)
expect_equal(getinfo(m, "feature_type"), c("c", "c"))
})

Expand Down

0 comments on commit df7cf74

Please sign in to comment.