Skip to content

Commit

Permalink
Merge pull request #5 from dmlc/master
Browse files Browse the repository at this point in the history
Update to latest version
  • Loading branch information
JohanManders committed Nov 3, 2015
2 parents 7c79c9a + e436c94 commit 96f221e
Show file tree
Hide file tree
Showing 95 changed files with 2,074 additions and 962 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Expand Up @@ -41,6 +41,9 @@ on going at master
- Fixed possible problem of poisson regression for R.
* Python module now throws an exception instead of crashing the terminal when a parameter error happens.
* Python module now has importance plot and tree plot functions.
* Python module now accepts different learning rates for each boosting round.
* Python now allows model training continuation from previously saved model.
* Additional parameters added for sklearn wrapper
* Java api is ready for use
* Added more test cases and continuous integration to make each build more robust
* Improvements in sklearn compatible module
Expand Down
7 changes: 6 additions & 1 deletion CONTRIBUTORS.md
Expand Up @@ -13,6 +13,8 @@ Committers are people who have made substantial contribution to the project and
- Bing is the original creator of xgboost python package and currently the maintainer of [XGBoost.jl](https://github.com/antinucleon/XGBoost.jl).
* [Michael Benesty](https://github.com/pommedeterresautee)
- Michael is a lawyer and data scientist in France; he is the creator of the xgboost interactive analysis module in R.
* [Yuan Tang](https://github.com/terrytangyuan)
- Yuan is a data scientist in Chicago, US. He contributed mostly to the R and Python packages.

Become a Committer
-----------------
Expand All @@ -34,7 +36,6 @@ List of Contributors
* [Zygmunt Zając](https://github.com/zygmuntz)
- Zygmunt is the master behind the early stopping feature frequently used by kagglers.
* [Ajinkya Kale](https://github.com/ajkl)
* [Yuan Tang](https://github.com/terrytangyuan)
* [Boliang Chen](https://github.com/cblsjtu)
* [Vadim Khotilovich](https://github.com/khotilov)
* [Yangqing Men](https://github.com/yanqingmen)
Expand All @@ -49,5 +50,9 @@ List of Contributors
- Masaaki is the initial creator of xgboost python plotting module.
* [Hongliang Liu](https://github.com/phunterlau)
- Hongliang is the maintainer of xgboost python PyPI package for pip installation.
* [daiyl0320](https://github.com/daiyl0320)
- daiyl0320 contributed a patch that makes the xgboost distributed version more robust and scale stably on TB-scale datasets.
* [Huayi Zhang](https://github.com/irachex)
* [Johan Manders](https://github.com/johanmanders)
* [yoori](https://github.com/yoori)
* [Mathias Müller](https://github.com/far0n)
1 change: 0 additions & 1 deletion Makefile
Expand Up @@ -189,7 +189,6 @@ pythonpack:
cp -r multi-node xgboost-deploy/xgboost
cp -r windows xgboost-deploy/xgboost
cp -r src xgboost-deploy/xgboost

#make python

pythonbuild:
Expand Down
5 changes: 2 additions & 3 deletions R-package/R/getinfo.xgb.DMatrix.R
Expand Up @@ -35,15 +35,15 @@ getinfo <- function(object, ...){
#' @param ... other parameters
#' @rdname getinfo
#' @method getinfo xgb.DMatrix
setMethod("getinfo", signature = "xgb.DMatrix",
setMethod("getinfo", signature = "xgb.DMatrix",
definition = function(object, name) {
if (typeof(name) != "character") {
stop("xgb.getinfo: name must be character")
}
if (class(object) != "xgb.DMatrix") {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name != "label" && name != "weight" &&
if (name != "label" && name != "weight" &&
name != "base_margin" && name != "nrow") {
stop(paste("xgb.getinfo: unknown info name", name))
}
Expand All @@ -54,4 +54,3 @@ setMethod("getinfo", signature = "xgb.DMatrix",
}
return(ret)
})

9 changes: 4 additions & 5 deletions R-package/R/predict.xgb.Booster.R
Expand Up @@ -30,8 +30,8 @@ setClass("xgb.Booster",
#' pred <- predict(bst, test$data)
#' @export
#'
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NA,
setMethod("predict", signature = "xgb.Booster",
definition = function(object, newdata, missing = NA,
outputmargin = FALSE, ntreelimit = NULL, predleaf = FALSE) {
if (class(object) != "xgb.Booster"){
stop("predict: model in prediction must be of class xgb.Booster")
Expand All @@ -48,14 +48,14 @@ setMethod("predict", signature = "xgb.Booster",
stop("predict: ntreelimit must be equal to or greater than 1")
}
}
option = 0
option <- 0
if (outputmargin) {
option <- option + 1
}
if (predleaf) {
option <- option + 2
}
ret <- .Call("XGBoosterPredict_R", object$handle, newdata, as.integer(option),
ret <- .Call("XGBoosterPredict_R", object$handle, newdata, as.integer(option),
as.integer(ntreelimit), PACKAGE = "xgboost")
if (predleaf){
len <- getinfo(newdata, "nrow")
Expand All @@ -68,4 +68,3 @@ setMethod("predict", signature = "xgb.Booster",
}
return(ret)
})

9 changes: 4 additions & 5 deletions R-package/R/predict.xgb.Booster.handle.R
Expand Up @@ -5,15 +5,14 @@
#' @param object Object of class "xgb.Booster.handle"
#' @param ... Parameters pass to \code{predict.xgb.Booster}
#'
setMethod("predict", signature = "xgb.Booster.handle",
setMethod("predict", signature = "xgb.Booster.handle",
definition = function(object, ...) {
if (class(object) != "xgb.Booster.handle"){
stop("predict: model in prediction must be of class xgb.Booster.handle")
}

bst <- xgb.handleToBooster(object)
ret = predict(bst, ...)

ret <- predict(bst, ...)
return(ret)
})

2 changes: 1 addition & 1 deletion R-package/R/setinfo.xgb.DMatrix.R
Expand Up @@ -32,7 +32,7 @@ setinfo <- function(object, ...){
#' @param ... other parameters
#' @rdname setinfo
#' @method setinfo xgb.DMatrix
setMethod("setinfo", signature = "xgb.DMatrix",
setMethod("setinfo", signature = "xgb.DMatrix",
definition = function(object, name, info) {
xgb.setinfo(object, name, info)
})
10 changes: 5 additions & 5 deletions R-package/R/slice.xgb.DMatrix.R
Expand Up @@ -23,19 +23,19 @@ slice <- function(object, ...){
#' @param ... other parameters
#' @rdname slice
#' @method slice xgb.DMatrix
setMethod("slice", signature = "xgb.DMatrix",
setMethod("slice", signature = "xgb.DMatrix",
definition = function(object, idxset, ...) {
if (class(object) != "xgb.DMatrix") {
stop("slice: first argument dtrain must be xgb.DMatrix")
}
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
ret <- .Call("XGDMatrixSliceDMatrix_R", object, idxset,
PACKAGE = "xgboost")

attr_list <- attributes(object)
nr <- xgb.numrow(object)
len <- sapply(attr_list,length)
ind <- which(len==nr)
if (length(ind)>0) {
ind <- which(len == nr)
if (length(ind) > 0) {
nms <- names(attr_list)[ind]
for (i in 1:length(ind)) {
attr(ret,nms[i]) <- attr(object,nms[i])[idxset]
Expand Down
53 changes: 26 additions & 27 deletions R-package/R/utils.R
@@ -1,4 +1,4 @@
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @importClassesFrom Matrix dgCMatrix dgeMatrix
#' @import methods

# depends on matrix
Expand All @@ -15,30 +15,30 @@ xgb.setinfo <- function(dmat, name, info) {
stop("xgb.setinfo: first argument dtrain must be xgb.DMatrix")
}
if (name == "label") {
if (length(info)!=xgb.numrow(dmat))
if (length(info) != xgb.numrow(dmat))
stop("The length of labels must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "weight") {
if (length(info)!=xgb.numrow(dmat))
if (length(info) != xgb.numrow(dmat))
stop("The length of weights must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "base_margin") {
# if (length(info)!=xgb.numrow(dmat))
# stop("The length of base margin must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
.Call("XGDMatrixSetInfo_R", dmat, name, as.numeric(info),
PACKAGE = "xgboost")
return(TRUE)
}
if (name == "group") {
if (sum(info)!=xgb.numrow(dmat))
if (sum(info) != xgb.numrow(dmat))
stop("The sum of groups must equal to the number of rows in the input data")
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
.Call("XGDMatrixSetInfo_R", dmat, name, as.integer(info),
PACKAGE = "xgboost")
return(TRUE)
}
Expand Down Expand Up @@ -68,7 +68,7 @@ xgb.Booster <- function(params = list(), cachelist = list(), modelfile = NULL) {
if (typeof(modelfile) == "character") {
.Call("XGBoosterLoadModel_R", handle, modelfile, PACKAGE = "xgboost")
} else if (typeof(modelfile) == "raw") {
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
.Call("XGBoosterLoadModelFromRaw_R", handle, modelfile, PACKAGE = "xgboost")
} else {
stop("xgb.Booster: modelfile must be character or raw vector")
}
Expand Down Expand Up @@ -122,7 +122,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL) {
} else if (inClass == "xgb.DMatrix") {
dtrain <- data
} else if (inClass == "data.frame") {
stop("xgboost only support numerical matrix input,
stop("xgboost only support numerical matrix input,
use 'data.frame' to transform the data.")
} else {
stop("xgboost: Invalid input of data")
Expand All @@ -142,8 +142,7 @@ xgb.iter.boost <- function(booster, dtrain, gpair) {
if (class(dtrain) != "xgb.DMatrix") {
stop("xgb.iter.update: second argument must be type xgb.DMatrix")
}
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess,
PACKAGE = "xgboost")
.Call("XGBoosterBoostOneIter_R", booster, dtrain, gpair$grad, gpair$hess, PACKAGE = "xgboost")
return(TRUE)
}

Expand All @@ -157,9 +156,9 @@ xgb.iter.update <- function(booster, dtrain, iter, obj = NULL) {
}

if (is.null(obj)) {
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
.Call("XGBoosterUpdateOneIter_R", booster, as.integer(iter), dtrain,
PACKAGE = "xgboost")
} else {
} else {
pred <- predict(booster, dtrain)
gpair <- obj(pred, dtrain)
succ <- xgb.iter.boost(booster, dtrain, gpair)
Expand Down Expand Up @@ -190,7 +189,7 @@ xgb.iter.eval <- function(booster, watchlist, iter, feval = NULL, prediction = F
}
evnames <- append(evnames, names(w))
}
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
msg <- .Call("XGBoosterEvalOneIter_R", booster, as.integer(iter), watchlist,
evnames, PACKAGE = "xgboost")
} else {
msg <- paste("[", iter, "]", sep="")
Expand Down Expand Up @@ -248,29 +247,29 @@ xgb.cv.mknfold <- function(dall, nfold, param, stratified, folds) {
if (length(unique(y)) <= 5) y <- factor(y)
}
folds <- xgb.createFolds(y, nfold)
} else {
} else {
# make simple non-stratified folds
kstep <- length(randidx) %/% nfold
folds <- list()
for (i in 1:(nfold-1)) {
folds[[i]] = randidx[1:kstep]
randidx = setdiff(randidx, folds[[i]])
for (i in 1:(nfold - 1)) {
folds[[i]] <- randidx[1:kstep]
randidx <- setdiff(randidx, folds[[i]])
}
folds[[nfold]] = randidx
folds[[nfold]] <- randidx
}
}
ret <- list()
for (k in 1:nfold) {
dtest <- slice(dall, folds[[k]])
didx = c()
didx <- c()
for (i in 1:nfold) {
if (i != k) {
didx <- append(didx, folds[[i]])
}
}
dtrain <- slice(dall, didx)
bst <- xgb.Booster(param, list(dtrain, dtest))
watchlist = list(train=dtrain, test=dtest)
watchlist <- list(train=dtrain, test=dtest)
ret[[k]] <- list(dtrain=dtrain, booster=bst, watchlist=watchlist, index=folds[[k]])
}
return (ret)
Expand All @@ -283,7 +282,7 @@ xgb.cv.aggcv <- function(res, showsd = TRUE) {
kv <- strsplit(header[i], ":")[[1]]
ret <- paste(ret, "\t", kv[1], ":", sep="")
stats <- c()
stats[1] <- as.numeric(kv[2])
stats[1] <- as.numeric(kv[2])
for (j in 2:length(res)) {
tkv <- strsplit(res[[j]][i], ":")[[1]]
stats[j] <- as.numeric(tkv[2])
Expand Down Expand Up @@ -311,9 +310,9 @@ xgb.createFolds <- function(y, k = 10)
## At most, we will use quantiles. If the sample
## is too small, we just do regular unstratified
## CV
cuts <- floor(length(y)/k)
if(cuts < 2) cuts <- 2
if(cuts > 5) cuts <- 5
cuts <- floor(length(y) / k)
if (cuts < 2) cuts <- 2
if (cuts > 5) cuts <- 5
y <- cut(y,
unique(stats::quantile(y, probs = seq(0, 1, length = cuts))),
include.lowest = TRUE)
Expand All @@ -325,7 +324,7 @@ xgb.createFolds <- function(y, k = 10)
y <- factor(as.character(y))
numInClass <- table(y)
foldVector <- vector(mode = "integer", length(y))

## For each class, balance the fold allocation as far
## as possible, then resample the remainder.
## The final assignment of folds is also randomized.
Expand Down
14 changes: 7 additions & 7 deletions R-package/R/xgb.DMatrix.R
Expand Up @@ -20,26 +20,26 @@
#'
xgb.DMatrix <- function(data, info = list(), missing = NA, ...) {
if (typeof(data) == "character") {
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
handle <- .Call("XGDMatrixCreateFromFile_R", data, as.integer(FALSE),
PACKAGE = "xgboost")
} else if (is.matrix(data)) {
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
handle <- .Call("XGDMatrixCreateFromMat_R", data, missing,
PACKAGE = "xgboost")
} else if (class(data) == "dgCMatrix") {
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
handle <- .Call("XGDMatrixCreateFromCSC_R", data@p, data@i, data@x,
PACKAGE = "xgboost")
} else {
stop(paste("xgb.DMatrix: does not support to construct from ",
stop(paste("xgb.DMatrix: does not support to construct from ",
typeof(data)))
}
dmat <- structure(handle, class = "xgb.DMatrix")

info <- append(info, list(...))
if (length(info) == 0)
if (length(info) == 0)
return(dmat)
for (i in 1:length(info)) {
p <- info[i]
xgb.setinfo(dmat, names(p), p[[1]])
}
return(dmat)
}
}
4 changes: 2 additions & 2 deletions R-package/R/xgb.DMatrix.save.R
Expand Up @@ -18,10 +18,10 @@ xgb.DMatrix.save <- function(DMatrix, fname) {
stop("xgb.save: fname must be character")
}
if (class(DMatrix) == "xgb.DMatrix") {
.Call("XGDMatrixSaveBinary_R", DMatrix, fname, as.integer(FALSE),
.Call("XGDMatrixSaveBinary_R", DMatrix, fname, as.integer(FALSE),
PACKAGE = "xgboost")
return(TRUE)
}
stop("xgb.DMatrix.save: the input must be xgb.DMatrix")
return(FALSE)
}
}

0 comments on commit 96f221e

Please sign in to comment.