diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index bb6f3acd9c9f..4806692c0b57 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -170,7 +170,7 @@ jobs: run: | cd R-package R.exe CMD INSTALL . - Rscript.exe tests/run_lint.R + Rscript.exe tests/helper_scripts/run_lint.R test-with-R: runs-on: ${{ matrix.config.os }} diff --git a/Makefile b/Makefile index 20ddace00ed2..63319ba3ef63 100644 --- a/Makefile +++ b/Makefile @@ -134,14 +134,16 @@ Rpack: clean_all sed -i -e 's/@OPENMP_LIB@//g' xgboost/src/Makevars.win rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it bash R-package/remove_warning_suppression_pragma.sh + bash xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh + rm -rfv xgboost/tests/helper_scripts/ Rbuild: Rpack R CMD build --no-build-vignettes xgboost rm -rf xgboost Rcheck: Rbuild - R CMD check xgboost*.tar.gz + R CMD check --as-cran xgboost*.tar.gz -include build/*.d -include build/*/*.d diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 1a35eaa0612a..f599a57858be 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -2,7 +2,7 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting Version: 1.3.0.1 -Date: 2020-02-21 +Date: 2020-08-28 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index fb0ac542f8bf..1795742c8160 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -38,6 +38,7 @@ export(xgb.dump) export(xgb.gblinear.history) export(xgb.ggplot.deepness) export(xgb.ggplot.importance) +export(xgb.ggplot.shap.summary) export(xgb.importance) export(xgb.load) export(xgb.load.raw) @@ -46,6 +47,7 @@ export(xgb.plot.deepness) export(xgb.plot.importance) export(xgb.plot.multi.trees) export(xgb.plot.shap) +export(xgb.plot.shap.summary) export(xgb.plot.tree) export(xgb.save) export(xgb.save.raw) diff --git a/R-package/R/utils.R b/R-package/R/utils.R index b0c653f17671..846cc1f4404e 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -349,6 +349,7 @@ NULL #' # Save as a stand-alone file (JSON); load it with xgb.load() #' xgb.save(bst, 'xgb.model.json') #' bst2 <- xgb.load('xgb.model.json') +#' if (file.exists('xgb.model.json')) file.remove('xgb.model.json') #' #' # Save as a raw byte vector; load it with xgb.load.raw() #' xgb_bytes <- xgb.save.raw(bst) @@ -364,6 +365,7 @@ NULL #' obj2 <- readRDS('my_object.rds') #' # Re-construct xgb.Booster object from the bytes #' bst2 <- xgb.load.raw(obj2$xgb_model_bytes) +#' if (file.exists('my_object.rds')) file.remove('my_object.rds') #' #' @name a-compatibility-note-for-saveRDS-save NULL diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index fd74d0f6b625..fb48ca6071bf 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -79,7 +79,7 @@ #' #' All observations are used for both training and validation. 
#' -#' Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation} +#' Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29} #' #' @return #' An object of class \code{xgb.cv.synchronous} with the following elements: diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index d9ea69786ad9..70a357ee9866 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -200,9 +200,9 @@ xgb.plot.shap <- function(data, shap_contrib = NULL, features = NULL, top_n = 1, #' @return A \code{ggplot2} object. #' @export #' -#' @examples See \code{\link{xgb.plot.shap}}. +#' @examples # See \code{\link{xgb.plot.shap}}. #' @seealso \code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}}, -#' \code{\url{https://github.com/slundberg/shap}} +#' \url{https://github.com/slundberg/shap} xgb.plot.shap.summary <- function(data, shap_contrib = NULL, features = NULL, top_n = 10, model = NULL, trees = NULL, target_class = NULL, approxcontrib = FALSE, subsample = NULL) { # Only ggplot implementation is available. diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index a6755f5fa277..0449ae266243 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -130,16 +130,16 @@ #' Note that when using a customized metric, only this single metric can be used. #' The following is the list of built-in metrics for which Xgboost provides optimized implementation: #' \itemize{ -#' \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error} -#' \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood} -#' \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss} +#' \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} +#' \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} +#' \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} #' \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. #' By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. #' Different threshold (e.g., 0.) could be specified as "error@0." #' \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. -#' \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. +#' \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. #' \item \code{aucpr} Area under the PR curve. \url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. -#' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG} +#' \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). 
\url{https://en.wikipedia.org/wiki/NDCG} #' } #' #' The following callbacks are automatically created when certain parameters are set: diff --git a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd index 63b8dfce52ac..85b52243c1b9 100644 --- a/R-package/man/a-compatibility-note-for-saveRDS-save.Rd +++ b/R-package/man/a-compatibility-note-for-saveRDS-save.Rd @@ -43,6 +43,7 @@ bst2 <- xgb.load('xgb.model') # Save as a stand-alone file (JSON); load it with xgb.load() xgb.save(bst, 'xgb.model.json') bst2 <- xgb.load('xgb.model.json') +if (file.exists('xgb.model.json')) file.remove('xgb.model.json') # Save as a raw byte vector; load it with xgb.load.raw() xgb_bytes <- xgb.save.raw(bst) @@ -58,5 +59,6 @@ saveRDS(obj, 'my_object.rds') obj2 <- readRDS('my_object.rds') # Re-construct xgb.Booster object from the bytes bst2 <- xgb.load.raw(obj2$xgb_model_bytes) +if (file.exists('my_object.rds')) file.remove('my_object.rds') } diff --git a/R-package/man/normalize.Rd b/R-package/man/normalize.Rd new file mode 100644 index 000000000000..6a05e83426b1 --- /dev/null +++ b/R-package/man/normalize.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.ggplot.R +\name{normalize} +\alias{normalize} +\title{Scale feature value to have mean 0, standard deviation 1} +\usage{ +normalize(x) +} +\arguments{ +\item{x}{Numeric vector} +} +\value{ +Numeric vector with mean 0 and sd 1. +} +\description{ +This is used to compare multiple features on the same plot. +Internal utility function +} diff --git a/R-package/man/prepare.ggplot.shap.data.Rd b/R-package/man/prepare.ggplot.shap.data.Rd new file mode 100644 index 000000000000..57f71a3ff800 --- /dev/null +++ b/R-package/man/prepare.ggplot.shap.data.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.ggplot.R +\name{prepare.ggplot.shap.data} +\alias{prepare.ggplot.shap.data} +\title{Combine and melt feature values and SHAP contributions for sample +observations.} +\usage{ +prepare.ggplot.shap.data(data_list, normalize = FALSE) +} +\arguments{ +\item{data_list}{List containing 'data' and 'shap_contrib' returned by +\code{xgb.shap.data()}.} + +\item{normalize}{Whether to standardize feature values to have mean 0 and +standard deviation 1 (useful for comparing multiple features on the same +plot). Default \code{FALSE}.} +} +\value{ +A data.table containing the observation ID, the feature name, the + feature value (normalized if specified), and the SHAP contribution value. +} +\description{ +Conforms to data format required for ggplot functions. +} +\details{ +Internal utility function. +} diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 98e70e48cade..86a88007be21 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -154,7 +154,7 @@ The cross-validation process is then repeated \code{nrounds} times, with each of All observations are used for both training and validation. 
-Adapted from \url{http://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29#k-fold_cross-validation} +Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\%29} } \examples{ data(agaricus.train, package='xgboost') diff --git a/R-package/man/xgb.plot.shap.Rd b/R-package/man/xgb.plot.shap.Rd index 3cd3a8953dc8..abb21ce1957a 100644 --- a/R-package/man/xgb.plot.shap.Rd +++ b/R-package/man/xgb.plot.shap.Rd @@ -131,6 +131,7 @@ bst <- xgboost(agaricus.train$data, agaricus.train$label, nrounds = 50, xgb.plot.shap(agaricus.test$data, model = bst, features = "odor=none") contr <- predict(bst, agaricus.test$data, predcontrib = TRUE) xgb.plot.shap(agaricus.test$data, contr, model = bst, top_n = 12, n_col = 3) +xgb.ggplot.shap.summary(agaricus.test$data, contr, model = bst, top_n = 12) # Summary plot # multiclass example - plots for each class separately: nclass <- 3 @@ -149,6 +150,7 @@ xgb.plot.shap(x, model = mbst, trees = trees0 + 1, target_class = 1, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17) xgb.plot.shap(x, model = mbst, trees = trees0 + 2, target_class = 2, top_n = 4, n_col = 2, col = col, pch = 16, pch_NA = 17) +xgb.ggplot.shap.summary(x, model = mbst, target_class = 0, top_n = 4) # Summary plot } \references{ diff --git a/R-package/man/xgb.plot.shap.summary.Rd b/R-package/man/xgb.plot.shap.summary.Rd new file mode 100644 index 000000000000..f757fd7404a3 --- /dev/null +++ b/R-package/man/xgb.plot.shap.summary.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.ggplot.R, R/xgb.plot.shap.R +\name{xgb.ggplot.shap.summary} +\alias{xgb.ggplot.shap.summary} +\alias{xgb.plot.shap.summary} +\title{SHAP contribution dependency summary plot} +\usage{ +xgb.ggplot.shap.summary( + data, + shap_contrib = NULL, + features = NULL, + top_n = 10, + model = NULL, + trees = NULL, + target_class = NULL, + approxcontrib = FALSE, + subsample = NULL +) + +xgb.plot.shap.summary( + data, + shap_contrib = NULL, + features = NULL, + top_n = 10, + model = NULL, + trees = NULL, + target_class = NULL, + approxcontrib = FALSE, + subsample = NULL +) +} +\arguments{ +\item{data}{data as a \code{matrix} or \code{dgCMatrix}.} + +\item{shap_contrib}{a matrix of SHAP contributions that was computed earlier for the above +\code{data}. When it is NULL, it is computed internally using \code{model} and \code{data}.} + +\item{features}{a vector of either column indices or of feature names to plot. When it is NULL, +feature importance is calculated, and \code{top_n} high ranked features are taken.} + +\item{top_n}{when \code{features} is NULL, top_n [1, 100] most important features in a model are taken.} + +\item{model}{an \code{xgb.Booster} model. It has to be provided when either \code{shap_contrib} +or \code{features} is missing.} + +\item{trees}{passed to \code{\link{xgb.importance}} when \code{features = NULL}.} + +\item{target_class}{is only relevant for multiclass models. When it is set to a 0-based class index, +only SHAP contributions for that specific class are used. +If it is not set, SHAP importances are averaged over all classes.} + +\item{approxcontrib}{passed to \code{\link{predict.xgb.Booster}} when \code{shap_contrib = NULL}.} + +\item{subsample}{a random fraction of data points to use for plotting. When it is NULL, +it is set so that up to 100K data points are used.} +} +\value{ +A \code{ggplot2} object. +} +\description{ +Compare SHAP contributions of different features. 
+} +\details{ +A point plot (each point representing one sample from \code{data}) is +produced for each feature, with the points plotted on the SHAP value axis. +Each point (observation) is coloured based on its feature value. The plot +hence allows us to see which features have a negative / positive contribution +on the model prediction, and whether the contribution is different for larger +or smaller values of the feature. We effectively try to replicate the +\code{summary_plot} function from https://github.com/slundberg/shap. +} +\examples{ +# See \code{\link{xgb.plot.shap}}. +} +\seealso{ +\code{\link{xgb.plot.shap}}, \code{\link{xgb.ggplot.shap.summary}}, + \url{https://github.com/slundberg/shap} +} diff --git a/R-package/man/xgb.shap.data.Rd b/R-package/man/xgb.shap.data.Rd new file mode 100644 index 000000000000..952445610bd2 --- /dev/null +++ b/R-package/man/xgb.shap.data.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.plot.shap.R +\name{xgb.shap.data} +\alias{xgb.shap.data} +\title{Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc. +Internal utility function.} +\usage{ +xgb.shap.data( + data, + shap_contrib = NULL, + features = NULL, + top_n = 1, + model = NULL, + trees = NULL, + target_class = NULL, + approxcontrib = FALSE, + subsample = NULL, + max_observations = 1e+05 +) +} +\value{ +A list containing: 'data', a matrix containing sample observations + and their feature values; 'shap_contrib', a matrix containing the SHAP contribution + values for these observations. +} +\description{ +Prepare data for SHAP plots. To be used in xgb.plot.shap, xgb.plot.shap.summary, etc. +Internal utility function. +} diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 94db595cbc65..e68962fb6b41 100644 --- a/R-package/man/xgb.train.Rd +++ b/R-package/man/xgb.train.Rd @@ -215,16 +215,16 @@ User may set one or several \code{eval_metric} parameters. Note that when using a customized metric, only this single metric can be used. The following is the list of built-in metrics for which Xgboost provides optimized implementation: \itemize{ - \item \code{rmse} root mean square error. \url{http://en.wikipedia.org/wiki/Root_mean_square_error} - \item \code{logloss} negative log-likelihood. \url{http://en.wikipedia.org/wiki/Log-likelihood} - \item \code{mlogloss} multiclass logloss. \url{http://wiki.fast.ai/index.php/Log_Loss} + \item \code{rmse} root mean square error. \url{https://en.wikipedia.org/wiki/Root_mean_square_error} + \item \code{logloss} negative log-likelihood. \url{https://en.wikipedia.org/wiki/Log-likelihood} + \item \code{mlogloss} multiclass logloss. \url{https://scikit-learn.org/stable/modules/generated/sklearn.metrics.log_loss.html} \item \code{error} Binary classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. By default, it uses the 0.5 threshold for predicted values to define negative and positive instances. Different threshold (e.g., 0.) could be specified as "error@0." \item \code{merror} Multiclass classification error rate. It is calculated as \code{(# wrong cases) / (# all cases)}. - \item \code{auc} Area under the curve. \url{http://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. + \item \code{auc} Area under the curve. \url{https://en.wikipedia.org/wiki/Receiver_operating_characteristic#'Area_under_curve} for ranking evaluation. \item \code{aucpr} Area under the PR curve. 
\url{https://en.wikipedia.org/wiki/Precision_and_recall} for ranking evaluation. - \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{http://en.wikipedia.org/wiki/NDCG} + \item \code{ndcg} Normalized Discounted Cumulative Gain (for ranking task). \url{https://en.wikipedia.org/wiki/NDCG} } The following callbacks are automatically created when certain parameters are set: diff --git a/R-package/tests/generate_models_params.R b/R-package/tests/generate_models_params.R deleted file mode 100644 index 0f71ab22c640..000000000000 --- a/R-package/tests/generate_models_params.R +++ /dev/null @@ -1,10 +0,0 @@ -model_generator_metadata <- function() { - return (list( - kRounds = 2, - kRows = 1000, - kCols = 4, - kForests = 2, - kMaxDepth = 2, - kClasses = 3 - )) -} diff --git a/R-package/tests/generate_models.R b/R-package/tests/helper_scripts/generate_models.R similarity index 96% rename from R-package/tests/generate_models.R rename to R-package/tests/helper_scripts/generate_models.R index 854b7944e4a6..d38b23a19374 100644 --- a/R-package/tests/generate_models.R +++ b/R-package/tests/helper_scripts/generate_models.R @@ -5,7 +5,14 @@ library(Matrix) source('./generate_models_params.R') set.seed(0) -metadata <- model_generator_metadata() +metadata <- list( + kRounds = 2, + kRows = 1000, + kCols = 4, + kForests = 2, + kMaxDepth = 2, + kClasses = 3 +) X <- Matrix(data = rnorm(metadata$kRows * metadata$kCols), nrow = metadata$kRows, ncol = metadata$kCols, sparse = TRUE) w <- runif(metadata$kRows) diff --git a/R-package/tests/run_lint.R b/R-package/tests/helper_scripts/run_lint.R similarity index 100% rename from R-package/tests/run_lint.R rename to R-package/tests/helper_scripts/run_lint.R diff --git a/R-package/tests/testthat/test_model_compatibility.R b/R-package/tests/testthat/test_model_compatibility.R index 7204ed89142c..105a60d169a1 100644 --- a/R-package/tests/testthat/test_model_compatibility.R +++ b/R-package/tests/testthat/test_model_compatibility.R @@ -1,10 +1,16 @@ require(xgboost) require(jsonlite) -source('../generate_models_params.R') context("Models from previous versions of XGBoost can be loaded") -metadata <- model_generator_metadata() +metadata <- list( + kRounds = 2, + kRows = 1000, + kCols = 4, + kForests = 2, + kMaxDepth = 2, + kClasses = 3 +) run_model_param_check <- function (config) { testthat::expect_equal(config$learner$learner_model_param$num_feature, '4') diff --git a/R-package/vignettes/discoverYourData.Rmd b/R-package/vignettes/discoverYourData.Rmd index 8181fcbb969b..c41f4f125def 100644 --- a/R-package/vignettes/discoverYourData.Rmd +++ b/R-package/vignettes/discoverYourData.Rmd @@ -57,7 +57,7 @@ To answer the question above we will convert *categorical* variables to `numeric In this Vignette we will see how to transform a *dense* `data.frame` (*dense* = few zeroes in the matrix) with *categorical* variables to a very *sparse* matrix (*sparse* = lots of zero in the matrix) of `numeric` features. -The method we are going to see is usually called [one-hot encoding](http://en.wikipedia.org/wiki/One-hot). +The method we are going to see is usually called [one-hot encoding](https://en.wikipedia.org/wiki/One-hot). The first step is to load `Arthritis` dataset in memory and wrap it with `data.table` package. 
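
Editor's note (not part of the diff): the vignette hunks above and below discuss converting the categorical `Arthritis` columns into a sparse one-hot / dummy-contrast matrix, and a later hunk passes a `sparse_matrix` object to `xgboost()`. The following minimal sketch illustrates that encoding step; the model formula and column choices are assumptions for illustration and may differ from the exact code in the vignette.

```r
# Illustrative sketch of the one-hot / dummy-contrast encoding described in
# discoverYourData.Rmd (assumes the vcd package, which ships the Arthritis data).
library(data.table)
library(Matrix)
library(vcd)

data(Arthritis)
df <- data.table(Arthritis, keep.rownames = FALSE)

# sparse.model.matrix() expands each level of every categorical feature into
# its own {0, 1} column; dropping the intercept keeps the dummy-contrast form.
# The formula below is an assumption mirroring the vignette's intent.
sparse_matrix <- sparse.model.matrix(Improved ~ . - 1, data = df)
head(sparse_matrix)
```
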
@@ -66,7 +66,7 @@ data(Arthritis) df <- data.table(Arthritis, keep.rownames = FALSE) ``` -> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](http://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. +> `data.table` is 100% compliant with **R** `data.frame` but its syntax is more consistent and its performance for large dataset is [best in class](https://stackoverflow.com/questions/21435339/data-table-vs-dplyr-can-one-do-something-well-the-other-cant-or-does-poorly) (`dplyr` from **R** and `Pandas` from **Python** [included](https://github.com/Rdatatable/data.table/wiki/Benchmarks-%3A-Grouping)). Some parts of **Xgboost** **R** package use `data.table`. The first thing we want to do is to have a look to the first few lines of the `data.table`: @@ -137,8 +137,8 @@ levels(df[,Treatment]) #### Encoding categorical features Next step, we will transform the categorical data to dummy variables. -Several encoding methods exist, e.g., [one-hot encoding](http://en.wikipedia.org/wiki/One-hot) is a common approach. -We will use the [dummy contrast coding](http://www.ats.ucla.edu/stat/r/library/contrast_coding.htm#dummy) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)). +Several encoding methods exist, e.g., [one-hot encoding](https://en.wikipedia.org/wiki/One-hot) is a common approach. +We will use the [dummy contrast coding](https://stats.idre.ucla.edu/r/library/r-library-contrast-coding-systems-for-categorical-variables/) which is popular because it produces "full rank" encoding (also see [this blog post by Max Kuhn](http://appliedpredictivemodeling.com/blog/2013/10/23/the-basics-of-encoding-categorical-data-for-predictive-models)). The purpose is to transform each value of each *categorical* feature into a *binary* feature `{0, 1}`. @@ -176,7 +176,7 @@ bst <- xgboost(data = sparse_matrix, label = output_vector, max_depth = 4, You can see some `train-error: 0.XXXXX` lines followed by a number. It decreases. Each line shows how well the model explains your data. Lower is better. -A model which fits too well may [overfit](http://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). +A model which fits too well may [overfit](https://en.wikipedia.org/wiki/Overfitting) (meaning it copy/paste too much the past, and won't be that good to predict the future). > Here you can see the numbers decrease until line 7 and then increase. > @@ -304,7 +304,7 @@ Linear model may not be that smart in this scenario. Special Note: What about Random Forests™? ----------------------------------------- -As you may know, [Random Forests™](http://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](http://en.wikipedia.org/wiki/Ensemble_learning) family. +As you may know, [Random Forests™](https://en.wikipedia.org/wiki/Random_forest) algorithm is cousin with boosting and both are part of the [ensemble learning](https://en.wikipedia.org/wiki/Ensemble_learning) family. 
Both trains several decision trees for one dataset. The *main* difference is that in Random Forests™, trees are independent and in boosting, the tree `N+1` focus its learning on the loss (<=> what has not been well modeled by the tree `N`). diff --git a/R-package/vignettes/xgboost.bib b/R-package/vignettes/xgboost.bib index f21bdae1672e..5deb1e13d1f8 100644 --- a/R-package/vignettes/xgboost.bib +++ b/R-package/vignettes/xgboost.bib @@ -24,7 +24,7 @@ @misc{ author = "K. Bache and M. Lichman", year = "2013", title = "{UCI} Machine Learning Repository", - url = "http://archive.ics.uci.edu/ml", + url = "http://archive.ics.uci.edu/ml/", institution = "University of California, Irvine, School of Information and Computer Sciences" } diff --git a/R-package/vignettes/xgboostPresentation.Rmd b/R-package/vignettes/xgboostPresentation.Rmd index c2f990e1480a..ab72c677938e 100644 --- a/R-package/vignettes/xgboostPresentation.Rmd +++ b/R-package/vignettes/xgboostPresentation.Rmd @@ -68,7 +68,7 @@ The version 0.4-2 is on CRAN, and you can install it by: install.packages("xgboost") ``` -Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost) +Formerly available versions can be obtained from the CRAN [archive](https://cran.r-project.org/src/contrib/Archive/xgboost/) ## Learning