Clean up CI job for R #5232

Merged
4 commits merged on Jan 9, 2022
Changes from all commits
83 changes: 18 additions & 65 deletions .github/workflows/master.yml
@@ -49,32 +49,21 @@ jobs:
./dev/lint.sh
r:
runs-on: ubuntu-latest
defaults:
run:
working-directory: mlflow/R/mlflow
steps:
- name: Uninstall default-jdk and adoptopenjdk-11-hotspot if present
run: |
# deleting other version(s) of JDK because they are not needed and they might interfere with JNI linker configuration in the 'setup-r' step
sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || :
- uses: actions/checkout@master
- uses: actions/checkout@v2
- uses: actions/setup-java@v2
with:
java-version: 11
distribution: 'adopt'
- name: Re-configure dynamic linker run-time bindings for adoptopenjdk-8-hotspot-amd64
run: |
sudo mkdir -p /etc/ld.so.conf.d
sudo bash -c "cat > /etc/ld.so.conf.d/jre.conf <<< '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server'"
sudo ldconfig -v
- uses: r-lib/actions/setup-r@v1
# This step dumps the current set of R dependencies and R version into files to be used
# as a cache key when caching/restoring R dependencies.
- name: Query dependencies
- name: Dump dependencies
run: |
print(R.version)
install.packages('remotes')
saveRDS(remotes::dev_package_deps("mlflow/R/mlflow", dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

Rscript -e 'source(".dump-r-dependencies.R", echo = TRUE)'
- name: Get OS name
id: os-name
run: |
@@ -87,62 +76,26 @@ jobs:
path: ${{ env.R_LIBS_USER }}
# We cache R dependencies based on a tuple of the current OS, the R version, and the list of
# R dependencies
key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
- name: Install system dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo R CMD javareconf
key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('mlflow/R/mlflow/R-version') }}-1-${{ hashFiles('mlflow/R/mlflow/depends.Rds') }}
# - name: Install system dependencies
# run: |
# sudo apt-get update -y
# sudo apt-get install -y libcurl4-openssl-dev pandoc
- name: Install dependencies
working-directory: mlflow/R/mlflow
run: |
install.packages("devtools")
remotes::install_deps('.', dependencies = TRUE, upgrade = FALSE)
shell: Rscript {0}
Rscript -e 'source(".install-deps.R", echo=TRUE)'
- name: Build package
run: |
./build-package.sh
- name: Create test environment
run: |
source ./dev/install-common-deps.sh
cd mlflow/R/mlflow
R CMD build .
cd tests
Rscript -e 'source("../.create-test-env.R", echo=TRUE)'
Rscript -e 'source(".create-test-env.R", echo=TRUE)'
- name: Run tests
env:
# Hack to get around this issue:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
#
# The system clock check during `R CMD check` relies on two external web APIs and fails
# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
_R_CHECK_SYSTEM_CLOCK_: FALSE
LINTR_COMMENT_BOT: false
run: |
export LINTR_COMMENT_BOT=false
cd mlflow/R/mlflow/tests
# `devtools::check_built` requires `pandoc` to analyze README.md
sudo apt-get install pandoc -y
cd tests
Rscript -e 'source("../.run-tests.R", echo=TRUE)'
- name: Calculate code coverage
if: ${{ success() }}
run: |
export MLFLOW_HOME=$(pwd)
cd mlflow/R/mlflow/tests
Rscript -e 'covr::codecov()' || :
env:
COVR_RUNNING: true
- name: Test package build
working-directory: mlflow/R/mlflow
run: |
./build-package.sh
- name: Show 00check.log on failure
if: ${{ failure() }}
run: |
LOG_FILE="${HOME}/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log"
[ -r "${LOG_FILE}" ] && cat "${LOG_FILE}"
cp "${LOG_FILE}" /tmp
- uses: actions/upload-artifact@v1
if: failure()
with:
name: 00check.log
path: /tmp/00check.log

# python-skinny tests cover a subset of mlflow functionality
# that is meant to be supported with a smaller dependency footprint.
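The new "Dump dependencies" step sources a `.dump-r-dependencies.R` script instead of running inline R. That script is not part of this diff, so its exact contents are an assumption; a plausible sketch, reconstructed from the inline code it replaces and from the new cache key (which hashes `mlflow/R/mlflow/R-version` and `mlflow/R/mlflow/depends.Rds`, i.e. files written into the working directory rather than `.github/`):

# .dump-r-dependencies.R (sketch -- actual contents are not shown in this PR)
print(R.version)
install.packages("remotes")
# Resolve the package's development dependencies and persist them for use as a cache key
saveRDS(remotes::dev_package_deps(".", dependencies = TRUE), "depends.Rds", version = 2)
# Record the R version alongside them
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), "R-version")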
29 changes: 25 additions & 4 deletions mlflow/R/mlflow/.build-package.R
@@ -1,11 +1,32 @@
source(".utils.R")

# Bundle up the package into a .tar.gz file. This file will be submitted to CRAN.
# Bundle up the package into a .tar.gz file.
package_path <- devtools::build(".", path = ".")
# Run the submission check against the built package.

# Hack to get around this issue:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
#
# The system clock check during `R CMD check` relies on two external web APIs and fails
# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
Sys.setenv("_R_CHECK_SYSTEM_CLOCK_" = "FALSE")

# Run the check with `cran = TRUE`
devtools::check_built(
path = normalizePath(package_path),
path = package_path,
cran = TRUE,
remote = should_enable_cran_incoming_checks(),
error_on = "note",
args = c("--no-tests", "--as-cran"),
check_dir = getwd(),
args = "--no-tests",
)

# Run the check with `cran = FALSE` to detect unused imports:
# https://github.com/wch/r-source/blob/b12ffba7584825d6b11bba8b7dbad084a74c1c20/src/library/tools/R/check.R#L6070
devtools::check_built(
path = package_path,
cran = FALSE,
error_on = "note",
check_dir = getwd(),
args = "--no-tests",
)
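To reproduce the packaging check outside CI, the script can be sourced directly; a minimal sketch, assuming the working directory is `mlflow/R/mlflow` and that `devtools` plus the package dependencies are already installed:

# Builds mlflow_<version>.tar.gz and runs R CMD check twice: once with cran = TRUE
# (CRAN incoming checks gated by should_enable_cran_incoming_checks() from .utils.R)
# and once with cran = FALSE to surface the unused-import NOTE.
source(".build-package.R", echo = TRUE)
# check_dir = getwd() leaves the *.Rcheck output next to the built tarball.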
10 changes: 5 additions & 5 deletions mlflow/R/mlflow/.create-test-env.R
@@ -1,15 +1,15 @@
parent_dir <- dir("../", full.names = TRUE)
package <- parent_dir[grepl("mlflow_", parent_dir)]
# Install MLflow for R
files <- dir(".", full.names = TRUE)
package <- files[grepl("mlflow_.+\\.tar\\.gz$", files)]
install.packages(package)

mlflow:::mlflow_maybe_create_conda_env(python_version = "3.7")
library(reticulate)
use_condaenv(mlflow:::mlflow_conda_env_name())
# Install python dependencies
reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../.."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
# pinning tensorflow version to 1.14 until test_keras_model.R is fixed
keras::install_keras(method = "conda", envname = mlflow:::mlflow_conda_env_name(), tensorflow="1.15.2")
# pinning h5py < 3.0.0 to avoid this issue: https://github.com/tensorflow/tensorflow/issues/44467
# TODO: unpin after we use tensorflow >= 2.4
reticulate::conda_install("'h5py<3.0.0'", envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../../../."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
reticulate::conda_install("xgboost", envname = mlflow:::mlflow_conda_env_name())
reticulate::conda_install(paste0("h2o==", packageVersion("h2o")), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
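After the environment is created, a quick sanity check (a sketch, not part of the PR) confirms that reticulate binds to the same conda environment before the test run:

# Verify the conda environment the tests will use
reticulate::use_condaenv(mlflow:::mlflow_conda_env_name())
reticulate::py_config()                    # shows the resolved Python binary and environment
reticulate::py_module_available("mlflow")  # should be TRUE once the pip dependencies are installed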
24 changes: 2 additions & 22 deletions mlflow/R/mlflow/.run-tests.R
@@ -1,23 +1,3 @@
source("../.utils.R")
reticulate::use_condaenv(mlflow:::mlflow_conda_env_name())

parent_dir <- dir("../", full.names = TRUE)
package <- parent_dir[grepl("mlflow_", parent_dir)]

library(reticulate)
use_condaenv(mlflow:::mlflow_conda_env_name())

devtools::check_built(
path = package,
cran = TRUE,
remote = should_enable_cran_incoming_checks(),
error_on = "note",
args = "--no-tests"
)
# This runs checks that are disabled when `cran` is TRUE (e.g. unused import check).
devtools::check_built(
path = package,
cran = FALSE,
error_on = "note",
args = "--no-tests"
)
source("testthat.R")
source("testthat.R", echo = TRUE)
2 changes: 1 addition & 1 deletion mlflow/R/mlflow/tests/testthat.R
@@ -21,7 +21,7 @@ library(mlflow)

if (identical(Sys.getenv("NOT_CRAN"), "true")) {
message("Current working directory: ", getwd())
mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../../.")
mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../..")
message('MLFLOW_HOME: ', mlflow_home)
test_check("mlflow")
}
6 changes: 3 additions & 3 deletions mlflow/R/mlflow/tests/testthat/test-model-h2o.R
@@ -40,7 +40,7 @@ test_that("can print model correctly after it is loaded", {
})

test_that("can load and predict with python pyfunct and h2o backend", {
pyfunc <- import("mlflow.pyfunc")
pyfunc <- reticulate::import("mlflow.pyfunc")
py_model <- pyfunc$load_model(testthat_model_dir)

expected <- as.data.frame(h2o::h2o.predict(model, h2o::as.h2o(test)))
@@ -50,9 +50,9 @@ test_that("can load and predict with python pyfunct and h2o backend", {
as.data.frame(py_model$predict(test)), expected
)

mlflow.h2o <- import("mlflow.h2o")
mlflow.h2o <- reticulate::import("mlflow.h2o")
h2o_native_model <- mlflow.h2o$load_model(testthat_model_dir)
h2o <- import("h2o")
h2o <- reticulate::import("h2o")

expect_equivalent(
as.data.frame(
6 changes: 3 additions & 3 deletions mlflow/R/mlflow/tests/testthat/test-model-xgboost.R
@@ -40,16 +40,16 @@ test_that("can load model and predict with rfunc backend", {
})

test_that("can load and predict with python pyfunct and xgboost backend", {
pyfunc <- import("mlflow.pyfunc")
pyfunc <- reticulate::import("mlflow.pyfunc")
py_model <- pyfunc$load_model(testthat_model_dir)
expect_equal(
as.numeric(py_model$predict(test$data)),
unname(predict(model, as.matrix(test$data)))
)

mlflow.xgboost <- import("mlflow.xgboost")
mlflow.xgboost <- reticulate::import("mlflow.xgboost")
xgboost_native_model <- mlflow.xgboost$load_model(testthat_model_dir)
xgboost <- import("xgboost")
xgboost <- reticulate::import("xgboost")

expect_equivalent(
as.numeric(xgboost_native_model$predict(xgboost$DMatrix(test$data))),