Skip to content

Commit

Permalink
Clean up CI job for R (#5232)
Browse files Browse the repository at this point in the history
* clean r job

Signed-off-by: harupy <hkawamura0130@gmail.com>

* build package once

Signed-off-by: harupy <hkawamura0130@gmail.com>

* fix named argument

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove system dependencies

Signed-off-by: harupy <hkawamura0130@gmail.com>
  • Loading branch information
harupy committed Jan 9, 2022
1 parent d5b0b55 commit dfa9fc7
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 103 deletions.
83 changes: 18 additions & 65 deletions .github/workflows/master.yml
Expand Up @@ -49,32 +49,21 @@ jobs:
./dev/lint.sh
r:
runs-on: ubuntu-latest
defaults:
run:
working-directory: mlflow/R/mlflow
steps:
- name: Uninstall default-jdk and adoptopenjdk-11-hotspot if present
run: |
# Remove other JDK versions: they are not needed and might interfere with the JNI linker configuration in the 'setup-r' step
sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || :
- uses: actions/checkout@master
- uses: actions/checkout@v2
- uses: actions/setup-java@v2
with:
java-version: 11
distribution: 'adopt'
- name: Re-configure dynamic linker run-time bindings for adoptopenjdk-8-hotspot-amd64
run: |
sudo mkdir -p /etc/ld.so.conf.d
sudo bash -c "cat > /etc/ld.so.conf.d/jre.conf <<< '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server'"
sudo ldconfig -v
- uses: r-lib/actions/setup-r@v1
# This step dumps the current set of R dependencies and R version into files to be used
# as a cache key when caching/restoring R dependencies.
- name: Query dependencies
- name: Dump dependencies
run: |
print(R.version)
install.packages('remotes')
saveRDS(remotes::dev_package_deps("mlflow/R/mlflow", dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

Rscript -e 'source(".dump-r-dependencies.R", echo = TRUE)'
- name: Get OS name
id: os-name
run: |
Expand All @@ -87,62 +76,26 @@ jobs:
path: ${{ env.R_LIBS_USER }}
# We cache R dependencies based on a tuple of the current OS, the R version, and the list of
# R dependencies
key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
- name: Install system dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev
sudo R CMD javareconf
key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('mlflow/R/mlflow/R-version') }}-1-${{ hashFiles('mlflow/R/mlflow/depends.Rds') }}
# - name: Install system dependencies
# run: |
# sudo apt-get update -y
# sudo apt-get install -y libcurl4-openssl-dev pandoc
- name: Install dependencies
working-directory: mlflow/R/mlflow
run: |
install.packages("devtools")
remotes::install_deps('.', dependencies = TRUE, upgrade = FALSE)
shell: Rscript {0}
Rscript -e 'source(".install-deps.R", echo=TRUE)'
- name: Build package
run: |
./build-package.sh
- name: Create test environment
run: |
source ./dev/install-common-deps.sh
cd mlflow/R/mlflow
R CMD build .
cd tests
Rscript -e 'source("../.create-test-env.R", echo=TRUE)'
Rscript -e 'source(".create-test-env.R", echo=TRUE)'
- name: Run tests
env:
# Hack to get around this issue:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
#
# The system clock check during `R CMD check` relies on two external web APIs and fails
# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
_R_CHECK_SYSTEM_CLOCK_: FALSE
LINTR_COMMENT_BOT: false
run: |
export LINTR_COMMENT_BOT=false
cd mlflow/R/mlflow/tests
# `devtools::check_built` requires `pandoc` to analyze README.md
sudo apt-get install pandoc -y
cd tests
Rscript -e 'source("../.run-tests.R", echo=TRUE)'
- name: Calculate code coverage
if: ${{ success() }}
run: |
export MLFLOW_HOME=$(pwd)
cd mlflow/R/mlflow/tests
Rscript -e 'covr::codecov()' || :
env:
COVR_RUNNING: true
- name: Test package build
working-directory: mlflow/R/mlflow
run: |
./build-package.sh
- name: Show 00check.log on failure
if: ${{ failure() }}
run: |
LOG_FILE="${HOME}/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log"
[ -r "${LOG_FILE}" ] && cat "${LOG_FILE}"
cp "${LOG_FILE}" /tmp
- uses: actions/upload-artifact@v1
if: failure()
with:
name: 00check.log
path: /tmp/00check.log
# python-skinny tests cover a subset of mlflow functionality
# that is meant to be supported with a smaller dependency footprint.
Expand Down
29 changes: 25 additions & 4 deletions mlflow/R/mlflow/.build-package.R
@@ -1,11 +1,32 @@
source(".utils.R")

# Bundle up the package into a .tar.gz file. This file will be submitted to CRAN.
# Bundle up the package into a .tar.gz file.
package_path <- devtools::build(".", path = ".")
# Run the submission check against the built package.

# Hack to get around this issue:
# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
#
# The system clock check during `R CMD check` relies on two external web APIs and fails
# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
Sys.setenv("_R_CHECK_SYSTEM_CLOCK_" = "FALSE")

# Run the check with `cran = TRUE`
devtools::check_built(
path = normalizePath(package_path),
path = package_path,
cran = TRUE,
remote = should_enable_cran_incoming_checks(),
error_on = "note",
args = c("--no-tests", "--as-cran"),
check_dir = getwd(),
args = "--no-tests",
)

# Run the check with `cran = FALSE` to detect unused imports:
# https://github.com/wch/r-source/blob/b12ffba7584825d6b11bba8b7dbad084a74c1c20/src/library/tools/R/check.R#L6070
devtools::check_built(
path = package_path,
cran = FALSE,
error_on = "note",
check_dir = getwd(),
args = "--no-tests",
)
10 changes: 5 additions & 5 deletions mlflow/R/mlflow/.create-test-env.R
@@ -1,15 +1,15 @@
parent_dir <- dir("../", full.names = TRUE)
package <- parent_dir[grepl("mlflow_", parent_dir)]
# Install MLflow for R
files <- dir(".", full.names = TRUE)
package <- files[grepl("mlflow_.+\\.tar\\.gz$", files)]
install.packages(package)

mlflow:::mlflow_maybe_create_conda_env(python_version = "3.7")
library(reticulate)
use_condaenv(mlflow:::mlflow_conda_env_name())
# Install python dependencies
reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../.."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
# pinning tensorflow version to 1.15.2 until test_keras_model.R is fixed
keras::install_keras(method = "conda", envname = mlflow:::mlflow_conda_env_name(), tensorflow="1.15.2")
# pinning h5py < 3.0.0 to avoid this issue: https://github.com/tensorflow/tensorflow/issues/44467
# TODO: unpin after we use tensorflow >= 2.4
reticulate::conda_install("'h5py<3.0.0'", envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../../../."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
reticulate::conda_install("xgboost", envname = mlflow:::mlflow_conda_env_name())
reticulate::conda_install(paste0("h2o==", packageVersion("h2o")), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
24 changes: 2 additions & 22 deletions mlflow/R/mlflow/.run-tests.R
@@ -1,23 +1,3 @@
source("../.utils.R")
reticulate::use_condaenv(mlflow:::mlflow_conda_env_name())

parent_dir <- dir("../", full.names = TRUE)
package <- parent_dir[grepl("mlflow_", parent_dir)]

library(reticulate)
use_condaenv(mlflow:::mlflow_conda_env_name())

devtools::check_built(
path = package,
cran = TRUE,
remote = should_enable_cran_incoming_checks(),
error_on = "note",
args = "--no-tests"
)
# This runs checks that are disabled when `cran` is TRUE (e.g. unused import check).
devtools::check_built(
path = package,
cran = FALSE,
error_on = "note",
args = "--no-tests"
)
source("testthat.R")
source("testthat.R", echo = TRUE)
2 changes: 1 addition & 1 deletion mlflow/R/mlflow/tests/testthat.R
Expand Up @@ -21,7 +21,7 @@ library(mlflow)

if (identical(Sys.getenv("NOT_CRAN"), "true")) {
message("Current working directory: ", getwd())
mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../../.")
mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../..")
message('MLFLOW_HOME: ', mlflow_home)
test_check("mlflow")
}
6 changes: 3 additions & 3 deletions mlflow/R/mlflow/tests/testthat/test-model-h2o.R
Expand Up @@ -40,7 +40,7 @@ test_that("can print model correctly after it is loaded", {
})

test_that("can load and predict with python pyfunct and h2o backend", {
pyfunc <- import("mlflow.pyfunc")
pyfunc <- reticulate::import("mlflow.pyfunc")
py_model <- pyfunc$load_model(testthat_model_dir)

expected <- as.data.frame(h2o::h2o.predict(model, h2o::as.h2o(test)))
Expand All @@ -50,9 +50,9 @@ test_that("can load and predict with python pyfunct and h2o backend", {
as.data.frame(py_model$predict(test)), expected
)

mlflow.h2o <- import("mlflow.h2o")
mlflow.h2o <- reticulate::import("mlflow.h2o")
h2o_native_model <- mlflow.h2o$load_model(testthat_model_dir)
h2o <- import("h2o")
h2o <- reticulate::import("h2o")

expect_equivalent(
as.data.frame(
Expand Down
6 changes: 3 additions & 3 deletions mlflow/R/mlflow/tests/testthat/test-model-xgboost.R
Expand Up @@ -40,16 +40,16 @@ test_that("can load model and predict with rfunc backend", {
})

test_that("can load and predict with python pyfunct and xgboost backend", {
pyfunc <- import("mlflow.pyfunc")
pyfunc <- reticulate::import("mlflow.pyfunc")
py_model <- pyfunc$load_model(testthat_model_dir)
expect_equal(
as.numeric(py_model$predict(test$data)),
unname(predict(model, as.matrix(test$data)))
)

mlflow.xgboost <- import("mlflow.xgboost")
mlflow.xgboost <- reticulate::import("mlflow.xgboost")
xgboost_native_model <- mlflow.xgboost$load_model(testthat_model_dir)
xgboost <- import("xgboost")
xgboost <- reticulate::import("xgboost")

expect_equivalent(
as.numeric(xgboost_native_model$predict(xgboost$DMatrix(test$data))),
Expand Down

0 comments on commit dfa9fc7

Please sign in to comment.