From dfa9fc710238ff8f7662de923be409297169f0d0 Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Sun, 9 Jan 2022 23:16:46 +0900 Subject: [PATCH] Clean up CI job for R (#5232) * clean r job Signed-off-by: harupy * build package once Signed-off-by: harupy * fix named argument Signed-off-by: harupy * remove system dependencies Signed-off-by: harupy --- .github/workflows/master.yml | 83 ++++--------------- mlflow/R/mlflow/.build-package.R | 29 ++++++- mlflow/R/mlflow/.create-test-env.R | 10 +-- mlflow/R/mlflow/.run-tests.R | 24 +----- mlflow/R/mlflow/tests/testthat.R | 2 +- .../R/mlflow/tests/testthat/test-model-h2o.R | 6 +- .../tests/testthat/test-model-xgboost.R | 6 +- 7 files changed, 57 insertions(+), 103 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f0d6a309f4665..edb2449f016df 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -49,32 +49,21 @@ jobs: ./dev/lint.sh r: runs-on: ubuntu-latest + defaults: + run: + working-directory: mlflow/R/mlflow steps: - - name: Uninstall default-jdk and adoptopenjdk-11-hotspot if present - run: | - # deleting other version(s) of JDK because they are not needed and they might interfere with JNI linker configuration in the 'setup-r' step - sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || : - - uses: actions/checkout@master + - uses: actions/checkout@v2 - uses: actions/setup-java@v2 with: java-version: 11 distribution: 'adopt' - - name: Re-configure dynamic linker run-time bindings for adoptopenjdk-8-hotspot-amd64 - run: | - sudo mkdir -p /etc/ld.so.conf.d - sudo bash -c "cat > /etc/ld.so.conf.d/jre.conf <<< '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server'" - sudo ldconfig -v - uses: r-lib/actions/setup-r@v1 # This step dumps the current set of R dependencies and R version into files to be used # as a cache key when caching/restoring R dependencies. - - name: Query dependencies + - name: Dump dependencies run: | - print(R.version) - install.packages('remotes') - saveRDS(remotes::dev_package_deps("mlflow/R/mlflow", dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} - + Rscript -e 'source(".dump-r-dependencies.R", echo = TRUE)' - name: Get OS name id: os-name run: | @@ -87,62 +76,26 @@ jobs: path: ${{ env.R_LIBS_USER }} # We cache R dependencies based on a tuple of the current OS, the R version, and the list of # R dependencies - key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - - name: Install system dependencies - run: | - sudo apt-get install -y libcurl4-openssl-dev - sudo R CMD javareconf + key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('mlflow/R/mlflow/R-version') }}-1-${{ hashFiles('mlflow/R/mlflow/depends.Rds') }} + # - name: Install system dependencies + # run: | + # sudo apt-get update -y + # sudo apt-get install -y libcurl4-openssl-dev pandoc - name: Install dependencies - working-directory: mlflow/R/mlflow run: | - install.packages("devtools") - remotes::install_deps('.', dependencies = TRUE, upgrade = FALSE) - shell: Rscript {0} + Rscript -e 'source(".install-deps.R", echo=TRUE)' + - name: Build package + run: | + ./build-package.sh - name: Create test environment run: | - source ./dev/install-common-deps.sh - cd mlflow/R/mlflow - R CMD build . - cd tests - Rscript -e 'source("../.create-test-env.R", echo=TRUE)' + Rscript -e 'source(".create-test-env.R", echo=TRUE)' - name: Run tests env: - # Hack to get around this issue: - # https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html - # - # The system clock check during `R CMD check` relies on two external web APIs and fails - # when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it: - # https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511 - _R_CHECK_SYSTEM_CLOCK_: FALSE + LINTR_COMMENT_BOT: false run: | - export LINTR_COMMENT_BOT=false - cd mlflow/R/mlflow/tests - # `devtools::check_built` requires `pandoc` to analyze README.md - sudo apt-get install pandoc -y + cd tests Rscript -e 'source("../.run-tests.R", echo=TRUE)' - - name: Calculate code coverage - if: ${{ success() }} - run: | - export MLFLOW_HOME=$(pwd) - cd mlflow/R/mlflow/tests - Rscript -e 'covr::codecov()' || : - env: - COVR_RUNNING: true - - name: Test package build - working-directory: mlflow/R/mlflow - run: | - ./build-package.sh - - name: Show 00check.log on failure - if: ${{ failure() }} - run: | - LOG_FILE="${HOME}/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log" - [ -r "${LOG_FILE}" ] && cat "${LOG_FILE}" - cp "${LOG_FILE}" /tmp - - uses: actions/upload-artifact@v1 - if: failure() - with: - name: 00check.log - path: /tmp/00check.log # python-skinny tests cover a subset of mlflow functionality # that is meant to be supported with a smaller dependency footprint. diff --git a/mlflow/R/mlflow/.build-package.R b/mlflow/R/mlflow/.build-package.R index 2022e2abad9b7..acdaef5356192 100644 --- a/mlflow/R/mlflow/.build-package.R +++ b/mlflow/R/mlflow/.build-package.R @@ -1,11 +1,32 @@ source(".utils.R") -# Bundle up the package into a .tar.gz file. This file will be submitted to CRAN. +# Bundle up the package into a .tar.gz file. package_path <- devtools::build(".", path = ".") -# Run the submission check against the built package. + +# Hack to get around this issue: +# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html +# +# The system clock check during `R CMD check` relies on two external web APIs and fails +# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it: +# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511 +Sys.setenv("_R_CHECK_SYSTEM_CLOCK_" = "FALSE") + +# Run the check with `cran = TRUE` devtools::check_built( - path = normalizePath(package_path), + path = package_path, + cran = TRUE, remote = should_enable_cran_incoming_checks(), error_on = "note", - args = c("--no-tests", "--as-cran"), + check_dir = getwd(), + args = "--no-tests", +) + +# Run the check with `cran = FALSE` to detect unused imports: +# https://github.com/wch/r-source/blob/b12ffba7584825d6b11bba8b7dbad084a74c1c20/src/library/tools/R/check.R#L6070 +devtools::check_built( + path = package_path, + cran = FALSE, + error_on = "note", + check_dir = getwd(), + args = "--no-tests", ) diff --git a/mlflow/R/mlflow/.create-test-env.R b/mlflow/R/mlflow/.create-test-env.R index e2a10953c7048..b106a51d9b0f0 100644 --- a/mlflow/R/mlflow/.create-test-env.R +++ b/mlflow/R/mlflow/.create-test-env.R @@ -1,15 +1,15 @@ -parent_dir <- dir("../", full.names = TRUE) -package <- parent_dir[grepl("mlflow_", parent_dir)] +# Install MLflow for R +files <- dir(".", full.names = TRUE) +package <- files[grepl("mlflow_.+\\.tar\\.gz$", files)] install.packages(package) mlflow:::mlflow_maybe_create_conda_env(python_version = "3.7") -library(reticulate) -use_condaenv(mlflow:::mlflow_conda_env_name()) +# Install python dependencies +reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../.."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE) # pinning tensorflow version to 1.14 until test_keras_model.R is fixed keras::install_keras(method = "conda", envname = mlflow:::mlflow_conda_env_name(), tensorflow="1.15.2") # pinning h5py < 3.0.0 to avoid this issue: https://github.com/tensorflow/tensorflow/issues/44467 # TODO: unpin after we use tensorflow >= 2.4 reticulate::conda_install("'h5py<3.0.0'", envname = mlflow:::mlflow_conda_env_name(), pip = TRUE) -reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../../../."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE) reticulate::conda_install("xgboost", envname = mlflow:::mlflow_conda_env_name()) reticulate::conda_install(paste0("h2o==", packageVersion("h2o")), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE) diff --git a/mlflow/R/mlflow/.run-tests.R b/mlflow/R/mlflow/.run-tests.R index d55df992872eb..cf76cd93e3fd4 100644 --- a/mlflow/R/mlflow/.run-tests.R +++ b/mlflow/R/mlflow/.run-tests.R @@ -1,23 +1,3 @@ -source("../.utils.R") +reticulate::use_condaenv(mlflow:::mlflow_conda_env_name()) -parent_dir <- dir("../", full.names = TRUE) -package <- parent_dir[grepl("mlflow_", parent_dir)] - -library(reticulate) -use_condaenv(mlflow:::mlflow_conda_env_name()) - -devtools::check_built( - path = package, - cran = TRUE, - remote = should_enable_cran_incoming_checks(), - error_on = "note", - args = "--no-tests" -) -# This runs checks that are disabled when `cran` is TRUE (e.g. unused import check). -devtools::check_built( - path = package, - cran = FALSE, - error_on = "note", - args = "--no-tests" -) -source("testthat.R") +source("testthat.R", echo = TRUE) diff --git a/mlflow/R/mlflow/tests/testthat.R b/mlflow/R/mlflow/tests/testthat.R index bc3af0a0ab83b..e8c59b301aaf5 100644 --- a/mlflow/R/mlflow/tests/testthat.R +++ b/mlflow/R/mlflow/tests/testthat.R @@ -21,7 +21,7 @@ library(mlflow) if (identical(Sys.getenv("NOT_CRAN"), "true")) { message("Current working directory: ", getwd()) - mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../../.") + mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../..") message('MLFLOW_HOME: ', mlflow_home) test_check("mlflow") } diff --git a/mlflow/R/mlflow/tests/testthat/test-model-h2o.R b/mlflow/R/mlflow/tests/testthat/test-model-h2o.R index 916b5c4476979..3162517b5817c 100644 --- a/mlflow/R/mlflow/tests/testthat/test-model-h2o.R +++ b/mlflow/R/mlflow/tests/testthat/test-model-h2o.R @@ -40,7 +40,7 @@ test_that("can print model correctly after it is loaded", { }) test_that("can load and predict with python pyfunct and h2o backend", { - pyfunc <- import("mlflow.pyfunc") + pyfunc <- reticulate::import("mlflow.pyfunc") py_model <- pyfunc$load_model(testthat_model_dir) expected <- as.data.frame(h2o::h2o.predict(model, h2o::as.h2o(test))) @@ -50,9 +50,9 @@ test_that("can load and predict with python pyfunct and h2o backend", { as.data.frame(py_model$predict(test)), expected ) - mlflow.h2o <- import("mlflow.h2o") + mlflow.h2o <- reticulate::import("mlflow.h2o") h2o_native_model <- mlflow.h2o$load_model(testthat_model_dir) - h2o <- import("h2o") + h2o <- reticulate::import("h2o") expect_equivalent( as.data.frame( diff --git a/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R b/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R index de0e7f5e8dfb5..2f934cb124200 100644 --- a/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R +++ b/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R @@ -40,16 +40,16 @@ test_that("can load model and predict with rfunc backend", { }) test_that("can load and predict with python pyfunct and xgboost backend", { - pyfunc <- import("mlflow.pyfunc") + pyfunc <- reticulate::import("mlflow.pyfunc") py_model <- pyfunc$load_model(testthat_model_dir) expect_equal( as.numeric(py_model$predict(test$data)), unname(predict(model, as.matrix(test$data))) ) - mlflow.xgboost <- import("mlflow.xgboost") + mlflow.xgboost <- reticulate::import("mlflow.xgboost") xgboost_native_model <- mlflow.xgboost$load_model(testthat_model_dir) - xgboost <- import("xgboost") + xgboost <- reticulate::import("xgboost") expect_equivalent( as.numeric(xgboost_native_model$predict(xgboost$DMatrix(test$data))),