From dfa9fc710238ff8f7662de923be409297169f0d0 Mon Sep 17 00:00:00 2001
From: Harutaka Kawamura <hkawamura0130@gmail.com>
Date: Sun, 9 Jan 2022 23:16:46 +0900
Subject: [PATCH] Clean up CI job for R (#5232)

* clean r job

Signed-off-by: harupy <hkawamura0130@gmail.com>

* build package once

Signed-off-by: harupy <hkawamura0130@gmail.com>

* fix named argument

Signed-off-by: harupy <hkawamura0130@gmail.com>

* remove system dependencies

Signed-off-by: harupy <hkawamura0130@gmail.com>
---
 .github/workflows/master.yml                  | 83 ++++---------------
 mlflow/R/mlflow/.build-package.R              | 29 ++++++-
 mlflow/R/mlflow/.create-test-env.R            | 10 +--
 mlflow/R/mlflow/.run-tests.R                  | 24 +-----
 mlflow/R/mlflow/tests/testthat.R              |  2 +-
 .../R/mlflow/tests/testthat/test-model-h2o.R  |  6 +-
 .../tests/testthat/test-model-xgboost.R       |  6 +-
 7 files changed, 57 insertions(+), 103 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index f0d6a309f4665..edb2449f016df 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -49,32 +49,21 @@ jobs:
         ./dev/lint.sh
   r:
     runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: mlflow/R/mlflow
     steps:
-    - name: Uninstall default-jdk and adoptopenjdk-11-hotspot if present
-      run: |
-        # deleting other version(s) of JDK because they are not needed and they might interfere with JNI linker configuration in the 'setup-r' step
-        sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || :
-    - uses: actions/checkout@master
+    - uses: actions/checkout@v2
     - uses: actions/setup-java@v2
       with:
         java-version: 11
         distribution: 'adopt'
-    - name: Re-configure dynamic linker run-time bindings for adoptopenjdk-8-hotspot-amd64
-      run: |
-        sudo mkdir -p /etc/ld.so.conf.d
-        sudo bash -c "cat > /etc/ld.so.conf.d/jre.conf <<< '/usr/lib/jvm/adoptopenjdk-8-hotspot-amd64/jre/lib/amd64/server'"
-        sudo ldconfig -v
     - uses: r-lib/actions/setup-r@v1
     # This step dumps the current set of R dependencies and R version into files to be used
     # as a cache key when caching/restoring R dependencies.
-    - name: Query dependencies
+    - name: Dump dependencies
       run: |
-        print(R.version)
-        install.packages('remotes')
-        saveRDS(remotes::dev_package_deps("mlflow/R/mlflow", dependencies = TRUE), ".github/depends.Rds", version = 2)
-        writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
-      shell: Rscript {0}
-
+        Rscript -e 'source(".dump-r-dependencies.R", echo = TRUE)'
     - name: Get OS name
       id: os-name
       run: |
@@ -87,62 +76,26 @@ jobs:
         path: ${{ env.R_LIBS_USER }}
         # We cache R dependencies based on a tuple of the current OS, the R version, and the list of
         # R dependencies
-        key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
-    - name: Install system dependencies
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-        sudo R CMD javareconf
+        key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('mlflow/R/mlflow/R-version') }}-1-${{ hashFiles('mlflow/R/mlflow/depends.Rds') }}
+    # - name: Install system dependencies
+    #   run: |
+    #     sudo apt-get update -y
+    #     sudo apt-get install -y libcurl4-openssl-dev pandoc
     - name: Install dependencies
-      working-directory: mlflow/R/mlflow
       run: |
-        install.packages("devtools")
-        remotes::install_deps('.', dependencies = TRUE, upgrade = FALSE)
-      shell: Rscript {0}
+        Rscript -e 'source(".install-deps.R", echo=TRUE)'
+    - name: Build package
+      run: |
+        ./build-package.sh
     - name: Create test environment
       run: |
-        source ./dev/install-common-deps.sh
-        cd mlflow/R/mlflow
-        R CMD build .
-        cd tests
-        Rscript -e 'source("../.create-test-env.R", echo=TRUE)'
+        Rscript -e 'source(".create-test-env.R", echo=TRUE)'
     - name: Run tests
       env:
-        # Hack to get around this issue:
-        # https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
-        #
-        # The system clock check during `R CMD check` relies on two external web APIs and fails
-        # when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
-        # https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
-        _R_CHECK_SYSTEM_CLOCK_: FALSE
+        LINTR_COMMENT_BOT: 'false'  # quoted so the value is a string, not a YAML boolean
       run: |
-        export LINTR_COMMENT_BOT=false
-        cd mlflow/R/mlflow/tests
-        # `devtools::check_built` requires `pandoc` to analyze README.md
-        sudo apt-get install pandoc -y
+        cd tests
         Rscript -e 'source("../.run-tests.R", echo=TRUE)'
-    - name: Calculate code coverage
-      if: ${{ success() }}
-      run: |
-        export MLFLOW_HOME=$(pwd)
-        cd mlflow/R/mlflow/tests
-        Rscript -e 'covr::codecov()' || :
-      env:
-        COVR_RUNNING: true
-    - name: Test package build
-      working-directory: mlflow/R/mlflow
-      run: |
-        ./build-package.sh
-    - name: Show 00check.log on failure
-      if: ${{ failure() }}
-      run: |
-        LOG_FILE="${HOME}/build/mlflow/mlflow/mlflow/R/mlflow/mlflow.Rcheck/00check.log"
-        [ -r "${LOG_FILE}" ] && cat "${LOG_FILE}"
-        cp "${LOG_FILE}" /tmp
-    - uses: actions/upload-artifact@v1
-      if: failure()
-      with:
-        name: 00check.log
-        path: /tmp/00check.log
 
   # python-skinny tests cover a subset of mlflow functionality
   # that is meant to be supported with a smaller dependency footprint.
diff --git a/mlflow/R/mlflow/.build-package.R b/mlflow/R/mlflow/.build-package.R
index 2022e2abad9b7..acdaef5356192 100644
--- a/mlflow/R/mlflow/.build-package.R
+++ b/mlflow/R/mlflow/.build-package.R
@@ -1,11 +1,32 @@
 source(".utils.R")
 
-# Bundle up the package into a .tar.gz file. This file will be submitted to CRAN.
+# Bundle up the package into a .tar.gz file written to the current directory.
 package_path <- devtools::build(".", path = ".")
-# Run the submission check against the built package.
+
+# Hack to get around this issue:
+# https://stat.ethz.ch/pipermail/r-package-devel/2020q3/005930.html
+#
+# The system clock check during `R CMD check` relies on two external web APIs and fails
+# when they are unavailable. By setting `_R_CHECK_SYSTEM_CLOCK_` to FALSE, we can skip it:
+# https://github.com/wch/r-source/blob/59a1965239143ca6242b9cc948d8834e1194e84a/src/library/tools/R/check.R#L511
+Sys.setenv("_R_CHECK_SYSTEM_CLOCK_" = "FALSE")
+
+# Check the built tarball with `cran = TRUE`; tests are skipped and any NOTE fails the run.
 devtools::check_built(
-    path = normalizePath(package_path),
+    path = package_path,
+    cran = TRUE,
     remote = should_enable_cran_incoming_checks(),
     error_on = "note",
-    args = c("--no-tests", "--as-cran"),
+    check_dir = getwd(),
+    args = "--no-tests"
+)
+
+# Run the check with `cran = FALSE` to detect unused imports:
+# https://github.com/wch/r-source/blob/b12ffba7584825d6b11bba8b7dbad084a74c1c20/src/library/tools/R/check.R#L6070
+devtools::check_built(
+    path = package_path,
+    cran = FALSE,
+    error_on = "note",
+    check_dir = getwd(),
+    args = "--no-tests"
 )
diff --git a/mlflow/R/mlflow/.create-test-env.R b/mlflow/R/mlflow/.create-test-env.R
index e2a10953c7048..b106a51d9b0f0 100644
--- a/mlflow/R/mlflow/.create-test-env.R
+++ b/mlflow/R/mlflow/.create-test-env.R
@@ -1,15 +1,15 @@
-parent_dir <- dir("../", full.names = TRUE)
-package <- parent_dir[grepl("mlflow_", parent_dir)]
+# Install MLflow for R from the mlflow_*.tar.gz tarball found in the current directory
+files <- dir(".", full.names = TRUE)
+package <- files[grepl("mlflow_.+\\.tar\\.gz$", files)]
 install.packages(package)
 
 mlflow:::mlflow_maybe_create_conda_env(python_version = "3.7")
-library(reticulate)
-use_condaenv(mlflow:::mlflow_conda_env_name())
+# Install the MLflow Python package with pip from MLFLOW_HOME (default: "../../..")
+reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../.."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
-# pinning tensorflow version to 1.14 until test_keras_model.R is fixed
+# pinning tensorflow version to 1.15.2 until test_keras_model.R is fixed
 keras::install_keras(method = "conda", envname = mlflow:::mlflow_conda_env_name(), tensorflow="1.15.2")
 # pinning h5py < 3.0.0 to avoid this issue:  https://github.com/tensorflow/tensorflow/issues/44467
 # TODO: unpin after we use tensorflow >= 2.4
 reticulate::conda_install("'h5py<3.0.0'", envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
-reticulate::conda_install(Sys.getenv("MLFLOW_HOME", "../../../../."), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
 reticulate::conda_install("xgboost", envname = mlflow:::mlflow_conda_env_name())
 reticulate::conda_install(paste0("h2o==", packageVersion("h2o")), envname = mlflow:::mlflow_conda_env_name(), pip = TRUE)
diff --git a/mlflow/R/mlflow/.run-tests.R b/mlflow/R/mlflow/.run-tests.R
index d55df992872eb..cf76cd93e3fd4 100644
--- a/mlflow/R/mlflow/.run-tests.R
+++ b/mlflow/R/mlflow/.run-tests.R
@@ -1,23 +1,3 @@
-source("../.utils.R")
+reticulate::use_condaenv(mlflow:::mlflow_conda_env_name())
 
-parent_dir <- dir("../", full.names = TRUE)
-package <- parent_dir[grepl("mlflow_", parent_dir)]
-
-library(reticulate)
-use_condaenv(mlflow:::mlflow_conda_env_name())
-
-devtools::check_built(
-    path = package,
-    cran = TRUE,
-    remote = should_enable_cran_incoming_checks(),
-    error_on = "note",
-    args = "--no-tests"
-)
-# This runs checks that are disabled when `cran` is TRUE (e.g. unused import check).
-devtools::check_built(
-    path = package,
-    cran = FALSE,
-    error_on = "note",
-    args = "--no-tests"
-)
-source("testthat.R")
+source("testthat.R", echo = TRUE)
diff --git a/mlflow/R/mlflow/tests/testthat.R b/mlflow/R/mlflow/tests/testthat.R
index bc3af0a0ab83b..e8c59b301aaf5 100644
--- a/mlflow/R/mlflow/tests/testthat.R
+++ b/mlflow/R/mlflow/tests/testthat.R
@@ -21,7 +21,7 @@ library(mlflow)
 
 if (identical(Sys.getenv("NOT_CRAN"), "true")) {
   message("Current working directory: ", getwd())
-  mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../../.")
+  mlflow_home <- Sys.getenv("MLFLOW_HOME", "../../../..")
   message('MLFLOW_HOME: ', mlflow_home)
   test_check("mlflow")
 }
diff --git a/mlflow/R/mlflow/tests/testthat/test-model-h2o.R b/mlflow/R/mlflow/tests/testthat/test-model-h2o.R
index 916b5c4476979..3162517b5817c 100644
--- a/mlflow/R/mlflow/tests/testthat/test-model-h2o.R
+++ b/mlflow/R/mlflow/tests/testthat/test-model-h2o.R
@@ -40,7 +40,7 @@ test_that("can print model correctly after it is loaded", {
 })
 
 test_that("can load and predict with python pyfunct and h2o backend", {
-  pyfunc <- import("mlflow.pyfunc")
+  pyfunc <- reticulate::import("mlflow.pyfunc")
   py_model <- pyfunc$load_model(testthat_model_dir)
 
   expected <- as.data.frame(h2o::h2o.predict(model, h2o::as.h2o(test)))
@@ -50,9 +50,9 @@ test_that("can load and predict with python pyfunct and h2o backend", {
     as.data.frame(py_model$predict(test)), expected
   )
 
-  mlflow.h2o <- import("mlflow.h2o")
+  mlflow.h2o <- reticulate::import("mlflow.h2o")
   h2o_native_model <- mlflow.h2o$load_model(testthat_model_dir)
-  h2o <- import("h2o")
+  h2o <- reticulate::import("h2o")
 
   expect_equivalent(
     as.data.frame(
diff --git a/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R b/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R
index de0e7f5e8dfb5..2f934cb124200 100644
--- a/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R
+++ b/mlflow/R/mlflow/tests/testthat/test-model-xgboost.R
@@ -40,16 +40,16 @@ test_that("can load model and predict with rfunc backend", {
 })
 
 test_that("can load and predict with python pyfunct and xgboost backend", {
-  pyfunc <- import("mlflow.pyfunc")
+  pyfunc <- reticulate::import("mlflow.pyfunc")
   py_model <- pyfunc$load_model(testthat_model_dir)
   expect_equal(
     as.numeric(py_model$predict(test$data)),
     unname(predict(model, as.matrix(test$data)))
   )
 
-  mlflow.xgboost <- import("mlflow.xgboost")
+  mlflow.xgboost <- reticulate::import("mlflow.xgboost")
   xgboost_native_model <- mlflow.xgboost$load_model(testthat_model_dir)
-  xgboost <- import("xgboost")
+  xgboost <- reticulate::import("xgboost")
 
   expect_equivalent(
     as.numeric(xgboost_native_model$predict(xgboost$DMatrix(test$data))),