From f9b5745b68c815adb411453048317ee46ac52a31 Mon Sep 17 00:00:00 2001
From: Harutaka Kawamura <hkawamura0130@gmail.com>
Date: Wed, 15 Dec 2021 14:56:16 +0900
Subject: [PATCH] Drop mleap for R (#5166)

* Drop mleap support from the R package

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* Fix NAMESPACE (remove the mleap S3 method registrations)

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>

* apply diff

Signed-off-by: harupy <17039389+harupy@users.noreply.github.com>
---
 docs/source/R-api.rst                         | 38 +++------
 mlflow/R/mlflow/DESCRIPTION                   |  3 -
 mlflow/R/mlflow/NAMESPACE                     |  3 -
 mlflow/R/mlflow/R/model-mleap.R               | 57 --------------
 mlflow/R/mlflow/man/mlflow_save_model.Rd      | 14 +---
 .../mlflow/tests/testthat/test-model-mleap.R  | 78 -------------------
 6 files changed, 10 insertions(+), 183 deletions(-)
 delete mode 100644 mlflow/R/mlflow/R/model-mleap.R
 delete mode 100644 mlflow/R/mlflow/tests/testthat/test-model-mleap.R

diff --git a/docs/source/R-api.rst b/docs/source/R-api.rst
index b664334e84b69..26c0b5cf7d7fb 100644
--- a/docs/source/R-api.rst
+++ b/docs/source/R-api.rst
@@ -1719,14 +1719,6 @@ model types.
    mlflow_save_model(model, path, model_spec = list(), ...)
    list(list("mlflow_save_model"), list("H2OModel"))(model, path, model_spec = list(), conda_env = NULL, ...)
    list(list("mlflow_save_model"), list("keras.engine.training.Model"))(model, path, model_spec = list(), conda_env = NULL, ...)
-   list(list("mlflow_save_model"), list("ml_pipeline_model"))(
-     model,
-     path,
-     model_spec = list(),
-     conda_env = NULL,
-     sample_input = NULL,
-     ...
-   )
    list(list("mlflow_save_model"), list("xgb.Booster"))(model, path, model_spec = list(), conda_env = NULL, ...)
 
 .. _arguments-40:
@@ -1734,27 +1726,15 @@ model types.
 Arguments
 ---------
 
-+-------------------------------+--------------------------------------+
-| Argument                      | Description                          |
-+===============================+======================================+
-| ``model``                     | The model that will perform a        |
-|                               | prediction.                          |
-+-------------------------------+--------------------------------------+
-| ``path``                      | Destination path where this MLflow   |
-|                               | compatible model will be saved.      |
-+-------------------------------+--------------------------------------+
-| ``model_spec``                | MLflow model config this model       |
-|                               | flavor is being added to.            |
-+-------------------------------+--------------------------------------+
-| ``...``                       | Optional additional arguments.       |
-+-------------------------------+--------------------------------------+
-| ``conda_env``                 | Path to Conda dependencies file.     |
-+-------------------------------+--------------------------------------+
-| ``sample_input``              | Sample Spark DataFrame input that    |
-|                               | the model can evaluate. This is      |
-|                               | required by MLeap for data schema    |
-|                               | inference.                           |
-+-------------------------------+--------------------------------------+
+============== ==================================================================
+Argument       Description
+============== ==================================================================
+``model``      The model that will perform a prediction.
+``path``       Destination path where this MLflow compatible model will be saved.
+``model_spec`` MLflow model config this model flavor is being added to.
+``...``        Optional additional arguments.
+``conda_env``  Path to Conda dependencies file.
+============== ==================================================================
 
 ``mlflow_search_runs``
 ======================
diff --git a/mlflow/R/mlflow/DESCRIPTION b/mlflow/R/mlflow/DESCRIPTION
index 8d88cba9aada3..9738a6af993e9 100644
--- a/mlflow/R/mlflow/DESCRIPTION
+++ b/mlflow/R/mlflow/DESCRIPTION
@@ -55,12 +55,10 @@ Suggests:
     h2o,
     keras,
     lintr,
-    mleap,
     sparklyr,
     stringi (< 1.4.4),
     testthat (>= 2.0.0),
     xgboost
-Additional_repositories: https://mlflow.github.io/drat
 Encoding: UTF-8
 RoxygenNote: 7.1.2
 Collate:
@@ -77,7 +75,6 @@ Collate:
     'model-utils.R'
     'model-h2o.R'
     'model-keras.R'
-    'model-mleap.R'
     'model-registry.R'
     'model-serve.R'
     'model-swagger.R'
diff --git a/mlflow/R/mlflow/NAMESPACE b/mlflow/R/mlflow/NAMESPACE
index dc6c48b130780..580a6e394378b 100644
--- a/mlflow/R/mlflow/NAMESPACE
+++ b/mlflow/R/mlflow/NAMESPACE
@@ -5,17 +5,14 @@ S3method(mlflow_id,mlflow_run)
 S3method(mlflow_load_flavor,mlflow_flavor_crate)
 S3method(mlflow_load_flavor,mlflow_flavor_h2o)
 S3method(mlflow_load_flavor,mlflow_flavor_keras)
-S3method(mlflow_load_flavor,mlflow_flavor_mleap)
 S3method(mlflow_load_flavor,mlflow_flavor_xgboost)
 S3method(mlflow_predict,H2OModel)
 S3method(mlflow_predict,crate)
 S3method(mlflow_predict,keras.engine.training.Model)
-S3method(mlflow_predict,mleap_transformer)
 S3method(mlflow_predict,xgb.Booster)
 S3method(mlflow_save_model,H2OModel)
 S3method(mlflow_save_model,crate)
 S3method(mlflow_save_model,keras.engine.training.Model)
-S3method(mlflow_save_model,ml_pipeline_model)
 S3method(mlflow_save_model,xgb.Booster)
 S3method(mlflow_ui,"NULL")
 S3method(mlflow_ui,mlflow_client)
diff --git a/mlflow/R/mlflow/R/model-mleap.R b/mlflow/R/mlflow/R/model-mleap.R
deleted file mode 100644
index e47e32a424cb0..0000000000000
--- a/mlflow/R/mlflow/R/model-mleap.R
+++ /dev/null
@@ -1,57 +0,0 @@
-#' @include model-utils.R
-NULL
-
-#' @rdname mlflow_save_model
-#' @param sample_input Sample Spark DataFrame input that the model can evaluate. This is required by MLeap for data schema inference.
-#'
-#' @export
-mlflow_save_model.ml_pipeline_model <- function(model,
-                                                path,
-                                                model_spec = list(),
-                                                conda_env = NULL,
-                                                sample_input = NULL,
-                                                ...) {
-  if (is.null(sample_input)) {
-    stop("`sample_input` is required by MLeap for data schema inference.")
-  }
-
-  assert_pkg_installed("mleap")
-
-  model_filename <- "model.zip"
-
-  if (dir.exists(path)) unlink(path, recursive = TRUE)
-  dir.create(path)
-  model_path <- file.path(path, model_filename)
-  mleap::ml_write_bundle(model, sample_input = sample_input, path = model_path)
-  version <- mleap::mleap_installed_versions()$mleap %>%
-    purrr::map(~ numeric_version(.x)) %>%
-    purrr::reduce(~ (if (.x > .y) .x else .y)) %>%
-    as.character()
-
-  conda_env <- create_default_conda_env_if_absent(
-    path, conda_env, default_pip_deps = list("mlflow", paste("mleap>=", version, sep = ""))
-  )
-  mleap_conf <- list(
-    mleap = list(mleap_version = version, model_data = model_filename)
-  )
-  model_spec$flavors <- append(model_spec$flavors, mleap_conf)
-
-
-  mlflow_write_model_spec(path, model_spec)
-}
-
-#' @export
-mlflow_load_flavor.mlflow_flavor_mleap <- function(flavor, model_path) {
-  assert_pkg_installed("mleap")
-  model_data <- attributes(flavor)$spec$model_data
-  if (is.null(model_data)) {
-    stop("'model_data' attribute is missing")
-  }
-  mleap::mleap_load_bundle(file.path(model_path, model_data))
-}
-
-#' @export
-mlflow_predict.mleap_transformer <- function(model, data, ...) {
-  assert_pkg_installed("mleap")
-  mleap::mleap_transform(model, data, ...)
-}
diff --git a/mlflow/R/mlflow/man/mlflow_save_model.Rd b/mlflow/R/mlflow/man/mlflow_save_model.Rd
index 68ad72b6879a7..bb70a4b09e292 100644
--- a/mlflow/R/mlflow/man/mlflow_save_model.Rd
+++ b/mlflow/R/mlflow/man/mlflow_save_model.Rd
@@ -1,12 +1,11 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/model-crate.R, R/model.R, R/model-h2o.R,
-%   R/model-keras.R, R/model-mleap.R, R/model-xgboost.R
+%   R/model-keras.R, R/model-xgboost.R
 \name{mlflow_save_model.crate}
 \alias{mlflow_save_model.crate}
 \alias{mlflow_save_model}
 \alias{mlflow_save_model.H2OModel}
 \alias{mlflow_save_model.keras.engine.training.Model}
-\alias{mlflow_save_model.ml_pipeline_model}
 \alias{mlflow_save_model.xgb.Booster}
 \title{Save Model for MLflow}
 \usage{
@@ -18,15 +17,6 @@ mlflow_save_model(model, path, model_spec = list(), ...)
 
 \method{mlflow_save_model}{keras.engine.training.Model}(model, path, model_spec = list(), conda_env = NULL, ...)
 
-\method{mlflow_save_model}{ml_pipeline_model}(
-  model,
-  path,
-  model_spec = list(),
-  conda_env = NULL,
-  sample_input = NULL,
-  ...
-)
-
 \method{mlflow_save_model}{xgb.Booster}(model, path, model_spec = list(), conda_env = NULL, ...)
 }
 \arguments{
@@ -40,8 +30,6 @@ will be saved.}
 \item{...}{Optional additional arguments.}
 
 \item{conda_env}{Path to Conda dependencies file.}
-
-\item{sample_input}{Sample Spark DataFrame input that the model can evaluate. This is required by MLeap for data schema inference.}
 }
 \description{
 Saves model in MLflow format that can later be used for prediction and serving. This method is
diff --git a/mlflow/R/mlflow/tests/testthat/test-model-mleap.R b/mlflow/R/mlflow/tests/testthat/test-model-mleap.R
deleted file mode 100644
index 9e988cbf55614..0000000000000
--- a/mlflow/R/mlflow/tests/testthat/test-model-mleap.R
+++ /dev/null
@@ -1,78 +0,0 @@
-context("Model mleap")
-
-library(mleap)
-
-for (i in 0:4){
-    tryCatch(
-        expr = {
-            config <- sparklyr::spark_config()
-            config$sparklyr.gateway.port <- httpuv::randomPort()
-            sc <- sparklyr::spark_connect(master = "local", version = "2.4.5", config=config)
-        },
-        error = function(e){
-            if (i == 4){
-                stop("Exhausted retries in getting SparkContext. Aborting.")
-            }
-            message("An error occured while getting a SparkContext:")
-            print(e)
-            sleep_duration = (2 * i) + 1
-            message(sprintf("\nSleeping for %s seconds and retrying...", sleep_duration))
-            Sys.sleep(sleep_duration)
-        },
-        warning = function(w){
-            message("A warning occurred:")
-            print(w)
-        }
-    )
-}
-
-testthat_model_dir <- basename(tempfile("model_"))
-
-teardown({
-  sparklyr::spark_disconnect(sc)
-  mlflow_clear_test_dir(testthat_model_dir)
-})
-
-mtcars_sdf <- sparklyr::copy_to(sc, mtcars, overwrite = TRUE)
-
-pipeline <- sparklyr::ml_pipeline(sc) %>%
-  sparklyr::ft_binarizer("hp", "high_hp", threshold = 100) %>%
-  sparklyr::ft_vector_assembler(c("high_hp", "wt", "qsec"), "features") %>%
-  sparklyr::ml_gbt_regressor(label_col = "mpg")
-
-model <- sparklyr::ml_fit(pipeline, mtcars_sdf)
-
-test_that("mlflow can save model", {
-  mlflow_save_model(model, testthat_model_dir, sample_input = mtcars_sdf)
-  expect_true(dir.exists(testthat_model_dir))
-})
-
-test_that("can load model and predict with `mlflow_predict()`", {
-  mleap_transformer <- mlflow_load_model(testthat_model_dir)
-  input <- mtcars[c("qsec", "hp", "wt")]
-  predictions <- mlflow_predict(mleap_transformer, input)
-
-  expect_equal(nrow(predictions), nrow(mtcars))
-  expect_true("high_hp" %in% colnames(predictions))
-  expect_true("prediction" %in% colnames(predictions))
-  # at the moment a GBT model can fit `mtcars` perfectly because it simply
-  # "memorized" the dataset
-  expect_equal(ifelse(mtcars$hp >= 100, 1, 0), predictions$high_hp)
-})
-
-test_that("can load model created by MLflow Java client and predict with `mlflow_predict()`", {
-  model_dir <- file.path(
-    "..", "..", "..", "..", "java", "scoring", "src", "test", "resources", "org", "mlflow", "mleap_model"
-  )
-  model <- mlflow_load_model(model_dir)
-
-  input <- jsonlite::fromJSON(file.path(model_dir, "sample_input.json"))
-  data <- as.data.frame(input$data)
-  colnames(data) <- input$columns
-  predictions <- mlflow_predict(model, data)
-
-  expect_equal(
-    colnames(predictions),
-    c("text", "topic", "label", "words", "features", "rawPrediction", "probability", "prediction")
-  )
-})