From f9b5745b68c815adb411453048317ee46ac52a31 Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Wed, 15 Dec 2021 14:56:16 +0900 Subject: [PATCH] Drop mleap for R (#5166) * Drop mleap R Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * Fix namespace Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> * apply diff Signed-off-by: harupy <17039389+harupy@users.noreply.github.com> --- docs/source/R-api.rst | 38 +++------ mlflow/R/mlflow/DESCRIPTION | 3 - mlflow/R/mlflow/NAMESPACE | 3 - mlflow/R/mlflow/R/model-mleap.R | 57 -------------- mlflow/R/mlflow/man/mlflow_save_model.Rd | 14 +--- .../mlflow/tests/testthat/test-model-mleap.R | 78 ------------------- 6 files changed, 10 insertions(+), 183 deletions(-) delete mode 100644 mlflow/R/mlflow/R/model-mleap.R delete mode 100644 mlflow/R/mlflow/tests/testthat/test-model-mleap.R diff --git a/docs/source/R-api.rst b/docs/source/R-api.rst index b664334e84b69..26c0b5cf7d7fb 100644 --- a/docs/source/R-api.rst +++ b/docs/source/R-api.rst @@ -1719,14 +1719,6 @@ model types. mlflow_save_model(model, path, model_spec = list(), ...) list(list("mlflow_save_model"), list("H2OModel"))(model, path, model_spec = list(), conda_env = NULL, ...) list(list("mlflow_save_model"), list("keras.engine.training.Model"))(model, path, model_spec = list(), conda_env = NULL, ...) - list(list("mlflow_save_model"), list("ml_pipeline_model"))( - model, - path, - model_spec = list(), - conda_env = NULL, - sample_input = NULL, - ... - ) list(list("mlflow_save_model"), list("xgb.Booster"))(model, path, model_spec = list(), conda_env = NULL, ...) .. _arguments-40: @@ -1734,27 +1726,15 @@ model types. Arguments --------- -+-------------------------------+--------------------------------------+ -| Argument | Description | -+===============================+======================================+ -| ``model`` | The model that will perform a | -| | prediction. | -+-------------------------------+--------------------------------------+ -| ``path`` | Destination path where this MLflow | -| | compatible model will be saved. | -+-------------------------------+--------------------------------------+ -| ``model_spec`` | MLflow model config this model | -| | flavor is being added to. | -+-------------------------------+--------------------------------------+ -| ``...`` | Optional additional arguments. | -+-------------------------------+--------------------------------------+ -| ``conda_env`` | Path to Conda dependencies file. | -+-------------------------------+--------------------------------------+ -| ``sample_input`` | Sample Spark DataFrame input that | -| | the model can evaluate. This is | -| | required by MLeap for data schema | -| | inference. | -+-------------------------------+--------------------------------------+ +============== ================================================================== +Argument Description +============== ================================================================== +``model`` The model that will perform a prediction. +``path`` Destination path where this MLflow compatible model will be saved. +``model_spec`` MLflow model config this model flavor is being added to. +``...`` Optional additional arguments. +``conda_env`` Path to Conda dependencies file. +============== ================================================================== ``mlflow_search_runs`` ====================== diff --git a/mlflow/R/mlflow/DESCRIPTION b/mlflow/R/mlflow/DESCRIPTION index 8d88cba9aada3..9738a6af993e9 100644 --- a/mlflow/R/mlflow/DESCRIPTION +++ b/mlflow/R/mlflow/DESCRIPTION @@ -55,12 +55,10 @@ Suggests: h2o, keras, lintr, - mleap, sparklyr, stringi (< 1.4.4), testthat (>= 2.0.0), xgboost -Additional_repositories: https://mlflow.github.io/drat Encoding: UTF-8 RoxygenNote: 7.1.2 Collate: @@ -77,7 +75,6 @@ Collate: 'model-utils.R' 'model-h2o.R' 'model-keras.R' - 'model-mleap.R' 'model-registry.R' 'model-serve.R' 'model-swagger.R' diff --git a/mlflow/R/mlflow/NAMESPACE b/mlflow/R/mlflow/NAMESPACE index dc6c48b130780..580a6e394378b 100644 --- a/mlflow/R/mlflow/NAMESPACE +++ b/mlflow/R/mlflow/NAMESPACE @@ -5,17 +5,14 @@ S3method(mlflow_id,mlflow_run) S3method(mlflow_load_flavor,mlflow_flavor_crate) S3method(mlflow_load_flavor,mlflow_flavor_h2o) S3method(mlflow_load_flavor,mlflow_flavor_keras) -S3method(mlflow_load_flavor,mlflow_flavor_mleap) S3method(mlflow_load_flavor,mlflow_flavor_xgboost) S3method(mlflow_predict,H2OModel) S3method(mlflow_predict,crate) S3method(mlflow_predict,keras.engine.training.Model) -S3method(mlflow_predict,mleap_transformer) S3method(mlflow_predict,xgb.Booster) S3method(mlflow_save_model,H2OModel) S3method(mlflow_save_model,crate) S3method(mlflow_save_model,keras.engine.training.Model) -S3method(mlflow_save_model,ml_pipeline_model) S3method(mlflow_save_model,xgb.Booster) S3method(mlflow_ui,"NULL") S3method(mlflow_ui,mlflow_client) diff --git a/mlflow/R/mlflow/R/model-mleap.R b/mlflow/R/mlflow/R/model-mleap.R deleted file mode 100644 index e47e32a424cb0..0000000000000 --- a/mlflow/R/mlflow/R/model-mleap.R +++ /dev/null @@ -1,57 +0,0 @@ -#' @include model-utils.R -NULL - -#' @rdname mlflow_save_model -#' @param sample_input Sample Spark DataFrame input that the model can evaluate. This is required by MLeap for data schema inference. -#' -#' @export -mlflow_save_model.ml_pipeline_model <- function(model, - path, - model_spec = list(), - conda_env = NULL, - sample_input = NULL, - ...) { - if (is.null(sample_input)) { - stop("`sample_input` is required by MLeap for data schema inference.") - } - - assert_pkg_installed("mleap") - - model_filename <- "model.zip" - - if (dir.exists(path)) unlink(path, recursive = TRUE) - dir.create(path) - model_path <- file.path(path, model_filename) - mleap::ml_write_bundle(model, sample_input = sample_input, path = model_path) - version <- mleap::mleap_installed_versions()$mleap %>% - purrr::map(~ numeric_version(.x)) %>% - purrr::reduce(~ (if (.x > .y) .x else .y)) %>% - as.character() - - conda_env <- create_default_conda_env_if_absent( - path, conda_env, default_pip_deps = list("mlflow", paste("mleap>=", version, sep = "")) - ) - mleap_conf <- list( - mleap = list(mleap_version = version, model_data = model_filename) - ) - model_spec$flavors <- append(model_spec$flavors, mleap_conf) - - - mlflow_write_model_spec(path, model_spec) -} - -#' @export -mlflow_load_flavor.mlflow_flavor_mleap <- function(flavor, model_path) { - assert_pkg_installed("mleap") - model_data <- attributes(flavor)$spec$model_data - if (is.null(model_data)) { - stop("'model_data' attribute is missing") - } - mleap::mleap_load_bundle(file.path(model_path, model_data)) -} - -#' @export -mlflow_predict.mleap_transformer <- function(model, data, ...) { - assert_pkg_installed("mleap") - mleap::mleap_transform(model, data, ...) -} diff --git a/mlflow/R/mlflow/man/mlflow_save_model.Rd b/mlflow/R/mlflow/man/mlflow_save_model.Rd index 68ad72b6879a7..bb70a4b09e292 100644 --- a/mlflow/R/mlflow/man/mlflow_save_model.Rd +++ b/mlflow/R/mlflow/man/mlflow_save_model.Rd @@ -1,12 +1,11 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/model-crate.R, R/model.R, R/model-h2o.R, -% R/model-keras.R, R/model-mleap.R, R/model-xgboost.R +% R/model-keras.R, R/model-xgboost.R \name{mlflow_save_model.crate} \alias{mlflow_save_model.crate} \alias{mlflow_save_model} \alias{mlflow_save_model.H2OModel} \alias{mlflow_save_model.keras.engine.training.Model} -\alias{mlflow_save_model.ml_pipeline_model} \alias{mlflow_save_model.xgb.Booster} \title{Save Model for MLflow} \usage{ @@ -18,15 +17,6 @@ mlflow_save_model(model, path, model_spec = list(), ...) \method{mlflow_save_model}{keras.engine.training.Model}(model, path, model_spec = list(), conda_env = NULL, ...) -\method{mlflow_save_model}{ml_pipeline_model}( - model, - path, - model_spec = list(), - conda_env = NULL, - sample_input = NULL, - ... -) - \method{mlflow_save_model}{xgb.Booster}(model, path, model_spec = list(), conda_env = NULL, ...) } \arguments{ @@ -40,8 +30,6 @@ will be saved.} \item{...}{Optional additional arguments.} \item{conda_env}{Path to Conda dependencies file.} - -\item{sample_input}{Sample Spark DataFrame input that the model can evaluate. This is required by MLeap for data schema inference.} } \description{ Saves model in MLflow format that can later be used for prediction and serving. This method is diff --git a/mlflow/R/mlflow/tests/testthat/test-model-mleap.R b/mlflow/R/mlflow/tests/testthat/test-model-mleap.R deleted file mode 100644 index 9e988cbf55614..0000000000000 --- a/mlflow/R/mlflow/tests/testthat/test-model-mleap.R +++ /dev/null @@ -1,78 +0,0 @@ -context("Model mleap") - -library(mleap) - -for (i in 0:4){ - tryCatch( - expr = { - config <- sparklyr::spark_config() - config$sparklyr.gateway.port <- httpuv::randomPort() - sc <- sparklyr::spark_connect(master = "local", version = "2.4.5", config=config) - }, - error = function(e){ - if (i == 4){ - stop("Exhausted retries in getting SparkContext. Aborting.") - } - message("An error occured while getting a SparkContext:") - print(e) - sleep_duration = (2 * i) + 1 - message(sprintf("\nSleeping for %s seconds and retrying...", sleep_duration)) - Sys.sleep(sleep_duration) - }, - warning = function(w){ - message("A warning occurred:") - print(w) - } - ) -} - -testthat_model_dir <- basename(tempfile("model_")) - -teardown({ - sparklyr::spark_disconnect(sc) - mlflow_clear_test_dir(testthat_model_dir) -}) - -mtcars_sdf <- sparklyr::copy_to(sc, mtcars, overwrite = TRUE) - -pipeline <- sparklyr::ml_pipeline(sc) %>% - sparklyr::ft_binarizer("hp", "high_hp", threshold = 100) %>% - sparklyr::ft_vector_assembler(c("high_hp", "wt", "qsec"), "features") %>% - sparklyr::ml_gbt_regressor(label_col = "mpg") - -model <- sparklyr::ml_fit(pipeline, mtcars_sdf) - -test_that("mlflow can save model", { - mlflow_save_model(model, testthat_model_dir, sample_input = mtcars_sdf) - expect_true(dir.exists(testthat_model_dir)) -}) - -test_that("can load model and predict with `mlflow_predict()`", { - mleap_transformer <- mlflow_load_model(testthat_model_dir) - input <- mtcars[c("qsec", "hp", "wt")] - predictions <- mlflow_predict(mleap_transformer, input) - - expect_equal(nrow(predictions), nrow(mtcars)) - expect_true("high_hp" %in% colnames(predictions)) - expect_true("prediction" %in% colnames(predictions)) - # at the moment a GBT model can fit `mtcars` perfectly because it simply - # "memorized" the dataset - expect_equal(ifelse(mtcars$hp >= 100, 1, 0), predictions$high_hp) -}) - -test_that("can load model created by MLflow Java client and predict with `mlflow_predict()`", { - model_dir <- file.path( - "..", "..", "..", "..", "java", "scoring", "src", "test", "resources", "org", "mlflow", "mleap_model" - ) - model <- mlflow_load_model(model_dir) - - input <- jsonlite::fromJSON(file.path(model_dir, "sample_input.json")) - data <- as.data.frame(input$data) - colnames(data) <- input$columns - predictions <- mlflow_predict(model, data) - - expect_equal( - colnames(predictions), - c("text", "topic", "label", "words", "features", "rawPrediction", "probability", "prediction") - ) -})