
Commit

resolve conflicts
Signed-off-by: harupy <hkawamura0130@gmail.com>
harupy committed Jan 5, 2022
2 parents e2f8e0d + 2d8cb0c commit 97f56d2
Showing 7 changed files with 137 additions and 13 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/master.yml
@@ -83,13 +83,13 @@ jobs:
# `os_name` will be like "Ubuntu-20.04.1-LTS"
os_name=$(lsb_release -ds | sed 's/\s/-/g')
echo "::set-output name=os-name::$os_name"
- # - name: Cache R packages
- #   uses: actions/cache@v2
- #   with:
- #     path: ${{ env.R_LIBS_USER }}
- #     # We cache R dependencies based on a tuple of the current OS, the R version, and the list of
- #     # R dependencies
- #     key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+ - name: Cache R packages
+   uses: actions/cache@v2
+   with:
+     path: ${{ env.R_LIBS_USER }}
+     # We cache R dependencies based on a tuple of the current OS, the R version, and the list of
+     # R dependencies
+     key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
- name: Install system dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev
2 changes: 1 addition & 1 deletion mlflow/R/mlflow/.Rprofile
@@ -1,3 +1,3 @@
# https://packagemanager.rstudio.com provides pre-compiled binary packages for Linux
# that can be installed significantly faster.
- options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/all/latest"))
+ options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"))
18 changes: 18 additions & 0 deletions mlflow/models/model.py
@@ -68,6 +68,24 @@ def get_input_schema(self):
def get_output_schema(self):
return self.signature.outputs if self.signature is not None else None

def load_input_example(self, path: str):
"""
Load the input example saved along a model. Returns None if there is no example metadata
(i.e. the model was saved without example). Raises FileNotFoundError if there is model
metadata but the example file is missing.
:param path: Path to the model directory.
:return: Input example (NumPy ndarray, SciPy csc_matrix, SciPy csr_matrix,
pandas DataFrame, dict) or None if the model has no example.
"""

# Just-in-time import to only load example-parsing libraries (e.g. numpy, pandas, etc.) if
# example is requested.
from mlflow.models.utils import _read_example

return _read_example(self, path)

def add_flavor(self, name, **params):
"""Add an entry for how to serve the model in a given format."""
self.flavors[name] = params
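For orientation, here is a hedged usage sketch of the new method (not part of the diff; the sklearn model, local path, and example data below are illustrative):

import os

import mlflow.sklearn
import pandas as pd
from mlflow.models import Model
from sklearn.linear_model import LinearRegression

X = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
model_path = "/tmp/example_model"  # illustrative path

# Save a model together with an input example.
mlflow.sklearn.save_model(LinearRegression().fit(X, [2.0, 4.0, 6.0]), model_path, input_example=X)

# Load the MLmodel metadata and read the example back with the new method.
mlflow_model = Model.load(os.path.join(model_path, "MLmodel"))
example = mlflow_model.load_input_example(model_path)  # a pandas DataFrame here; None if no example was saved

If the MLmodel metadata records an example but the example file has been removed from the directory, the call raises FileNotFoundError, which the new tests further down exercise.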
4 changes: 2 additions & 2 deletions mlflow/models/utils.py
@@ -192,8 +192,8 @@ def _save_example(mlflow_model: Model, input_example: ModelInputExample, path: s
def _read_example(mlflow_model: Model, path: str):
"""
Read example from a model directory. Returns None if there is no example metadata (i.e. the
- model was saved without example). Raises IO Exception if there is model metadata but the example
- file is missing.
+ model was saved without example). Raises FileNotFoundError if there is model metadata but the
+ example file is missing.
:param mlflow_model: Model metadata.
:param path: Path to the model directory.
3 changes: 2 additions & 1 deletion mlflow/store/artifact/databricks_models_artifact_repo.py
@@ -71,8 +71,9 @@ def list_artifacts(self, path=None):
json_body = self._make_json_body(path, page_token)
response = self._call_endpoint(json_body, REGISTRY_LIST_ARTIFACTS_ENDPOINT)
try:
+ response.raise_for_status()
json_response = json.loads(response.text)
- except ValueError:
+ except Exception:
raise MlflowException(
"API request to list files under path `%s` failed with status code %s. "
"Response body: %s" % (path, response.status_code, response.text)
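The net effect of this change is that a non-2xx HTTP status now surfaces as an MlflowException carrying the status code and response body, rather than as an unhandled requests error, while JSON decoding failures are still reported the same way. A simplified sketch of the resulting control flow (the standalone function name below is illustrative; in the repository this logic lives inside list_artifacts):

import json

from mlflow.exceptions import MlflowException

def _parse_list_artifacts_response(response, path):
    try:
        # Raises for HTTP error statuses (e.g. 404) before we try to parse the body.
        response.raise_for_status()
        return json.loads(response.text)
    except Exception:
        # Both HTTP errors and malformed JSON end up here and are re-raised as a
        # single MlflowException that includes the status code and response body.
        raise MlflowException(
            "API request to list files under path `%s` failed with status code %s. "
            "Response body: %s" % (path, response.status_code, response.text)
        )

Catching Exception rather than ValueError is what lets the HTTPError raised by raise_for_status take the same error path as a JSON decode failure.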
80 changes: 79 additions & 1 deletion tests/models/test_model.py
@@ -1,4 +1,5 @@
import os
import pytest
from datetime import date

import mlflow
@@ -9,11 +10,12 @@
from mlflow.models import Model
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import _save_example
- from mlflow.types.schema import Schema, ColSpec
+ from mlflow.types.schema import Schema, ColSpec, TensorSpec
from mlflow.utils.file_utils import TempDir
from mlflow.utils.proto_json_utils import _dataframe_from_json

from unittest import mock
from scipy.sparse import csc_matrix


def test_model_save_load():
@@ -104,6 +106,10 @@ def test_model_log():
assert x.to_dict(orient="records")[0] == input_example
assert not hasattr(loaded_model, "databricks_runtime")

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.to_dict(orient="records")[0] == input_example


def test_model_log_with_databricks_runtime():
dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
@@ -165,6 +171,78 @@ def test_model_log_with_input_example_succeeds():
input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
assert x.equals(input_example)

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.equals(input_example)


def test_model_load_input_example_numpy():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)

assert isinstance(loaded_example, np.ndarray)
assert np.array_equal(input_example, loaded_example)


def test_model_load_input_example_scipy():
with TempDir(chdr=True) as tmp:
input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)

assert isinstance(loaded_example, csc_matrix)
assert np.array_equal(input_example.data, loaded_example.data)


def test_model_load_input_example_failures():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)
assert loaded_example is not None

with pytest.raises(FileNotFoundError, match="No such file or directory"):
loaded_model.load_input_example(os.path.join(local_path, "folder_which_does_not_exist"))

path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
os.remove(path)
with pytest.raises(FileNotFoundError, match="No such file or directory"):
loaded_model.load_input_example(local_path)


def test_model_load_input_example_no_signature():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)
assert loaded_example is None


def _is_valid_uuid(val):
import uuid
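The new tests rely on _log_model_with_signature_and_example, a helper defined earlier in tests/models/test_model.py and not shown in this hunk. Roughly, it logs a model with the given signature and input example and returns the local artifact directory together with the run. A hedged sketch of that shape (the flavor class and helper body below are illustrations, not the actual test code):

import os

import mlflow
from mlflow.models import Model
from mlflow.models.utils import _save_example
from mlflow.tracking.artifact_utils import _download_artifact_from_uri


class _ExampleFlavor:
    # Hypothetical minimal flavor: records the signature/example and writes the MLmodel file.
    @classmethod
    def save_model(cls, path, mlflow_model, signature=None, input_example=None):
        os.makedirs(path, exist_ok=True)
        if signature is not None:
            mlflow_model.signature = signature
        if input_example is not None:
            _save_example(mlflow_model, input_example, path)
        mlflow_model.save(os.path.join(path, "MLmodel"))


def _log_model_with_signature_and_example(tmp, sig, input_example):
    with mlflow.start_run() as run:
        Model.log("some/path", _ExampleFlavor, signature=sig, input_example=input_example)
    local_path = _download_artifact_from_uri(
        "runs:/{}/some/path".format(run.info.run_id), output_path=tmp.path("")
    )
    return local_path, run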
29 changes: 28 additions & 1 deletion tests/store/artifact/test_databricks_models_artifact_repo.py
@@ -142,15 +142,24 @@ def test_init_with_valid_uri_but_no_profile(self, valid_profileless_artifact_uri
DatabricksModelsArtifactRepository(valid_profileless_artifact_uri)

def test_list_artifacts(self, databricks_model_artifact_repo):
status_code = 200

def _raise_for_status():
if status_code == 404:
raise Exception(
"404 Client Error: Not Found for url: https://shard-uri/api/2.0/mlflow/model-versions/list-artifacts?name=model&version=1"
)

list_artifact_dir_response_mock = mock.MagicMock()
- list_artifact_dir_response_mock.status_code = 200
+ list_artifact_dir_response_mock.status_code = status_code
list_artifact_dir_json_mock = {
"files": [
{"path": "MLmodel", "is_dir": False, "file_size": 294},
{"path": "data", "is_dir": True, "file_size": None},
]
}
list_artifact_dir_response_mock.text = json.dumps(list_artifact_dir_json_mock)
list_artifact_dir_response_mock.raise_for_status.side_effect = _raise_for_status
with mock.patch(
DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint"
) as call_endpoint_mock:
@@ -166,6 +175,24 @@ def test_list_artifacts(self, databricks_model_artifact_repo):
assert artifacts[1].file_size is None
call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT)

# errors from API are propagated through to cli response
list_artifact_dir_bad_response_mock = mock.MagicMock()
status_code = 404
list_artifact_dir_bad_response_mock.status_code = status_code
list_artifact_dir_bad_response_mock.text = "An error occurred"
list_artifact_dir_bad_response_mock.raise_for_status.side_effect = _raise_for_status
with mock.patch(
DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint"
) as call_endpoint_mock:
call_endpoint_mock.return_value = list_artifact_dir_bad_response_mock
with pytest.raises(
MlflowException,
match=r"API request to list files under path `` failed with status code 404. "
"Response body: An error occurred",
):
databricks_model_artifact_repo.list_artifacts("")
call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT)

def test_list_artifacts_for_single_file(self, databricks_model_artifact_repo):
list_artifact_file_response_mock = mock.MagicMock()
list_artifact_file_response_mock.status_code = 200
