diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 088db5182fdb6..63493c10991c8 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -83,13 +83,13 @@ jobs: # `os_name` will be like "Ubuntu-20.04.1-LTS" os_name=$(lsb_release -ds | sed 's/\s/-/g') echo "::set-output name=os-name::$os_name" - # - name: Cache R packages - # uses: actions/cache@v2 - # with: - # path: ${{ env.R_LIBS_USER }} - # # We cache R dependencies based on a tuple of the current OS, the R version, and the list of - # # R dependencies - # key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + - name: Cache R packages + uses: actions/cache@v2 + with: + path: ${{ env.R_LIBS_USER }} + # We cache R dependencies based on a tuple of the current OS, the R version, and the list of + # R dependencies + key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - name: Install system dependencies run: | sudo apt-get install -y libcurl4-openssl-dev diff --git a/mlflow/R/mlflow/.Rprofile b/mlflow/R/mlflow/.Rprofile index eadd63f97f735..1bc8f15ab9731 100644 --- a/mlflow/R/mlflow/.Rprofile +++ b/mlflow/R/mlflow/.Rprofile @@ -1,3 +1,3 @@ # https://packagemanager.rstudio.com provides pre-compiled binary packages for Linux # that can be installed significantly faster. 
-options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/all/latest")) +options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/cran/__linux__/focal/latest")) diff --git a/mlflow/models/model.py b/mlflow/models/model.py index 806357c10ab4a..1e75c95dd19f9 100644 --- a/mlflow/models/model.py +++ b/mlflow/models/model.py @@ -68,6 +68,24 @@ def get_input_schema(self): def get_output_schema(self): return self.signature.outputs if self.signature is not None else None + def load_input_example(self, path: str): + """ + Load the input example saved alongside a model. Returns None if there is no example + metadata (i.e. the model was saved without an example). Raises FileNotFoundError if the + example metadata is present but the example file is missing. + + :param path: Path to the model directory. + + :return: Input example (NumPy ndarray, SciPy csc_matrix, SciPy csr_matrix, + pandas DataFrame, dict) or None if the model has no example. + """ + + # Import lazily so that example-parsing libraries (e.g. numpy, pandas) are only loaded if + # an example is requested. + from mlflow.models.utils import _read_example + + return _read_example(self, path) + def add_flavor(self, name, **params): """Add an entry for how to serve the model in a given format.""" self.flavors[name] = params diff --git a/mlflow/models/utils.py b/mlflow/models/utils.py index 137a1eac0f0ab..206d0de6a128b 100644 --- a/mlflow/models/utils.py +++ b/mlflow/models/utils.py @@ -192,8 +192,8 @@ def _save_example(mlflow_model: Model, input_example: ModelInputExample, path: s def _read_example(mlflow_model: Model, path: str): """ Read example from a model directory. Returns None if there is no example metadata (i.e. the - model was saved without example). Raises IO Exception if there is model metadata but the example - file is missing. + model was saved without example). Raises FileNotFoundError if there is example metadata but the + example file is missing. :param mlflow_model: Model metadata. 
:param path: Path to the model directory. diff --git a/mlflow/store/artifact/databricks_models_artifact_repo.py b/mlflow/store/artifact/databricks_models_artifact_repo.py index 4f04e0f7f86cf..6670d8bd02ee9 100644 --- a/mlflow/store/artifact/databricks_models_artifact_repo.py +++ b/mlflow/store/artifact/databricks_models_artifact_repo.py @@ -71,8 +71,9 @@ def list_artifacts(self, path=None): json_body = self._make_json_body(path, page_token) response = self._call_endpoint(json_body, REGISTRY_LIST_ARTIFACTS_ENDPOINT) try: + response.raise_for_status() json_response = json.loads(response.text) - except ValueError: + except Exception: raise MlflowException( "API request to list files under path `%s` failed with status code %s. " "Response body: %s" % (path, response.status_code, response.text) diff --git a/tests/models/test_model.py b/tests/models/test_model.py index 131a70004b54a..67be4d7fb21c9 100644 --- a/tests/models/test_model.py +++ b/tests/models/test_model.py @@ -1,4 +1,5 @@ import os +import pytest from datetime import date import mlflow @@ -9,11 +10,12 @@ from mlflow.models import Model from mlflow.models.signature import ModelSignature from mlflow.models.utils import _save_example -from mlflow.types.schema import Schema, ColSpec +from mlflow.types.schema import Schema, ColSpec, TensorSpec from mlflow.utils.file_utils import TempDir from mlflow.utils.proto_json_utils import _dataframe_from_json from unittest import mock +from scipy.sparse import csc_matrix def test_model_save_load(): @@ -104,6 +106,10 @@ def test_model_log(): assert x.to_dict(orient="records")[0] == input_example assert not hasattr(loaded_model, "databricks_runtime") + loaded_example = loaded_model.load_input_example(local_path) + assert isinstance(loaded_example, pd.DataFrame) + assert loaded_example.to_dict(orient="records")[0] == input_example + def test_model_log_with_databricks_runtime(): dbr = "8.3.x-snapshot-gpu-ml-scala2.12" @@ -165,6 +171,78 @@ def 
test_model_log_with_input_example_succeeds(): input_example["d"] = input_example["d"].apply(lambda x: x.isoformat()) assert x.equals(input_example) + loaded_example = loaded_model.load_input_example(local_path) + assert isinstance(loaded_example, pd.DataFrame) + assert loaded_example.equals(input_example) + + +def test_model_load_input_example_numpy(): + with TempDir(chdr=True) as tmp: + input_example = np.array([[3, 4, 5]], dtype=np.int32) + sig = ModelSignature( + inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]), + outputs=Schema([ColSpec(name=None, type="double")]), + ) + + local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example) + loaded_model = Model.load(os.path.join(local_path, "MLmodel")) + loaded_example = loaded_model.load_input_example(local_path) + + assert isinstance(loaded_example, np.ndarray) + assert np.array_equal(input_example, loaded_example) + + +def test_model_load_input_example_scipy(): + with TempDir(chdr=True) as tmp: + input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8)) + sig = ModelSignature( + inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]), + outputs=Schema([ColSpec(name=None, type="double")]), + ) + + local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example) + loaded_model = Model.load(os.path.join(local_path, "MLmodel")) + loaded_example = loaded_model.load_input_example(local_path) + + assert isinstance(loaded_example, csc_matrix) + assert np.array_equal(input_example.data, loaded_example.data) + + +def test_model_load_input_example_failures(): + with TempDir(chdr=True) as tmp: + input_example = np.array([[3, 4, 5]], dtype=np.int32) + sig = ModelSignature( + inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]), + outputs=Schema([ColSpec(name=None, type="double")]), + ) + + local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example) + loaded_model = 
Model.load(os.path.join(local_path, "MLmodel")) + loaded_example = loaded_model.load_input_example(local_path) + assert loaded_example is not None + + with pytest.raises(FileNotFoundError, match="No such file or directory"): + loaded_model.load_input_example(os.path.join(local_path, "folder_which_does_not_exist")) + + path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"]) + os.remove(path) + with pytest.raises(FileNotFoundError, match="No such file or directory"): + loaded_model.load_input_example(local_path) + + +def test_model_load_input_example_no_signature(): + with TempDir(chdr=True) as tmp: + input_example = np.array([[3, 4, 5]], dtype=np.int32) + sig = ModelSignature( + inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]), + outputs=Schema([ColSpec(name=None, type="double")]), + ) + + local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None) + loaded_model = Model.load(os.path.join(local_path, "MLmodel")) + loaded_example = loaded_model.load_input_example(local_path) + assert loaded_example is None + def _is_valid_uuid(val): import uuid diff --git a/tests/store/artifact/test_databricks_models_artifact_repo.py b/tests/store/artifact/test_databricks_models_artifact_repo.py index bcce2d170044b..4049e7bbe4838 100644 --- a/tests/store/artifact/test_databricks_models_artifact_repo.py +++ b/tests/store/artifact/test_databricks_models_artifact_repo.py @@ -142,8 +142,16 @@ def test_init_with_valid_uri_but_no_profile(self, valid_profileless_artifact_uri DatabricksModelsArtifactRepository(valid_profileless_artifact_uri) def test_list_artifacts(self, databricks_model_artifact_repo): + status_code = 200 + + def _raise_for_status(): + if status_code == 404: + raise Exception( + "404 Client Error: Not Found for url: https://shard-uri/api/2.0/mlflow/model-versions/list-artifacts?name=model&version=1" + ) + list_artifact_dir_response_mock = mock.MagicMock() - 
list_artifact_dir_response_mock.status_code = 200 + list_artifact_dir_response_mock.status_code = status_code list_artifact_dir_json_mock = { "files": [ {"path": "MLmodel", "is_dir": False, "file_size": 294}, @@ -151,6 +159,7 @@ def test_list_artifacts(self, databricks_model_artifact_repo): ] } list_artifact_dir_response_mock.text = json.dumps(list_artifact_dir_json_mock) + list_artifact_dir_response_mock.raise_for_status.side_effect = _raise_for_status with mock.patch( DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint" ) as call_endpoint_mock: @@ -166,6 +175,24 @@ def test_list_artifacts(self, databricks_model_artifact_repo): assert artifacts[1].file_size is None call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT) + # An error response from the API (404 here) must surface as an MlflowException + list_artifact_dir_bad_response_mock = mock.MagicMock() + status_code = 404 + list_artifact_dir_bad_response_mock.status_code = status_code + list_artifact_dir_bad_response_mock.text = "An error occurred" + list_artifact_dir_bad_response_mock.raise_for_status.side_effect = _raise_for_status + with mock.patch( + DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint" + ) as call_endpoint_mock: + call_endpoint_mock.return_value = list_artifact_dir_bad_response_mock + with pytest.raises( + MlflowException, + match=r"API request to list files under path `` failed with status code 404. " + "Response body: An error occurred", + ): + databricks_model_artifact_repo.list_artifacts("") + call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT) + def test_list_artifacts_for_single_file(self, databricks_model_artifact_repo): list_artifact_file_response_mock = mock.MagicMock() list_artifact_file_response_mock.status_code = 200