
Commit

resolve conflicts
Signed-off-by: harupy <hkawamura0130@gmail.com>
harupy committed Jan 5, 2022
2 parents e2f8e0d + 2d8cb0c commit 97f56d2
Showing 7 changed files with 137 additions and 13 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/master.yml
@@ -83,13 +83,13 @@ jobs:
# `os_name` will be like "Ubuntu-20.04.1-LTS"
os_name=$(lsb_release -ds | sed 's/\s/-/g')
echo "::set-output name=os-name::$os_name"
- # - name: Cache R packages
- #   uses: actions/cache@v2
- #   with:
- #     path: ${{ env.R_LIBS_USER }}
- #     # We cache R dependencies based on a tuple of the current OS, the R version, and the list of
- #     # R dependencies
- #     key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
+ - name: Cache R packages
+   uses: actions/cache@v2
+   with:
+     path: ${{ env.R_LIBS_USER }}
+     # We cache R dependencies based on a tuple of the current OS, the R version, and the list of
+     # R dependencies
+     key: ${{ steps.os-name.outputs.os-name }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
- name: Install system dependencies
run: |
sudo apt-get install -y libcurl4-openssl-dev
2 changes: 1 addition & 1 deletion mlflow/R/mlflow/.Rprofile
@@ -1,3 +1,3 @@
# https://packagemanager.rstudio.com provides pre-compiled binary packages for Linux
# that can be installed significantly faster.
- options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/all/latest"))
+ options(repos = c(REPO_NAME = "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"))
18 changes: 18 additions & 0 deletions mlflow/models/model.py
@@ -68,6 +68,24 @@ def get_input_schema(self):
def get_output_schema(self):
return self.signature.outputs if self.signature is not None else None

def load_input_example(self, path: str):
"""
Load the input example saved along a model. Returns None if there is no example metadata
(i.e. the model was saved without example). Raises FileNotFoundError if there is model
metadata but the example file is missing.
:param path: Path to the model directory.
:return: Input example (NumPy ndarray, SciPy csc_matrix, SciPy csr_matrix,
pandas DataFrame, dict) or None if the model has no example.
"""

# Just-in-time import to only load example-parsing libraries (e.g. numpy, pandas, etc.) if
# example is requested.
from mlflow.models.utils import _read_example

return _read_example(self, path)

def add_flavor(self, name, **params):
"""Add an entry for how to serve the model in a given format."""
self.flavors[name] = params
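For orientation, here is a hedged usage sketch of the new method (not part of the diff; the sklearn model, local path, and example data below are illustrative):

import os

import mlflow.sklearn
import pandas as pd
from mlflow.models import Model
from sklearn.linear_model import LinearRegression

X = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
model_path = "/tmp/example_model"  # illustrative path

# Save a model together with an input example.
mlflow.sklearn.save_model(LinearRegression().fit(X, [2.0, 4.0, 6.0]), model_path, input_example=X)

# Load the MLmodel metadata and read the example back with the new method.
mlflow_model = Model.load(os.path.join(model_path, "MLmodel"))
example = mlflow_model.load_input_example(model_path)  # a pandas DataFrame here; None if no example was saved

If the MLmodel metadata records an example but the example file has been removed from the directory, the call raises FileNotFoundError, which the new tests further down exercise.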
4 changes: 2 additions & 2 deletions mlflow/models/utils.py
@@ -192,8 +192,8 @@ def _save_example(mlflow_model: Model, input_example: ModelInputExample, path: s
def _read_example(mlflow_model: Model, path: str):
"""
Read example from a model directory. Returns None if there is no example metadata (i.e. the
- model was saved without example). Raises IO Exception if there is model metadata but the example
- file is missing.
+ model was saved without example). Raises FileNotFoundError if there is model metadata but the
+ example file is missing.
:param mlflow_model: Model metadata.
:param path: Path to the model directory.
3 changes: 2 additions & 1 deletion mlflow/store/artifact/databricks_models_artifact_repo.py
@@ -71,8 +71,9 @@ def list_artifacts(self, path=None):
json_body = self._make_json_body(path, page_token)
response = self._call_endpoint(json_body, REGISTRY_LIST_ARTIFACTS_ENDPOINT)
try:
+ response.raise_for_status()
json_response = json.loads(response.text)
- except ValueError:
+ except Exception:
raise MlflowException(
"API request to list files under path `%s` failed with status code %s. "
"Response body: %s" % (path, response.status_code, response.text)
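The net effect of this change is that a non-2xx HTTP status now surfaces as an MlflowException carrying the status code and response body, rather than as an unhandled requests error, while JSON decoding failures are still reported the same way. A simplified sketch of the resulting control flow (the standalone function name below is illustrative; in the repository this logic lives inside list_artifacts):

import json

from mlflow.exceptions import MlflowException

def _parse_list_artifacts_response(response, path):
    try:
        # Raises for HTTP error statuses (e.g. 404) before we try to parse the body.
        response.raise_for_status()
        return json.loads(response.text)
    except Exception:
        # Both HTTP errors and malformed JSON end up here and are re-raised as a
        # single MlflowException that includes the status code and response body.
        raise MlflowException(
            "API request to list files under path `%s` failed with status code %s. "
            "Response body: %s" % (path, response.status_code, response.text)
        )

Catching Exception rather than ValueError is what lets the HTTPError raised by raise_for_status take the same error path as a JSON decode failure.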
80 changes: 79 additions & 1 deletion tests/models/test_model.py
@@ -1,4 +1,5 @@
import os
import pytest
from datetime import date

import mlflow
@@ -9,11 +10,12 @@
from mlflow.models import Model
from mlflow.models.signature import ModelSignature
from mlflow.models.utils import _save_example
- from mlflow.types.schema import Schema, ColSpec
+ from mlflow.types.schema import Schema, ColSpec, TensorSpec
from mlflow.utils.file_utils import TempDir
from mlflow.utils.proto_json_utils import _dataframe_from_json

from unittest import mock
from scipy.sparse import csc_matrix


def test_model_save_load():
@@ -104,6 +106,10 @@ def test_model_log():
assert x.to_dict(orient="records")[0] == input_example
assert not hasattr(loaded_model, "databricks_runtime")

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.to_dict(orient="records")[0] == input_example


def test_model_log_with_databricks_runtime():
dbr = "8.3.x-snapshot-gpu-ml-scala2.12"
@@ -165,6 +171,78 @@ def test_model_log_with_input_example_succeeds():
input_example["d"] = input_example["d"].apply(lambda x: x.isoformat())
assert x.equals(input_example)

loaded_example = loaded_model.load_input_example(local_path)
assert isinstance(loaded_example, pd.DataFrame)
assert loaded_example.equals(input_example)


def test_model_load_input_example_numpy():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)

assert isinstance(loaded_example, np.ndarray)
assert np.array_equal(input_example, loaded_example)


def test_model_load_input_example_scipy():
with TempDir(chdr=True) as tmp:
input_example = csc_matrix(np.arange(0, 12, 0.5).reshape(3, 8))
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.data.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)

assert isinstance(loaded_example, csc_matrix)
assert np.array_equal(input_example.data, loaded_example.data)


def test_model_load_input_example_failures():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)
assert loaded_example is not None

with pytest.raises(FileNotFoundError, match="No such file or directory"):
loaded_model.load_input_example(os.path.join(local_path, "folder_which_does_not_exist"))

path = os.path.join(local_path, loaded_model.saved_input_example_info["artifact_path"])
os.remove(path)
with pytest.raises(FileNotFoundError, match="No such file or directory"):
loaded_model.load_input_example(local_path)


def test_model_load_input_example_no_signature():
with TempDir(chdr=True) as tmp:
input_example = np.array([[3, 4, 5]], dtype=np.int32)
sig = ModelSignature(
inputs=Schema([TensorSpec(type=input_example.dtype, shape=input_example.shape)]),
outputs=Schema([ColSpec(name=None, type="double")]),
)

local_path, _ = _log_model_with_signature_and_example(tmp, sig, input_example=None)
loaded_model = Model.load(os.path.join(local_path, "MLmodel"))
loaded_example = loaded_model.load_input_example(local_path)
assert loaded_example is None


def _is_valid_uuid(val):
import uuid
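The new tests rely on _log_model_with_signature_and_example, a helper defined earlier in tests/models/test_model.py and not shown in this hunk. Roughly, it logs a model with the given signature and input example and returns the local artifact directory together with the run. A hedged sketch of that shape (the flavor class and helper body below are illustrations, not the actual test code):

import os

import mlflow
from mlflow.models import Model
from mlflow.models.utils import _save_example
from mlflow.tracking.artifact_utils import _download_artifact_from_uri


class _ExampleFlavor:
    # Hypothetical minimal flavor: records the signature/example and writes the MLmodel file.
    @classmethod
    def save_model(cls, path, mlflow_model, signature=None, input_example=None):
        os.makedirs(path, exist_ok=True)
        if signature is not None:
            mlflow_model.signature = signature
        if input_example is not None:
            _save_example(mlflow_model, input_example, path)
        mlflow_model.save(os.path.join(path, "MLmodel"))


def _log_model_with_signature_and_example(tmp, sig, input_example):
    with mlflow.start_run() as run:
        Model.log("some/path", _ExampleFlavor, signature=sig, input_example=input_example)
    local_path = _download_artifact_from_uri(
        "runs:/{}/some/path".format(run.info.run_id), output_path=tmp.path("")
    )
    return local_path, run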
29 changes: 28 additions & 1 deletion tests/store/artifact/test_databricks_models_artifact_repo.py
@@ -142,15 +142,24 @@ def test_init_with_valid_uri_but_no_profile(self, valid_profileless_artifact_uri
DatabricksModelsArtifactRepository(valid_profileless_artifact_uri)

def test_list_artifacts(self, databricks_model_artifact_repo):
status_code = 200

def _raise_for_status():
if status_code == 404:
raise Exception(
"404 Client Error: Not Found for url: https://shard-uri/api/2.0/mlflow/model-versions/list-artifacts?name=model&version=1"
)

list_artifact_dir_response_mock = mock.MagicMock()
- list_artifact_dir_response_mock.status_code = 200
+ list_artifact_dir_response_mock.status_code = status_code
list_artifact_dir_json_mock = {
"files": [
{"path": "MLmodel", "is_dir": False, "file_size": 294},
{"path": "data", "is_dir": True, "file_size": None},
]
}
list_artifact_dir_response_mock.text = json.dumps(list_artifact_dir_json_mock)
list_artifact_dir_response_mock.raise_for_status.side_effect = _raise_for_status
with mock.patch(
DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint"
) as call_endpoint_mock:
@@ -166,6 +175,24 @@ def test_list_artifacts(self, databricks_model_artifact_repo):
assert artifacts[1].file_size is None
call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT)

# errors from API are propagated through to cli response
list_artifact_dir_bad_response_mock = mock.MagicMock()
status_code = 404
list_artifact_dir_bad_response_mock.status_code = status_code
list_artifact_dir_bad_response_mock.text = "An error occurred"
list_artifact_dir_bad_response_mock.raise_for_status.side_effect = _raise_for_status
with mock.patch(
DATABRICKS_MODEL_ARTIFACT_REPOSITORY + "._call_endpoint"
) as call_endpoint_mock:
call_endpoint_mock.return_value = list_artifact_dir_bad_response_mock
with pytest.raises(
MlflowException,
match=r"API request to list files under path `` failed with status code 404. "
"Response body: An error occurred",
):
databricks_model_artifact_repo.list_artifacts("")
call_endpoint_mock.assert_called_once_with(ANY, REGISTRY_LIST_ARTIFACTS_ENDPOINT)

def test_list_artifacts_for_single_file(self, databricks_model_artifact_repo):
list_artifact_file_response_mock = mock.MagicMock()
list_artifact_file_response_mock.status_code = 200
