forked from mlflow/mlflow
-
Notifications
You must be signed in to change notification settings - Fork 1
/
environment.py
317 lines (261 loc) · 11.2 KB
/
environment.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import yaml
import os
import logging
from mlflow.utils import PYTHON_VERSION
from mlflow.utils.requirements_utils import _parse_requirements, _infer_requirements
from packaging.requirements import Requirement, InvalidRequirement
_logger = logging.getLogger(__name__)
_conda_header = """\
name: mlflow-env
channels:
- conda-forge
"""
_CONDA_ENV_FILE_NAME = "conda.yaml"
_REQUIREMENTS_FILE_NAME = "requirements.txt"
_CONSTRAINTS_FILE_NAME = "constraints.txt"
def _mlflow_conda_env(
path=None,
additional_conda_deps=None,
additional_pip_deps=None,
additional_conda_channels=None,
install_mlflow=True,
):
"""
Creates a Conda environment with the specified package channels and dependencies. If there are
any pip dependencies, including from the install_mlflow parameter, then pip will be added to
the conda dependencies. This is done to ensure that the pip inside the conda environment is
used to install the pip dependencies.
:param path: Local filesystem path where the conda env file is to be written. If unspecified,
the conda env will not be written to the filesystem; it will still be returned
in dictionary format.
:param additional_conda_deps: List of additional conda dependencies passed as strings.
:param additional_pip_deps: List of additional pip dependencies passed as strings.
:param additional_conda_channels: List of additional conda channels to search when resolving
packages.
:return: ``None`` if ``path`` is specified. Otherwise, the a dictionary representation of the
Conda environment.
"""
pip_deps = (["mlflow"] if install_mlflow else []) + (
additional_pip_deps if additional_pip_deps else []
)
conda_deps = (additional_conda_deps if additional_conda_deps else []) + (
["pip"] if pip_deps else []
)
env = yaml.safe_load(_conda_header)
env["dependencies"] = ["python={}".format(PYTHON_VERSION)]
if conda_deps is not None:
env["dependencies"] += conda_deps
env["dependencies"].append({"pip": pip_deps})
if additional_conda_channels is not None:
env["channels"] += additional_conda_channels
if path is not None:
with open(path, "w") as out:
yaml.safe_dump(env, stream=out, default_flow_style=False)
return None
else:
return env
def _mlflow_additional_pip_env(pip_deps, path=None):
requirements = "\n".join(pip_deps)
if path is not None:
with open(path, "w") as out:
out.write(requirements)
return None
else:
return requirements
def _is_pip_deps(dep):
"""
Returns True if `dep` is a dict representing pip dependencies
"""
return isinstance(dep, dict) and "pip" in dep
def _get_pip_deps(conda_env):
"""
:return: The pip dependencies from the conda env
"""
if conda_env is not None:
for dep in conda_env["dependencies"]:
if _is_pip_deps(dep):
return dep["pip"]
return []
def _overwrite_pip_deps(conda_env, new_pip_deps):
"""
Overwrites the pip dependencies section in the given conda env dictionary.
{
"name": "env",
"channels": [...],
"dependencies": [
...,
"pip",
{"pip": [...]}, <- Overwrite this
],
}
"""
deps = conda_env.get("dependencies", [])
new_deps = []
contains_pip_deps = False
for dep in deps:
if _is_pip_deps(dep):
contains_pip_deps = True
new_deps.append({"pip": new_pip_deps})
else:
new_deps.append(dep)
if not contains_pip_deps:
new_deps.append({"pip": new_pip_deps})
return {**conda_env, "dependencies": new_deps}
def _log_pip_requirements(conda_env, path, requirements_file=_REQUIREMENTS_FILE_NAME):
pip_deps = _get_pip_deps(conda_env)
_mlflow_additional_pip_env(pip_deps, path=os.path.join(path, requirements_file))
def _parse_pip_requirements(pip_requirements):
"""
Parses an iterable of pip requirement strings or a pip requirements file.
:param pip_requirements: Either an iterable of pip requirement strings
(e.g. ``["scikit-learn", "-r requirements.txt"]``) or the string path to a pip requirements
file on the local filesystem (e.g. ``"requirements.txt"``). If ``None``, an empty list will
be returned.
:return: A tuple of parsed requirements and constraints.
"""
if pip_requirements is None:
return [], []
def _is_string(x):
return isinstance(x, str)
def _is_iterable(x):
try:
iter(x)
return True
except Exception:
return False
if _is_string(pip_requirements):
with open(pip_requirements) as f:
return _parse_pip_requirements(f.read().splitlines())
elif _is_iterable(pip_requirements) and all(map(_is_string, pip_requirements)):
requirements = []
constraints = []
for req_or_con in _parse_requirements(pip_requirements, is_constraint=False):
if req_or_con.is_constraint:
constraints.append(req_or_con.req_str)
else:
requirements.append(req_or_con.req_str)
return requirements, constraints
else:
raise TypeError(
"`pip_requirements` must be either a string path to a pip requirements file on the "
"local filesystem or an iterable of pip requirement strings, but got `{}`".format(
type(pip_requirements)
)
)
_INFER_PIP_REQUIREMENTS_FALLBACK_MESSAGE = (
"Encountered an unexpected error while inferring pip requirements (model URI: %s, flavor: %s),"
" fall back to return %s. Set logging level to DEBUG to see the full traceback."
)
def infer_pip_requirements(model_uri, flavor, fallback=None):
"""
Infers the pip requirements of the specified model by creating a subprocess and loading
the model in it to determine which packages are imported.
:param model_uri: The URI of the model.
:param flavor: The flavor name of the model.
:param fallback: If provided, an unexpected error during the inference procedure is swallowed
and the value of ``fallback`` is returned. Otherwise, the error is raised.
:return: A list of inferred pip requirements (e.g. ``["scikit-learn==0.24.2", ...]``).
"""
try:
return _infer_requirements(model_uri, flavor)
except Exception:
if fallback is not None:
_logger.warning(_INFER_PIP_REQUIREMENTS_FALLBACK_MESSAGE, model_uri, flavor, fallback)
_logger.debug("", exc_info=True)
return fallback
raise
def _validate_env_arguments(conda_env, pip_requirements, extra_pip_requirements):
"""
Validates that only one or none of `conda_env`, `pip_requirements`, and
`extra_pip_requirements` is specified.
"""
args = [
conda_env,
pip_requirements,
extra_pip_requirements,
]
specified = [arg for arg in args if arg is not None]
if len(specified) > 1:
raise ValueError(
"Only one of `conda_env`, `pip_requirements`, and "
"`extra_pip_requirements` can be specified"
)
# PIP requirement parser inspired from https://github.com/pypa/pip/blob/b392833a0f1cff1bbee1ac6dbe0270cccdd0c11f/src/pip/_internal/req/req_file.py#L400
def _get_pip_requirement_specifier(requirement_string):
tokens = requirement_string.split(" ")
for idx, token in enumerate(tokens):
if token.startswith("-"):
return " ".join(tokens[:idx])
return requirement_string
def _is_mlflow_requirement(requirement_string):
"""
Returns True if `requirement_string` represents a requirement for mlflow (e.g. 'mlflow==1.2.3').
"""
try:
# `Requirement` throws an `InvalidRequirement` exception if `requirement_string` doesn't
# conform to PEP 508 (https://www.python.org/dev/peps/pep-0508).
return Requirement(requirement_string).name.lower() == "mlflow"
except InvalidRequirement:
# A local file path or URL falls into this branch.
# `Requirement` throws an `InvalidRequirement` exception if `requirement_string` contains
# per-requirement options (ex: package hashes)
# GitHub issue: https://github.com/pypa/packaging/issues/488
# Per-requirement-option spec: https://pip.pypa.io/en/stable/reference/requirements-file-format/#per-requirement-options
requirement_specifier = _get_pip_requirement_specifier(requirement_string)
try:
# Try again with the per-requirement options removed
return Requirement(requirement_specifier).name.lower() == "mlflow"
except InvalidRequirement:
return False
# TODO: Return True if `requirement_string` represents a project directory for MLflow
# (e.g. '/path/to/mlflow') or git repository URL (e.g. 'https://github.com/mlflow/mlflow').
return False
def _contains_mlflow_requirement(requirements):
"""
Returns True if `requirements` contains a requirement for mlflow (e.g. 'mlflow==1.2.3').
"""
return any(map(_is_mlflow_requirement, requirements))
def _process_pip_requirements(
default_pip_requirements, pip_requirements=None, extra_pip_requirements=None
):
"""
Processes `pip_requirements` and `extra_pip_requirements` passed to `mlflow.*.save_model` or
`mlflow.*.log_model`, and returns a tuple of (conda_env, pip_requirements, pip_constraints).
"""
constraints = []
if pip_requirements is not None:
pip_reqs, constraints = _parse_pip_requirements(pip_requirements)
elif extra_pip_requirements is not None:
extra_pip_requirements, constraints = _parse_pip_requirements(extra_pip_requirements)
pip_reqs = default_pip_requirements + extra_pip_requirements
else:
pip_reqs = default_pip_requirements
if not _contains_mlflow_requirement(pip_reqs):
pip_reqs.insert(0, "mlflow")
if constraints:
pip_reqs.append(f"-c {_CONSTRAINTS_FILE_NAME}")
# Set `install_mlflow` to False because `pip_reqs` already contains `mlflow`
conda_env = _mlflow_conda_env(additional_pip_deps=pip_reqs, install_mlflow=False)
return conda_env, pip_reqs, constraints
def _process_conda_env(conda_env):
"""
Processes `conda_env` passed to `mlflow.*.save_model` or `mlflow.*.log_model`, and returns
a tuple of (conda_env, pip_requirements, pip_constraints).
"""
if isinstance(conda_env, str):
with open(conda_env, "r") as f:
conda_env = yaml.safe_load(f)
elif not isinstance(conda_env, dict):
raise TypeError(
"Expected a string path to a conda env yaml file or a `dict` representing a conda env, "
"but got `{}`".format(type(conda_env).__name__)
)
# User-specified `conda_env` may contain requirements/constraints file references
pip_reqs = _get_pip_deps(conda_env)
pip_reqs, constraints = _parse_pip_requirements(pip_reqs)
if not _contains_mlflow_requirement(pip_reqs):
pip_reqs.insert(0, "mlflow")
if constraints:
pip_reqs.append(f"-c {_CONSTRAINTS_FILE_NAME}")
conda_env = _overwrite_pip_deps(conda_env, pip_reqs)
return conda_env, pip_reqs, constraints