# Name of the environment variable holding inline configuration overrides,
# written as whitespace-separated ``flat.key=value`` pairs.
CONFIG_OVERRIDE_ENV_VAR = "BENTOML_CONFIG_OPTIONS"


def get_bentoml_override_config_from_env() -> t.Optional[str]:
    """Return the inline config overrides from the environment, if any.

    Reads ``CONFIG_OVERRIDE_ENV_VAR`` with a single lookup and strips
    surrounding whitespace.

    Returns:
        The stripped override string, or ``None`` when the variable is unset,
        empty, or contains only whitespace.
    """
    raw_options = os.environ.get(CONFIG_OVERRIDE_ENV_VAR)
    if raw_options is None:
        return None
    # Leading/trailing whitespace would otherwise produce an empty token when
    # the value is later split on whitespace, and an empty token cannot be
    # split into a ``key=value`` pair.
    return raw_options.strip() or None
import expand_env_var +from ..utils import split_with_quotes from ..utils import validate_or_create_dir from ..context import component_context from ..resource import system_resources from ...exceptions import BentoMLConfigException +from ..utils.unflatten import unflatten if TYPE_CHECKING: from bentoml._internal.models import ModelStore @@ -214,6 +216,7 @@ class BentoMLConfiguration: def __init__( self, override_config_file: t.Optional[str] = None, + override_config_values: t.Optional[str] = None, validate_schema: bool = True, ): # Load default configuration @@ -223,15 +226,6 @@ def __init__( with open(default_config_file, "rb") as f: self.config: t.Dict[str, t.Any] = yaml.safe_load(f) - if validate_schema: - try: - SCHEMA.validate(self.config) - except SchemaError as e: - raise BentoMLConfigException( - "Default configuration 'default_configuration.yml' does not" - " conform to the required schema." - ) from e - # User override configuration if override_config_file is not None: logger.info("Applying user config override from %s" % override_config_file) @@ -271,26 +265,54 @@ def __init__( config_merger.merge(self.config, override_config) - global_runner_cfg = {k: self.config["runners"][k] for k in RUNNER_CFG_KEYS} - for key in self.config["runners"]: - if key not in RUNNER_CFG_KEYS: - runner_cfg = self.config["runners"][key] + if override_config_values is not None: + logger.info( + "Applying user config override from ENV VAR: %s", override_config_values + ) + lines = split_with_quotes( + override_config_values, + sep=r"\s+", + quote='"', + use_regex=True, + ) + override_config_map = { + k: yaml.safe_load(v) + for k, v in [ + split_with_quotes(line, sep="=", quote='"') for line in lines + ] + } + try: + override_config = unflatten(override_config_map) + except ValueError as e: + raise BentoMLConfigException( + f'Failed to parse config options from the env var: {e}. \n *** Note: You can use " to quote the key if it contains special characters. 
def split_with_quotes(
    s: str,
    sep: str = ",",
    quote: str = '"',
    use_regex: bool = False,
) -> list[str]:
    r"""
    Split ``s`` on ``sep``, ignoring separators that appear inside quotes.

    Quoted sections are copied to the output verbatim, *including* the quote
    characters themselves:

    >>> split_with_quotes('a,b,"c,d",e')
    ['a', 'b', '"c,d"', 'e']
    >>> split_with_quotes('x=1   y."a b"=2', sep=r"\s+", use_regex=True)
    ['x=1', 'y."a b"=2']

    Args:
        s: The string to split.
        sep: The separator. Taken literally unless ``use_regex`` is true, in
            which case it is used as a regular expression.
        quote: The quote character. It is always matched literally and is
            expected to be a single character.
        use_regex: Whether ``sep`` is a regular expression.

    Returns:
        The list of parts. There is always at least one element; splitting an
        empty string yields ``['']``.
    """
    # The quote is a literal character in both modes, so it is always escaped;
    # only the separator may be a caller-supplied pattern.
    quote_pattern = re.escape(quote)
    sep_pattern = sep if use_regex else re.escape(sep)
    tokenizer = re.compile(
        "({quote}[^{quote}]*{quote})|({sep})".format(
            quote=quote_pattern,
            sep=sep_pattern,
        )
    )

    parts: list[str] = []
    pending: list[str] = []  # fragments of the part currently being assembled
    cursor = 0
    for token in tokenizer.finditer(s):
        pending.append(s[cursor : token.start()])
        cursor = token.end()
        if token.group(2) is not None:
            # A separator outside of quotes terminates the current part.
            parts.append("".join(pending))
            pending = []
        else:
            # A quoted section is kept as-is and joined to its neighbours.
            pending.append(token.group(1))
    pending.append(s[cursor:])
    parts.append("".join(pending))
    return parts
# Alias kept for backward compatibility with the upstream module; this vendored
# copy relies on Python 3 only syntax, so the Python 2 ``basestring`` branch
# was dead code.
string_type = str


def unflatten(arg: dict[str, t.Any]) -> dict[str, t.Any]:
    """Unflatten nested dict/array data.

    ``arg`` may be a ``dict`` (or any object with a dict-like ``.items()``
    method) or an iterable of ``(key, value)`` pairs. All keys must be
    strings. A key is a path made of ``.name`` segments, ``[index]`` segments
    and ``."quoted name"`` segments (for names containing special characters).

    Examples
    --------

    Nested dicts::

        >>> unflatten({'foo.bar': 'val'})
        {'foo': {'bar': 'val'}}

    Nested list::

        >>> unflatten({'foo[0]': 'val', 'foo[1]': 'bar'})
        {'foo': ['val', 'bar']}

    Nested lists::

        >>> unflatten({'foo[0][0]': 'val'})
        {'foo': [['val']]}

    Lists of dicts::

        >>> unflatten({'foo[0].bar': 'val',
        ...            'foo[1].baz': 'x'})
        {'foo': [{'bar': 'val'}, {'baz': 'x'}]}

    Quoted names::

        >>> unflatten({'res."nvidia.com/gpu"[0]': 0})
        {'res': {'nvidia.com/gpu': [0]}}

    Raises
    ------
    TypeError
        If a key is not a string.
    ValueError
        If a key is malformed, if two keys disagree about the type of a node
        (dict vs. list vs. terminal value), or if a list has a missing index.
    """
    items = arg.items() if hasattr(arg, "items") else arg

    data: dict[str, t.Any] = {}
    # (parent, key) for every placeholder created, in creation order.
    holders: list[tuple[t.Any, t.Any]] = []
    for flat_key, val in items:
        parsed_key = _parse_key(flat_key)
        obj: t.Any = data
        for depth, (key, next_key) in enumerate(zip(parsed_key, parsed_key[1:]), 1):
            # The type of the *next* segment decides what container this
            # segment must hold: a name means dict, an index means list.
            holder_type = _dict_holder if isinstance(next_key, str) else _list_holder

            if key not in obj:
                obj[key] = holder_type(_unparse_key(parsed_key[:depth]))
                holders.append((obj, key))
            elif not isinstance(obj[key], holder_type):
                raise ValueError(
                    "conflicting types %s and %s for key %r"
                    % (
                        _node_type(obj[key]),
                        holder_type.node_type.__name__,
                        _unparse_key(parsed_key[:depth]),
                    )
                )
            obj = obj[key]

        last_key = parsed_key[-1]
        if isinstance(obj.get(last_key), _holder):
            raise ValueError(
                "conflicting types %s and terminal for key %r"
                % (_node_type(obj[last_key]), flat_key)
            )
        obj[last_key] = val

    # Parents are created before their children, so walking in reverse
    # finalizes every child before the container that holds it.
    for parent, key in reversed(holders):
        parent[key] = parent[key].getvalue()

    return data


def _node_type(value: t.Any) -> str:
    """Return ``'dict'``/``'list'`` for a placeholder, else ``'terminal'``."""
    if isinstance(value, _holder):
        return value.node_type.__name__
    return "terminal"


class _holder:
    """Placeholder for a nested container while the tree is being built.

    Only the small mapping surface used by :func:`unflatten` is provided; the
    entries live in ``self.data`` until ``getvalue()`` produces the final
    container.
    """

    node_type: type

    def __init__(self, flat_key: str):
        # Flat key of this node, used to build error messages.
        self.flat_key = flat_key
        self.data: dict[t.Any, t.Any] = {}

    def __contains__(self, key: t.Any) -> bool:
        return key in self.data

    def __getitem__(self, key: t.Any) -> t.Any:
        return self.data[key]

    def get(self, key: t.Any, default: t.Any = None) -> t.Any:
        return self.data.get(key, default)

    def __setitem__(self, key: t.Any, value: t.Any) -> None:
        self.data[key] = value

    def getvalue(self) -> t.Any:
        raise NotImplementedError


class _dict_holder(_holder):
    """Placeholder that becomes a ``dict``."""

    node_type = dict

    def getvalue(self) -> dict[t.Any, t.Any]:
        return self.data


class _list_holder(_holder):
    """Placeholder that becomes a ``list``; indexes must be contiguous from 0."""

    node_type = list

    def getvalue(self) -> list[t.Any]:
        value: list[t.Any] = []
        ordered = sorted(self.data.items(), key=itemgetter(0))
        for expected, (index, val) in enumerate(ordered):
            if index != expected:
                # Indexes are unique and sorted, so a mismatch means a gap.
                missing_key = "%s[%d]" % (self.flat_key, expected)
                raise ValueError("missing key %r" % missing_key)
            value.append(val)
        return value


# One key segment: ."quoted name" | [index] | .name | leading name.
_dot_or_indexes_re = re.compile(r"(\.?\"[^\"]*\")|(\[\d+\])|(\.\w*)|(^\w*)")


def _parse_key(flat_key: str) -> list[t.Any]:
    """Split a flat key into its segments (``str`` names and ``int`` indexes).

    A key starting with ``.`` or with an index gets an empty-string first
    segment.

    Raises:
        TypeError: If ``flat_key`` is not a string.
        ValueError: If there is text between or after segments, or a quoted
            name that is not introduced by a dot.
    """
    if not isinstance(flat_key, str):
        raise TypeError("keys must be strings")

    parts: list[t.Any] = [""] if flat_key.startswith(".") else []
    cursor = 0
    for position, match in enumerate(_dot_or_indexes_re.finditer(flat_key)):
        if match.start() != cursor:
            # Segments must be adjacent; anything in between is garbage.
            raise ValueError(
                "invalid separator %r in key %r"
                % (flat_key[cursor : match.start()], flat_key)
            )
        cursor = match.end()

        quoted, index, dotted, leading = match.groups()
        if quoted is not None:
            if quoted == '""':
                name = ""
            elif quoted.startswith("."):
                name = quoted[2:-1]
            elif position == 0:
                name = quoted[1:-1]
            else:
                raise ValueError("invalid string %r in key %r" % (quoted, flat_key))
            parts.append(name)
        elif index is not None:
            parts.append(int(index[1:-1]))
        else:
            name = dotted or leading or ""
            parts.append(name[1:] if name.startswith(".") else name)

    if cursor != len(flat_key):
        raise ValueError(
            "invalid separator %r in key %r" % (flat_key[cursor:], flat_key)
        )

    if parts and isinstance(parts[0], int):
        parts.insert(0, "")
    return parts


def _unparse_key(parsed: list[t.Any]) -> str:
    """Render parsed segments back into a flat key (inverse of ``_parse_key``)."""
    bits: list[str] = []
    for part in parsed:
        if not isinstance(part, str):
            bits.append("[%d]" % part)
        elif part == "" or part.isidentifier():
            bits.append((".%s" if bits else "%s") % part)
        else:
            # Names with special characters must be quoted to round-trip.
            bits.append(('."%s"' if bits else '"%s"') % part)
    return "".join(bits)
--git a/docs/source/_static/img/configuration-override-env.png b/docs/source/_static/img/configuration-override-env.png new file mode 100644 index 00000000000..82f6cd53263 Binary files /dev/null and b/docs/source/_static/img/configuration-override-env.png differ diff --git a/docs/source/guides/configuration.rst b/docs/source/guides/configuration.rst index 77fc2e29421..a8b2d28774f 100644 --- a/docs/source/guides/configuration.rst +++ b/docs/source/guides/configuration.rst @@ -33,6 +33,36 @@ the BentoML configuration template defined in :github:`default_configuration.yml + +Overriding configuration with environment variables +--------------------------------------------------- + +Users can also override configuration fields with an environment variable, by defining +a one-line, space-separated list of "flat" ``key=value`` pairs via ``BENTOML_CONFIG_OPTIONS``: + +.. code-block:: bash + + $ BENTOML_CONFIG_OPTIONS='runners.pytorch_mnist.resources."nvidia.com/gpu"[0]=0 runners.pytorch_mnist.resources."nvidia.com/gpu"[1]=2' \ + bentoml serve pytorch_mnist_demo:latest --production + +The override configuration will then be interpreted as: + +.. code-block:: yaml + + runners: + pytorch_mnist: + resources: + nvidia.com/gpu: [0, 2] + +.. note:: + + For fields that represent an iterable type, each element needs its own ``key[index]=value`` + entry, and the entries must be separated by a space: + + .. image:: /_static/img/configuration-override-env.png + :alt: Configuration override environment variable + + Docker Deployment -----------------