Update config_list_from_json function to support YAML parsing #2560

Open
wants to merge 9 commits into base: main
21 changes: 21 additions & 0 deletions OAI_CONFIG_LIST_yaml_sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Please modify the content, remove these comment lines, and rename this file to OAI_CONFIG_LIST to run the sample code.
# If using pyautogen v0.1.x with Azure OpenAI, please replace "base_url" with "api_base" (line 13 and line 19 below). Use "pip list" to check the installed version of pyautogen.

# NOTE: This configuration lists GPT-4 as the default model, as this represents our current recommendation, and is known to work well with AutoGen. If you use a model other than GPT-4, you may need to revise various system prompts (especially if using weaker models like GPT-3.5-turbo). Moreover, if you use models other than those hosted by OpenAI or Azure, you may incur additional risks related to alignment and safety. Proceed with caution if updating this default.
- model: gpt-4
api_key: <your OpenAI API key here>
tags:
- gpt-4
- tool

- model: <your Azure OpenAI deployment name>
api_key: <your Azure OpenAI API key here>
base_url: <your Azure OpenAI API base here>
api_type: azure
api_version: 2024-02-15-preview

- model: <your Azure OpenAI deployment name>
api_key: <your Azure OpenAI API key here>
base_url: <your Azure OpenAI API base here>
api_type: azure
api_version: 2024-02-15-preview
6 changes: 3 additions & 3 deletions README.md
@@ -100,10 +100,10 @@ The easiest way to start playing is

[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/autogen?quickstart=1)

2. Copy OAI_CONFIG_LIST_sample to ./notebook folder, name to OAI_CONFIG_LIST, and set the correct configuration.
2. Copy OAI_CONFIG_LIST_sample or OAI_CONFIG_LIST_yaml_sample to the ./notebook folder, rename it to OAI_CONFIG_LIST, and set the correct configuration.
3. Start playing with the notebooks!

*NOTE*: OAI_CONFIG_LIST_sample lists GPT-4 as the default model, as this represents our current recommendation, and is known to work well with AutoGen. If you use a model other than GPT-4, you may need to revise various system prompts (especially if using weaker models like GPT-3.5-turbo). Moreover, if you use models other than those hosted by OpenAI or Azure, you may incur additional risks related to alignment and safety. Proceed with caution if updating this default.
*NOTE*: Both OAI_CONFIG_LIST_sample and OAI_CONFIG_LIST_yaml_sample list GPT-4 as the default model, as this represents our current recommendation, and is known to work well with AutoGen. If you use a model other than GPT-4, you may need to revise various system prompts (especially if using weaker models like GPT-3.5-turbo). Moreover, if you use models other than those hosted by OpenAI or Azure, you may incur additional risks related to alignment and safety. Proceed with caution if updating this default.

<p align="right" style="font-size: 14px; color: #555; margin-top: 20px;">
<a href="#readme-top" style="text-decoration: none; color: blue; font-weight: bold;">
@@ -162,7 +162,7 @@ For [example](https://github.com/microsoft/autogen/blob/main/test/twoagent.py),
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
# Load LLM inference endpoints from an env variable or a file
# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
# and OAI_CONFIG_LIST_sample
# and OAI_CONFIG_LIST_sample/OAI_CONFIG_LIST_yaml_sample
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
# You can also set config_list directly as a list, for example, config_list = [{'model': 'gpt-4', 'api_key': '<your OpenAI API key here>'},]
assistant = AssistantAgent("assistant", llm_config={"config_list": config_list})
61 changes: 44 additions & 17 deletions autogen/oai/openai_utils.py
@@ -6,7 +6,7 @@
import tempfile
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Union
from typing import Any, Callable, Dict, List, Optional, Set, Union

from dotenv import find_dotenv, load_dotenv
from openai import OpenAI
@@ -65,6 +65,16 @@
}


def load_yaml_json(env_or_file: Union[str, bytes]) -> Any:
    """Parse a JSON or YAML string, falling back to the json module if pyyaml is not installed."""
    try:
        import yaml

        loaded_config = yaml.safe_load(env_or_file)
    except ImportError:
        loaded_config = json.loads(env_or_file)
    return loaded_config


def get_key(config: Dict[str, Any]) -> str:
"""Get a unique identifier of a configuration.

@@ -461,10 +471,10 @@ def config_list_from_json(
filter_dict: Optional[Dict[str, Union[List[Union[str, None]], Set[Union[str, None]]]]] = None,
) -> List[Dict[str, Any]]:
"""
Retrieves a list of API configurations from a JSON stored in an environment variable or a file.
Retrieves a list of API configurations from a JSON/YAML stored in an environment variable or a file.

This function attempts to parse JSON data from the given `env_or_file` parameter. If `env_or_file` is an
environment variable containing JSON data, it will be used directly. Otherwise, it is assumed to be a filename,
This function attempts to parse JSON/YAML data from the given `env_or_file` parameter. If `env_or_file` is an
environment variable containing JSON/YAML data, it will be used directly. Otherwise, it is assumed to be a filename,
and the function will attempt to read the file from the specified `file_location`.

The `filter_dict` parameter allows for filtering the configurations based on specified criteria. Each key in the
@@ -474,21 +484,38 @@ def config_list_from_json(

Args:
env_or_file (str): The name of the environment variable, the filename, or the environment variable of the filename
that containing the JSON data.
that contains the JSON/YAML data. To load from a YAML file, first install pyyaml (`pip install pyyaml`).
file_location (str, optional): The directory path where the file is located, if `env_or_file` is a filename.
filter_dict (dict, optional): A dictionary specifying the filtering criteria for the configurations, with
keys representing field names and values being lists or sets of acceptable values for those fields.

Example:
```python
# Suppose we have an environment variable 'CONFIG_JSON' with the following content:
# '[{"model": "gpt-3.5-turbo", "api_type": "azure"}, {"model": "gpt-4"}]'

# We can retrieve a filtered list of configurations like this:
filter_criteria = {"model": ["gpt-3.5-turbo"]}
configs = config_list_from_json('CONFIG_JSON', filter_dict=filter_criteria)
# The 'configs' variable will now contain only the configurations that match the filter criteria.
```
JSON:
```python
# Suppose we have an environment variable 'CONFIG_JSON' with the following content:
# '[{"model": "gpt-3.5-turbo", "api_type": "azure"}, {"model": "gpt-4"}]'

# We can retrieve a filtered list of configurations like this:
filter_criteria = {"model": ["gpt-3.5-turbo"]}
configs = config_list_from_json('CONFIG_JSON', filter_dict=filter_criteria)
# The 'configs' variable will now contain only the configurations that match the filter criteria.
```

YAML:
```python
# Suppose we have an environment variable 'CONFIG_YAML' with the following content:
# - model: gpt-3.5-turbo
#   api_type: azure
# - model: gpt-4

# We can retrieve a filtered list of configurations like this:
filter_criteria = {"model": ["gpt-3.5-turbo"]}
configs = config_list_from_json('CONFIG_YAML', filter_dict=filter_criteria)
# The 'configs' variable will now contain only the configurations that match the filter criteria.
```

Returns:
List[Dict]: A list of configuration dictionaries that match the filtering criteria specified in `filter_dict`.
@@ -503,11 +530,11 @@ def config_list_from_json(
if os.path.exists(env_str):
# It is a file location; load the JSON/YAML content from the file.
with open(env_str, "r") as file:
json_str = file.read()
json_or_yaml_str = file.read()
else:
# Else, it should be a JSON or YAML string by itself.
json_str = env_str
config_list = json.loads(json_str)
json_or_yaml_str = env_str
Collaborator: If the yaml dependency is not installed, we can fall back to the json module: check for an import error when importing yaml, and use that information to choose between the yaml load and the json load.

Collaborator (author): Oh, that sounds good.

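The fallback the reviewer describes can be sketched as follows (a minimal illustration, not the PR's exact code; the helper name `parse_config_str` is hypothetical):

```python
import json

# Detect pyyaml once at import time. JSON is a subset of YAML,
# so yaml.safe_load also parses JSON-formatted strings.
try:
    import yaml

    HAS_YAML = True
except ImportError:
    HAS_YAML = False


def parse_config_str(text):
    """Parse a JSON or YAML configuration string into Python data."""
    if HAS_YAML:
        return yaml.safe_load(text)
    return json.loads(text)


# Works for JSON input whether or not pyyaml is installed.
configs = parse_config_str('[{"model": "gpt-4"}]')
```

With this structure, the import cost and the availability check are paid once per process rather than on every call.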
config_list = load_yaml_json(json_or_yaml_str)
else:
# The environment variable does not exist.
# So, `env_or_file` is a filename. We should use the file location.
@@ -517,7 +544,7 @@
config_list_path = env_or_file

with open(config_list_path) as json_file:
config_list = json.load(json_file)
config_list = load_yaml_json(json_file.read())
return filter_config(config_list, filter_dict)


1 change: 1 addition & 0 deletions setup.py
@@ -29,6 +29,7 @@
# Disallowing 2.6.0 can be removed when this is fixed https://github.com/pydantic/pydantic/issues/8705
"pydantic>=1.10,<3,!=2.6.0", # could be both V1 and V2
"docker",
"pyyaml",
Collaborator: We can add pyyaml to the test dependencies. But as an install dependency, let's move it to an optional dependency like those below.

Collaborator (author): Actually, I don't see any test or optional dependencies under setup.py. Should I put it under "test" in `extras_require`?

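One way to act on this suggestion is the standard setuptools extras pattern (a sketch; the extra names below are illustrative, not taken from the PR):

```python
# Sketch: declare pyyaml as an optional extra instead of a hard
# install dependency (extra names here are hypothetical).
install_requires = [
    "pydantic>=1.10,<3,!=2.6.0",
    "docker",
    "packaging",
]

extras_require = {
    # enables YAML config files: pip install "pyautogen[yaml]"
    "yaml": ["pyyaml"],
    # pulled in by the test environment
    "test": ["pytest", "pyyaml"],
}
```

Users who never touch YAML configs then avoid an extra install dependency, while the JSON fallback in `load_yaml_json` keeps the default path working.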
"packaging",
]

34 changes: 27 additions & 7 deletions test/oai/test_utils.py
@@ -9,6 +9,7 @@
from unittest.mock import patch

import pytest
import yaml
from conftest import MOCK_OPEN_AI_API_KEY

import autogen # noqa: E402
@@ -68,6 +69,24 @@
]
"""

YAML_SAMPLE = """
- model: gpt-3.5-turbo
api_type: openai
- model: gpt-4
api_type: openai
- model: gpt-35-turbo-v0301
tags:
- gpt-3.5-turbo
- gpt35_turbo
api_key: "111113fc7e8a46419bfac511bb301111"
base_url: "https://1111.openai.azure.com"
api_type: azure
api_version: "2024-02-15-preview"
- model: gpt
api_key: not-needed
base_url: "http://localhost:1234/v1"
"""

JSON_SAMPLE_DICT = json.loads(JSON_SAMPLE)


@@ -113,23 +132,24 @@ def test_filter_config(test_case):
assert _compare_lists_of_dicts(config_list, expected)


def test_config_list_from_json():
@pytest.mark.parametrize("config_example", [JSON_SAMPLE, YAML_SAMPLE], ids=["from_json", "from_yaml"])
def test_config_list_from_json(config_example):
with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp_file:
json_data = json.loads(JSON_SAMPLE)
tmp_file.write(JSON_SAMPLE)
config_data = yaml.safe_load(config_example)
tmp_file.write(config_example)
tmp_file.flush()
config_list = autogen.config_list_from_json(tmp_file.name)

assert len(config_list) == len(json_data)
assert len(config_list) == len(config_data)
i = 0
for config in config_list:
assert isinstance(config, dict)
for key in config:
assert key in json_data[i]
assert config[key] == json_data[i][key]
assert key in config_data[i]
assert config[key] == config_data[i][key]
i += 1

os.environ["config_list_test"] = JSON_SAMPLE
os.environ["config_list_test"] = config_example
config_list_2 = autogen.config_list_from_json("config_list_test")
assert config_list == config_list_2

2 changes: 1 addition & 1 deletion website/blog/2023-10-26-TeachableAgent/index.mdx
@@ -55,7 +55,7 @@ from autogen import ConversableAgent # As an example
```python
# Load LLM inference endpoints from an env variable or a file
# See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints
# and OAI_CONFIG_LIST_sample
# and OAI_CONFIG_LIST_sample/OAI_CONFIG_LIST_yaml_sample
filter_dict = {"model": ["gpt-4"]} # GPT-3.5 is less reliable than GPT-4 at learning from user feedback.
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict)
llm_config={"config_list": config_list, "timeout": 120}
60 changes: 59 additions & 1 deletion website/docs/topics/llm_configuration.ipynb
@@ -116,6 +116,57 @@
"</Tabs>\n",
"````\n",
"\n",
"There is another way to specify the `config_list` using a YAML file. This is useful when you have multiple models and want to keep the configuration separate from the code. The YAML file should be formatted as follows:\n",
"\n",
"````{=mdx}\n",
"import Tabs from '@theme/Tabs';\n",
"import TabItem from '@theme/TabItem';\n",
"\n",
"<Tabs>\n",
" <TabItem value=\"openai\" label=\"OpenAI\" default>\n",
" - `model` (str, required): The identifier of the model to be used, such as 'gpt-4', 'gpt-3.5-turbo'.\n",
" - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n",
" - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n",
" - `tags` (List[str], optional): Tags which can be used for filtering.\n",
"\n",
" Example:\n",
" ```yaml\n",
" - model: gpt-4\n",
" api_key: <your OpenAI API key here>\n",
" ```\n",
" </TabItem>\n",
" <TabItem value=\"azureopenai\" label=\"Azure OpenAI\">\n",
" - `model` (str, required): The deployment to be used. The model corresponds to the deployment name on Azure OpenAI.\n",
" - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n",
" - `api_type`: `azure`\n",
" - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n",
" - `api_version` (str, optional): The version of the Azure API you wish to use.\n",
" - `tags` (List[str], optional): Tags which can be used for filtering.\n",
"\n",
" Example:\n",
" ```yaml\n",
" - model: my-gpt-4-deployment\n",
" api_type: azure\n",
" api_key: <your Azure OpenAI API key here>\n",
" base_url: https://ENDPOINT.openai.azure.com/\n",
" api_version: 2024-02-15-preview\n",
" ```\n",
" </TabItem>\n",
" <TabItem value=\"other\" label=\"Other OpenAI compatible\">\n",
" - `model` (str, required): The identifier of the model to be used, such as 'llama-7B'.\n",
" - `api_key` (str, optional): The API key required for authenticating requests to the model's API endpoint.\n",
" - `base_url` (str, optional): The base URL of the API endpoint. This is the root address where API calls are directed.\n",
" - `tags` (List[str], optional): Tags which can be used for filtering.\n",
"\n",
" Example:\n",
" ```yaml\n",
" - model: llama-7B\n",
" base_url: http://localhost:1234\n",
" ```\n",
" </TabItem>\n",
"</Tabs>\n",
"````\n",
"\n",
"---\n",
"\n",
"````{=mdx}\n",
@@ -126,9 +177,16 @@
"\n",
"### `OAI_CONFIG_LIST` pattern\n",
"\n",
"A common, useful pattern used is to define this `config_list` is via JSON (specified as a file or an environment variable set to a JSON-formatted string) and then use the [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json) helper function to load it:"
"A common, useful pattern used is to define this `config_list` is via JSON/YAML (specified as a file or an environment variable set to a JSON-formatted string) and then use the [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json) helper function to load it:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,