microsoft · D-W- · May 8, 2024 · Apr 30, 2024 · Apr 30, 2024 · Apr 30, 2024
@@ -0,0 +1,110 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: samples_flex_flows_eval_criteria_with_langchain
+
+# Triggers: a nightly schedule, pull requests against main that touch this
+# sample, the shared requirements files or this workflow file, and manual runs.
+on:
+  schedule:
+    # Every day starting at 4:21 BJT
+    - cron: "21 20 * * *"
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "examples/flex-flows/eval-criteria-with-langchain/**"
+      - "examples/*requirements.txt"
+      - ".github/workflows/samples_flex_flows_eval_criteria_with_langchain.yml"
+  workflow_dispatch:
+
+# Workflow-level environment, visible to every step of every job.
+env:
+  IS_IN_CI_PIPELINE: "true"
+
+jobs:
+  # Single job: prepares the environment, then runs the shell steps extracted
+  # from the sample's README.
+  samples_flex_flows_eval_criteria_with_langchain:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # The version is quoted so YAML keeps it a string rather than a float.
+      - name: Setup Python 3.9 environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      # examples/requirements.txt is optional: it is installed only if present.
+      - name: Prepare requirements
+        working-directory: examples
+        run: |
+          if [[ -e requirements.txt ]]; then
+            python -m pip install --upgrade pip
+            pip install -r requirements.txt
+          fi
+      # examples/dev_requirements.txt is installed unconditionally, so the step
+      # fails if the file is missing.
+      - name: Prepare dev requirements
+        working-directory: examples
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r dev_requirements.txt
+      # Fills the AOAI placeholders in .env.example (in the sample folder and
+      # in its parent folder, whichever exist) and renames the result to .env.
+      - name: Refine .env file
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        env:
+          # Secrets are passed as environment variables instead of being
+          # expanded into the script text, so special characters in a secret
+          # cannot break or alter the shell commands.
+          AOAI_API_KEY: ${{ secrets.AOAI_API_KEY_TEST }}
+          AOAI_API_ENDPOINT: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+        run: |
+          # Escape "/" so the endpoint can be used in a "/"-delimited sed replacement.
+          AOAI_API_ENDPOINT=${AOAI_API_ENDPOINT//\//\\/}
+          if [[ -e .env.example ]]; then
+            echo "env replacement"
+            sed -i -e "s/<your_AOAI_key>/$AOAI_API_KEY/g" -e "s/<your_AOAI_endpoint>/$AOAI_API_ENDPOINT/g" .env.example
+            mv .env.example .env
+          fi
+          if [[ -e ../.env.example ]]; then
+            echo "env replacement"
+            sed -i -e "s/<your_AOAI_key>/$AOAI_API_KEY/g" -e "s/<your_AOAI_endpoint>/$AOAI_API_ENDPOINT/g" ../.env.example
+            mv ../.env.example ../.env
+          fi
+      # Substitutes the connection placeholders in an existing run.yml with the
+      # test AOAI key and endpoint; does nothing when run.yml is absent.
+      - name: Create run.yml
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        env:
+          # Secrets are passed as environment variables instead of being
+          # expanded into the script text, so special characters in a secret
+          # cannot break or alter the shell commands.
+          AOAI_API_KEY: ${{ secrets.AOAI_API_KEY_TEST }}
+          AOAI_API_ENDPOINT: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+        run: |
+          # Escape "/" so the endpoint can be used in a "/"-delimited sed replacement.
+          gpt_base=${AOAI_API_ENDPOINT//\//\\/}
+          if [[ -e run.yml ]]; then
+            sed -i -e "s/\${azure_open_ai_connection.api_key}/$AOAI_API_KEY/g" -e "s/\${azure_open_ai_connection.api_base}/$gpt_base/g" run.yml
+          fi
+      # Signs in to Azure with the credentials stored in AZURE_CREDENTIALS.
+      - name: Azure Login
+        uses: azure/login@v1
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+      # Runs the README step extractor on the sample README, with the sample
+      # folder as its output location. The steps below read bash_script.sh
+      # from that folder (presumably produced here; confirm against the
+      # extractor script).
+      - name: Extract Steps examples/flex-flows/eval-criteria-with-langchain/README.md
+        working-directory: ${{ github.workspace }}
+        run: |
+          python scripts/readme/extract_steps_from_readme.py -f examples/flex-flows/eval-criteria-with-langchain/README.md -o examples/flex-flows/eval-criteria-with-langchain
+      # Prints the script to the job log before it is executed.
+      - name: Cat script
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        run: |
+          cat bash_script.sh
+      # Scheduled runs execute the extracted script against the canary workspace.
+      - name: Run scripts against canary workspace (scheduled runs only)
+        if: github.event_name == 'schedule'
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        env:
+          # Step-level env replaces the former unquoted `export NAME=<secret>`
+          # lines: bash_script.sh inherits the same variables, and the secret
+          # values never become part of the shell script text.
+          aoai_api_key: ${{ secrets.AOAI_API_KEY_TEST }}
+          aoai_api_endpoint: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AOAI_API_KEY_TEST }}
+          AZURE_OPENAI_ENDPOINT: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          test_workspace_sub_id: ${{ secrets.TEST_WORKSPACE_SUB_ID }}
+          test_workspace_rg: ${{ secrets.TEST_WORKSPACE_RG }}
+          test_workspace_name: ${{ secrets.TEST_WORKSPACE_NAME_CANARY }}
+        run: |
+          bash bash_script.sh
+      # Every non-scheduled trigger (pull request, manual dispatch) executes
+      # the extracted script against the production workspace.
+      - name: Run scripts against production workspace
+        if: github.event_name != 'schedule'
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        env:
+          # Step-level env replaces the former unquoted `export NAME=<secret>`
+          # lines: bash_script.sh inherits the same variables, and the secret
+          # values never become part of the shell script text.
+          aoai_api_key: ${{ secrets.AOAI_API_KEY_TEST }}
+          aoai_api_endpoint: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AOAI_API_KEY_TEST }}
+          AZURE_OPENAI_ENDPOINT: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+          test_workspace_sub_id: ${{ secrets.TEST_WORKSPACE_SUB_ID }}
+          test_workspace_rg: ${{ secrets.TEST_WORKSPACE_RG }}
+          test_workspace_name: ${{ secrets.TEST_WORKSPACE_NAME_PROD }}
+        run: |
+          bash bash_script.sh
+      # Always list the installed packages, even after a failed step, to help
+      # diagnose dependency problems.
+      - name: Pip List for Debug
+        if: ${{ always() }}
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        run: |
+          pip list
+      # Keep the extracted script as a build artifact, even when the run failed.
+      # upload-artifact v3 has been deprecated by GitHub; v4 is the supported
+      # major version.
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: artifact
+          path: examples/flex-flows/eval-criteria-with-langchain/bash_script.sh
@@ -0,0 +1,64 @@
+# This code is autogenerated.
+# Code is generated by running custom script: python3 readme.py
+# Any manual changes to this file may cause incorrect behavior.
+# Any manual changes will be overwritten if the code is regenerated.
+
+name: samples_flexflows_evalcriteriawithlangchain_langchaineval
+
+# Triggers: a nightly schedule, pull requests against main that touch this
+# sample, the shared requirements files or this workflow file, and manual runs.
+on:
+  schedule:
+    # Every day starting at 4:42 BJT
+    - cron: "42 20 * * *"
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "examples/flex-flows/eval-criteria-with-langchain/**"
+      - "examples/*requirements.txt"
+      - ".github/workflows/samples_flexflows_evalcriteriawithlangchain_langchaineval.yml"
+  workflow_dispatch:
+
+# Workflow-level environment, visible to every step of every job.
+env:
+  IS_IN_CI_PIPELINE: "true"
+
+jobs:
+  # Single job: prepares the environment, then executes the sample notebook.
+  samples_flexflows_evalcriteriawithlangchain_langchaineval:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      # Signs in to Azure with the credentials stored in AZURE_CREDENTIALS.
+      - name: Azure Login
+        uses: azure/login@v1
+        with:
+          creds: ${{ secrets.AZURE_CREDENTIALS }}
+      # The version is quoted so YAML keeps it a string rather than a float.
+      - name: Setup Python 3.9 environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9"
+      # Both requirements files are installed unconditionally, using paths
+      # anchored at the workspace root.
+      - name: Prepare requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r ${{ github.workspace }}/examples/requirements.txt
+          pip install -r ${{ github.workspace }}/examples/dev_requirements.txt
+      # Fills the AOAI placeholders in .env.example (in the sample folder and
+      # in its parent folder, whichever exist) and renames the result to .env.
+      - name: setup .env file
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        env:
+          # Secrets are passed as environment variables instead of being
+          # expanded into the script text, so special characters in a secret
+          # cannot break or alter the shell commands.
+          AOAI_API_KEY: ${{ secrets.AOAI_API_KEY_TEST }}
+          AOAI_API_ENDPOINT: ${{ secrets.AOAI_API_ENDPOINT_TEST }}
+        run: |
+          # Escape "/" so the endpoint can be used in a "/"-delimited sed replacement.
+          AOAI_API_ENDPOINT=${AOAI_API_ENDPOINT//\//\\/}
+          if [[ -e .env.example ]]; then
+            echo "env replacement"
+            sed -i -e "s/<your_AOAI_key>/$AOAI_API_KEY/g" -e "s/<your_AOAI_endpoint>/$AOAI_API_ENDPOINT/g" .env.example
+            mv .env.example .env
+          fi
+          if [[ -e ../.env.example ]]; then
+            echo "env replacement"
+            sed -i -e "s/<your_AOAI_key>/$AOAI_API_KEY/g" -e "s/<your_AOAI_endpoint>/$AOAI_API_ENDPOINT/g" ../.env.example
+            mv ../.env.example ../.env
+          fi
+      # Registers a connection from examples/connections/azure_openai.yml,
+      # overriding api_key and api_base with the test secrets.
+      - name: Create Aoai Connection
+        run: pf connection create -f ${{ github.workspace }}/examples/connections/azure_openai.yml --set api_key="${{ secrets.AOAI_API_KEY_TEST }}" api_base="${{ secrets.AOAI_API_ENDPOINT_TEST }}"
+      # Executes the notebook with papermill (python kernel) and writes the
+      # executed copy next to it as langchain-eval.output.ipynb.
+      - name: Test Notebook
+        working-directory: examples/flex-flows/eval-criteria-with-langchain
+        run: |
+          papermill -k python langchain-eval.ipynb langchain-eval.output.ipynb
+      # Keep the sample folder (including the executed notebook) as a build
+      # artifact, even when the run failed.
+      # upload-artifact v3 has been deprecated by GitHub; v4 is the supported
+      # major version.
+      - name: Upload artifact
+        if: ${{ always() }}
+        uses: actions/upload-artifact@v4
+        with:
+          name: artifact
+          # The "setup .env file" step may have written the AOAI key into
+          # .env inside this folder, so that file is explicitly kept out of
+          # the uploaded artifact.
+          path: |
+            examples/flex-flows/eval-criteria-with-langchain
+            !examples/flex-flows/eval-criteria-with-langchain/.env
@@ -56,6 +56,7 @@
 | [chat-stream](flex-flows/chat-stream/README.md) | [![samples_flex_flows_chat_stream](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_chat_stream.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_chat_stream.yml) |  A chat flow defined using class entry that return output in stream mode |
 | [eval-checklist](flex-flows/eval-checklist/README.md) | [![samples_flex_flows_eval_checklist](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_checklist.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_checklist.yml) |  A example flow defined using class entry which demos how to evaluate the answer pass user specified check list |
 | [eval-code-quality](flex-flows/eval-code-quality/README.md) | [![samples_flex_flows_eval_code_quality](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_code_quality.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_code_quality.yml) |  A example flow defined using class based entry which leverages model config to evaluate the quality of code snippet |
+| [eval-criteria-with-langchain](flex-flows/eval-criteria-with-langchain/README.md) | [![samples_flex_flows_eval_criteria_with_langchain](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_criteria_with_langchain.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flex_flows_eval_criteria_with_langchain.yml) |  An example flow that converts a LangChain criteria evaluator application to a flex flow |
 
 
 ### Flows ([flows](flows))
@@ -146,6 +147,7 @@
 | [chat-with-class-based-flow-azure.ipynb](flex-flows/chat-basic/chat-with-class-based-flow-azure.ipynb) | [![samples_flexflows_chatbasic_chatwithclassbasedflowazure](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatbasic_chatwithclassbasedflowazure.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatbasic_chatwithclassbasedflowazure.yml) | A quickstart tutorial to run a class based flex flow and evaluate it in azure. |
 | [chat-with-class-based-flow.ipynb](flex-flows/chat-basic/chat-with-class-based-flow.ipynb) | [![samples_flexflows_chatbasic_chatwithclassbasedflow](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatbasic_chatwithclassbasedflow.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatbasic_chatwithclassbasedflow.yml) | A quickstart tutorial to run a class based flex flow and evaluate it. |
 | [chat-stream-with-flex-flow.ipynb](flex-flows/chat-stream/chat-stream-with-flex-flow.ipynb) | [![samples_flexflows_chatstream_chatstreamwithflexflow](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatstream_chatstreamwithflexflow.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_chatstream_chatstreamwithflexflow.yml) | A quickstart tutorial to run a class based flex flow in stream mode and evaluate it. |
+| [langchain-eval.ipynb](flex-flows/eval-criteria-with-langchain/langchain-eval.ipynb) | [![samples_flexflows_evalcriteriawithlangchain_langchaineval](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_evalcriteriawithlangchain_langchaineval.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_flexflows_evalcriteriawithlangchain_langchaineval.yml) | A tutorial on converting a LangChain criteria evaluator application to a flex flow. |
 | [prompty-quickstart.ipynb](prompty/basic/prompty-quickstart.ipynb) | [![samples_prompty_basic_promptyquickstart](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_basic_promptyquickstart.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_basic_promptyquickstart.yml) | A quickstart tutorial to run a prompty and evaluate it. |
 | [chat-with-prompty.ipynb](prompty/chat-basic/chat-with-prompty.ipynb) | [![samples_prompty_chatbasic_chatwithprompty](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_chatbasic_chatwithprompty.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_chatbasic_chatwithprompty.yml) | A quickstart tutorial to run a chat prompty and evaluate it. |
 | [prompty-output-format.ipynb](prompty/format-output/prompty-output-format.ipynb) | [![samples_prompty_formatoutput_promptyoutputformat](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_formatoutput_promptyoutputformat.yml/badge.svg?branch=main)](https://github.com/microsoft/promptflow/actions/workflows/samples_prompty_formatoutput_promptyoutputformat.yml) |  |

@@ -0,0 +1,88 @@
+# Eval Conciseness Criteria with LangChain
+
+An example flow that converts a [LangChain criteria evaluator](https://python.langchain.com/docs/guides/productionization/evaluation/string/criteria_eval_chain/) application to a flex flow.
+
+## Prerequisites
+
+Install promptflow sdk and other dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+## Run flow
+
+- Prepare your Azure OpenAI resource by following these [instructions](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal) and get your `api_key` if you don't have one.
+- Or prepare your Anthropic resource by following these [instructions](https://python.langchain.com/docs/integrations/platforms/anthropic/) and get your `api_key` if you don't have one.
+
+- Setup connection
+
+Go to the "Connections" tab under "Prompt flow". Click the "Create" button, select one of the connection types supported by the LLM tool, and fill in the configuration.
+
+Or use CLI to create connection:
+
+```bash
+# Override keys with --set to avoid yaml file changes
+pf connection create --file ../../connections/custom.yml --set secrets.openai_api_key=<your_api_key> secrets.azure_endpoint=<your_api_base> --name my_llm_connection
+```
+
+Note that the commands below, as well as the `__main__` block of [eval_conciseness.py](eval_conciseness.py), use a connection named `my_llm_connection`.
+```bash
+# show registered connection
+pf connection show --name my_llm_connection
+```
+
+- Run as normal Python file
+```bash
+python eval_conciseness.py
+```
+
+- Test flow
+```bash
+pf flow test --flow . --inputs input="What's 2+2?" prediction="What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four." --init custom_connection=my_llm_connection
+```
+
+- Create run with multiple lines data
+
+```bash
+pf run create --flow . --data ./data.jsonl --init custom_connection=my_llm_connection --stream
+```
+
+See [here](https://aka.ms/pf/column-mapping) for the default behavior when `column-mapping` is not provided in the CLI.
+
+- List and show run meta
+
+```bash
+# list created run
+pf run list
+
+# get a sample run name
+
+name=$(pf run list -r 10 | jq '.[] | select(.name | contains("eval_criteria_with_langchain_")) | .name'| head -n 1 | tr -d '"')
+# show specific run detail
+pf run show --name $name
+
+# show output
+pf run show-details --name $name
+
+# show metrics
+pf run show-metrics --name $name
+
+# visualize run in browser
+pf run visualize --name $name
+```
+
+## Run flow in cloud
+
+- Assume we already have a connection named `open_ai_connection` in workspace.
+
+```bash
+# set default workspace
+az account set -s <your_subscription_id>
+az configure --defaults group=<your_resource_group_name> workspace=<your_workspace_name>
+```
+
+- Create run
+
+```bash
+# run with environment variable reference connection in azureml workspace
+pfazure run create --flow . --init init.json --data ./data.jsonl --stream
+```
@@ -0,0 +1,2 @@
+{"input": "What's 2+2?",  "prediction": "What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four."}
+{"input": "What's 2x2?",  "prediction": "What's 2x2? That's an elementary question. The answer you're looking for is that two and two is four."}
@@ -0,0 +1,65 @@
+from dataclasses import dataclass
+
+from langchain.evaluation import load_evaluator
+from langchain_community.chat_models import AzureChatOpenAI, ChatAnthropic
+
+from promptflow.client import PFClient
+from promptflow.connections import CustomConnection
+from promptflow.tracing import trace
+
+
+@dataclass
+class Result:
+    """Structured output returned by ``LangChainEvaluator.__call__``.
+
+    Instances are built by unpacking the dict returned from the LangChain
+    evaluator (``Result(**eval_result)``), so the field names must match the
+    keys of that dict.
+    """
+
+    # NOTE(review): field meanings follow the LangChain criteria evaluator
+    # output; confirm the exact value types against the LangChain docs.
+    reasoning: str
+    value: str
+    score: float
+
+
+class LangChainEvaluator:
+    def __init__(self, custom_connection: CustomConnection):
+        self.custom_connection = custom_connection
+
+        # create llm according to the secrets in custom connection
+        if "anthropic_api_key" in self.custom_connection.secrets:
+            self.llm = ChatAnthropic(
+                temperature=0, anthropic_api_key=self.custom_connection.secrets["anthropic_api_key"]
+            )
+        elif "openai_api_key" in self.custom_connection.secrets:
+            self.llm = AzureChatOpenAI(
+                deployment_name="gpt-35-turbo",
+                openai_api_key=self.custom_connection.secrets["openai_api_key"],
+                azure_endpoint=self.custom_connection.secrets["azure_endpoint"],
+                openai_api_type="azure",
+                openai_api_version="2023-07-01-preview",
+                temperature=0,
+            )
+        else:
+            raise ValueError("No valid API key found in the connection.")
+        # evaluate with langchain evaluator for conciseness
+        self.evaluator = load_evaluator("criteria", llm=self.llm, criteria="conciseness")
+
+    @trace
+    def __call__(
+        self,
+        input: str,
+        prediction: str,
+    ) -> Result:
+        """Evaluate with langchain evaluator."""
+
+        eval_result = self.evaluator.evaluate_strings(prediction=prediction, input=input)
+        return Result(**eval_result)
+
+
+if __name__ == "__main__":
+    from promptflow.tracing import start_trace
+
+    start_trace()
+    pf = PFClient()
+    connection = pf.connections.get(name="my_llm_connection", with_secrets=True)
+    evaluator = LangChainEvaluator(custom_connection=connection)
+    result = evaluator(
+        prediction="What's 2+2? That's an elementary question. "
+        "The answer you're looking for is that two and two is four.",
+        input="What's 2+2?",
+    )
+    print(result)
@@ -0,0 +1,5 @@
+$schema: https://azuremlschemas.azureedge.net/promptflow/latest/Flow.schema.json
+# Entry point of the flex flow: the LangChainEvaluator class defined in
+# eval_conciseness.py.
+entry: eval_conciseness:LangChainEvaluator
+environment:
+  # image: mcr.microsoft.com/azureml/promptflow/promptflow-python
+  # Python dependencies of the flow are read from requirements.txt.
+  python_requirements_txt: requirements.txt