
Commit

Upgrade gpt-4-32k to GPT-4-Turbo (128K context length) for cost reduction (#313)

### Description
- GPT-4 Turbo, the newer model with a 128K context length, is roughly 6 times cheaper to use than gpt-4-32k
- It is also more performant
- Lowered the model temperature as well, since a high temperature does not make sense here: we do not need extra entropy or "creativity" in the responses of a RAG app (see the sketch after this description)

closes #314
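For reference, both settings live in `api/ask_astro/settings.py` and are read from environment variables (diff below). A minimal sketch of how such settings might be handed to the LLM behind the load-QA chain, assuming a LangChain `AzureChatOpenAI` model; the exact import path and keyword names depend on the LangChain version, and the real ask-astro wiring is not shown in this commit:

```python
# Hypothetical sketch, not the actual ask-astro code: shows how the
# env-backed settings could feed the LLM used by the load-QA chain.
# Import path and kwarg names depend on the installed LangChain version.
from langchain.chat_models import AzureChatOpenAI

from ask_astro.settings import (
    CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME,
    CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE,
)

# A lower temperature (now 0.3) keeps answers closer to the retrieved
# context; the deployment name now points at a GPT-4 Turbo (128K) deployment.
llm = AzureChatOpenAI(
    deployment_name=CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME,
    temperature=CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE,
)
```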
davidgxue committed Mar 7, 2024
1 parent cc76b19 commit 7211a5e
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions in api/ask_astro/settings.py

@@ -15,10 +15,10 @@

 # Environment variables for ConversationalRetrievalChain Load QA Chain
 CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE = float(
-    os.environ.get("CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE", "0.5")
+    os.environ.get("CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE", "0.3")
 )
 CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME = os.environ.get(
-    "CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME", "gpt-4-32k"
+    "CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME", "gpt-4-128k"
 )

 SHOW_SERVICE_MAINTENANCE_BANNER = os.environ.get("SHOW_SERVICE_MAINTENANCE_BANNER", "False").upper() == "TRUE"
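Both defaults remain overridable without code changes. A hedged sketch (hypothetical example values) of forcing different settings by setting the environment variables before `ask_astro.settings` is imported:

```python
import os

# Hypothetical override example: settings.py only falls back to "0.3" and
# "gpt-4-128k" when these environment variables are unset.
os.environ["CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE"] = "0.0"
os.environ["CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME"] = "gpt-4-32k"

from ask_astro import settings

print(settings.CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_TEMPERATURE)      # 0.0
print(settings.CONVERSATIONAL_RETRIEVAL_LOAD_QA_CHAIN_DEPLOYMENT_NAME)  # gpt-4-32k
```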
