From 3f6de7968a96760309a50069faf87cb2a248db47 Mon Sep 17 00:00:00 2001
From: Evan Lohn <evan@danswer.ai>
Date: Mon, 3 Feb 2025 10:30:51 -0800
Subject: [PATCH] prompt improvements for weaker models

---
 .../deep_search/main/nodes/extract_entities_terms.py      | 8 ++++++--
 backend/onyx/prompts/agent_search.py                      | 6 +++++-
 2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
index 67ce6877917..dbb4a6fd86c 100644
--- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
+++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py
@@ -1,4 +1,3 @@
-import re
 from datetime import datetime
 from typing import cast
 
@@ -80,7 +79,12 @@ def extract_entities_terms(
         prompt=msg,
     )
 
-    cleaned_response = re.sub(r"```json\n|\n```", "", str(llm_response.content))
+    cleaned_response = (
+        str(llm_response.content).replace("```json\n", "").replace("\n```", "")
+    )
+    first_bracket = cleaned_response.find("{")
+    last_bracket = cleaned_response.rfind("}")
+    cleaned_response = cleaned_response[first_bracket : last_bracket + 1]
 
     try:
         entity_extraction_result = EntityExtractionResult.model_validate_json(
diff --git a/backend/onyx/prompts/agent_search.py b/backend/onyx/prompts/agent_search.py
index 072a25581d2..12236b79b49 100644
--- a/backend/onyx/prompts/agent_search.py
+++ b/backend/onyx/prompts/agent_search.py
@@ -139,6 +139,7 @@ INITIAL_QUESTION_DECOMPOSITION_PROMPT = (
     "{question}\n"
     f"{SEPARATOR_LINE}\n\n"
     "{history}\n\n"
+    "Do NOT include any text in your answer outside of the list of sub-questions!\n"
     "Please formulate your answer as a newline-separated list of questions like so:\n"
     " <sub-question>\n"
     " <sub-question>\n"
@@ -148,6 +149,7 @@ INITIAL_QUESTION_DECOMPOSITION_PROMPT = (
 ).strip()
 
 
+# TODO: combine shared pieces with INITIAL_QUESTION_DECOMPOSITION_PROMPT
 INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = (
     "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
     " original question. The purpose for this decomposition may be to:\n"
@@ -175,6 +177,7 @@ INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = (
     "{question}\n"
     f"{SEPARATOR_LINE}\n\n"
     "{history}\n\n"
+    "Do NOT include any text in your answer outside of the list of sub-questions!\n"
     "Please formulate your answer as a newline-separated list of questions like so:\n"
     " <sub-question>\n"
     " <sub-question>\n"
@@ -194,6 +197,7 @@ QUERY_REWRITING_PROMPT = (
     f"{SEPARATOR_LINE}\n"
     "{question}\n"
     f"{SEPARATOR_LINE}\n\n"
+    "Do NOT include any text in your answer outside of the list of queries!\n"
     "Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:\n"
     "<query 1>\n"
     "<query 2>\n"
@@ -221,7 +225,7 @@ DOCUMENT_VERIFICATION_PROMPT = (
     f"{SEPARATOR_LINE}\n"
     "{question}\n"
     f"{SEPARATOR_LINE}\n\n"
-    "Please answer with exactly and only a 'yes' or 'no':\n\n"
+    "Please answer with exactly and only a 'yes' or 'no'. Do NOT include any other text in your response:\n\n"
     "Answer:"
 ).strip()