Partial Prompt Updates (#3880)

Yuhong Sun 2025-02-02 14:21:23 -08:00 committed by Evan Lohn
parent 9b6e51b4fe
commit a067b32467
16 changed files with 280 additions and 310 deletions

View File

@@ -12,12 +12,12 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
     SubQuestionAnswerCheckUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_ANSWER_CHECK_PROMPT
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 def check_sub_answer(
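The pattern repeated across the files in this commit: prompt constants that previously lived in `onyx.agents.agent_search.shared_graph_utils.prompts` are now imported from the central `onyx.prompts.agent_search` module. A minimal sketch of the change at a call site (paths taken from the hunk above; only importable with the Onyx backend on the path):

```python
# Old import path (removed in this commit):
# from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_ANSWER_CHECK_PROMPT
# from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER

# New import path (added in this commit):
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
```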

View File

@@ -16,7 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_sub_question_answer_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS
 from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
@@ -31,6 +30,7 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
 logger = setup_logger()

View File

@@ -30,16 +30,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResul
 from onyx.agents.agent_search.shared_graph_utils.operators import (
     dedup_inference_sections,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    SUB_QUESTION_ANSWER_TEMPLATE,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     dispatch_main_answer_stop_info,
 )
@@ -57,6 +47,16 @@ from onyx.chat.models import ExtendedToolResponse
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.context.search.models import InferenceSection
+from onyx.prompts.agent_search import (
+    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    SUB_QUESTION_ANSWER_TEMPLATE,
+)
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses

View File

@@ -24,17 +24,15 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.models import (
     QuestionRetrievalResult,
 )
+from onyx.context.search.models import InferenceSection
 ### States ###
 class SubQuestionRetrievalInput(CoreState):
-    pass
+    exploratory_search_results: list[InferenceSection]
 ## Graph State
 class SubQuestionRetrievalState(
     # This includes the core state
     SubQuestionRetrievalInput,
@@ -48,8 +46,6 @@ class SubQuestionRetrievalState(
     base_raw_search_result: Annotated[list[QuestionRetrievalResult], add]
-## Graph Output State - presently not used
+## Graph Output State
 class SubQuestionRetrievalOutput(TypedDict):
     log_messages: list[str]
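The `pass` placeholder in the graph input state is replaced by an explicit `exploratory_search_results` field, so the exploratory search output is carried through the graph state instead of being dropped. A rough, self-contained sketch of the idea (stub dataclasses, not the actual Onyx models):

```python
from dataclasses import dataclass, field


@dataclass
class InferenceSectionStub:
    # Stand-in for onyx.context.search.models.InferenceSection
    combined_content: str


@dataclass
class CoreStateStub:
    # Stand-in for the shared CoreState fields
    log_messages: list[str] = field(default_factory=list)


@dataclass
class SubQuestionRetrievalInputStub(CoreStateStub):
    # Previously just `pass`; the exploratory results are now part of the input state
    exploratory_search_results: list[InferenceSectionStub] = field(default_factory=list)


state = SubQuestionRetrievalInputStub(
    exploratory_search_results=[InferenceSectionStub("sample snippet used for decomposition")]
)
print(len(state.exploratory_search_results))  # -> 1
```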

View File

@@ -22,12 +22,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
@@ -38,6 +32,15 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
+from onyx.prompts.agent_search import (
+    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
+)
+from onyx.prompts.agent_search import (
+    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
+)
+from onyx.utils.logger import setup_logger
+logger = setup_logger()
 def decompose_orig_question(
@@ -63,6 +66,12 @@ def decompose_orig_question(
     # Initial search to inform decomposition. Just get top 3 fits
     if perform_initial_search_decomposition:
+        # Due to unfortunate state representation in LangGraph, we need here to double check that the retrieval has
+        # happened prior to this point, allowing silent failure here since it is not critical for decomposition in
+        # all queries.
+        if not state.exploratory_search_results:
+            logger.error("Initial search for decomposition failed")
+
         sample_doc_str = "\n\n".join(
             [
                 doc.combined_content
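The new guard only logs when the exploratory results are missing, since the sample documents are helpful but not required for decomposition. A simplified, self-contained sketch of that behavior (stub types and names, not the actual node code):

```python
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class DocStub:
    combined_content: str


def build_sample_doc_str(exploratory_search_results: list[DocStub], num_docs: int) -> str:
    # Mirrors the added check: a missing initial search is logged, not raised,
    # because decomposition can still proceed without the sample documents.
    if not exploratory_search_results:
        logger.error("Initial search for decomposition failed")
    return "\n\n".join(doc.combined_content for doc in exploratory_search_results[:num_docs])


print(build_sample_doc_str([DocStub("doc A"), DocStub("doc B")], num_docs=1))
```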

View File

@@ -10,17 +10,15 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.deep_search.main.states import (
     SubQuestionResultsUpdate,
 )
+from onyx.context.search.models import InferenceSection
 ### States ###
 class SubQuestionAnsweringInput(CoreState):
-    pass
+    exploratory_search_results: list[InferenceSection]
 ## Graph State
 class SubQuestionAnsweringState(
     # This includes the core state
     SubQuestionAnsweringInput,
@@ -31,8 +29,6 @@ class SubQuestionAnsweringState(
     pass
-## Graph Output State - presently not used
+## Graph Output State
 class SubQuestionAnsweringOutput(TypedDict):
     log_messages: list[str]

View File

@@ -10,14 +10,14 @@ from onyx.agents.agent_search.deep_search.main.states import (
 )
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.prompts.agent_search import (
+    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
+)
 def compare_answers(

View File

@@ -20,9 +20,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     format_entity_term_extraction,
@@ -32,6 +29,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
+from onyx.prompts.agent_search import (
+    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
+)
 from onyx.tools.models import ToolCallKickoff

View File

@@ -18,19 +18,14 @@ from onyx.agents.agent_search.shared_graph_utils.models import EntityExtractionR
 from onyx.agents.agent_search.shared_graph_utils.models import (
     EntityRelationshipTermExtraction,
 )
-from onyx.agents.agent_search.shared_graph_utils.models import Relationship
-from onyx.agents.agent_search.shared_graph_utils.models import Term
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ENTITY_TERM_EXTRACTION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
+from onyx.prompts.agent_search import (
+    ENTITY_TERM_EXTRACTION_PROMPT,
+)
 def extract_entities_terms(

View File

@@ -28,16 +28,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
     dedup_inference_sections,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     dispatch_main_answer_stop_info,
 )
@@ -55,6 +45,16 @@ from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
+)
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses

View File

@@ -16,14 +16,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
     QueryExpansionUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    QUERY_REWRITING_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.prompts.agent_search import (
+    QUERY_REWRITING_PROMPT,
+)
 def expand_queries(

View File

@@ -13,7 +13,7 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
+from onyx.prompts.agent_search import (
     DOCUMENT_VERIFICATION_PROMPT,
 )

View File

@@ -7,8 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.models import (
     AgentPromptEnrichmentComponents,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import HISTORY_FRAMING_PROMPT
-from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_QUESTION_RAG_PROMPT
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_persona_agent_prompt_expressions,
 )
@@ -20,6 +18,8 @@ from onyx.llm.interfaces import LLMConfig
 from onyx.llm.utils import get_max_input_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.natural_language_processing.utils import tokenizer_trim_content
+from onyx.prompts.agent_search import HISTORY_FRAMING_PROMPT
+from onyx.prompts.agent_search import SUB_QUESTION_RAG_PROMPT
 from onyx.prompts.prompt_utils import build_date_time_string

View File

@@ -24,15 +24,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
     EntityRelationshipTermExtraction,
 )
 from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ASSISTANT_SYSTEM_PROMPT_PERSONA,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    HISTORY_CONTEXT_SUMMARY_PROMPT,
-)
 from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import CitationConfig
@@ -56,6 +47,15 @@ from onyx.db.engine import get_session_context_manager
 from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
 from onyx.llm.interfaces import LLM
+from onyx.prompts.agent_search import (
+    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
+)
+from onyx.prompts.agent_search import (
+    ASSISTANT_SYSTEM_PROMPT_PERSONA,
+)
+from onyx.prompts.agent_search import (
+    HISTORY_CONTEXT_SUMMARY_PROMPT,
+)
 from onyx.tools.force import ForceUseTool
 from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
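This module consumes the assistant system prompts; which one applies depends on whether a persona prompt is configured. A small, self-contained sketch of that selection (the constants are copied from the prompt hunks later in this diff, while the `build_system_prompt` helper is illustrative, not the Onyx function):

```python
SEPARATOR_LINE = "-------"

ASSISTANT_SYSTEM_PROMPT_DEFAULT = """You are an assistant for question-answering tasks."""

ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
You are an assistant for question-answering tasks. Here is more information about you:
{SEPARATOR_LINE}
{{persona_prompt}}
{SEPARATOR_LINE}
""".strip()


def build_system_prompt(persona_prompt: str | None) -> str:
    # Illustrative helper: fall back to the default prompt when no persona is configured.
    if persona_prompt:
        return ASSISTANT_SYSTEM_PROMPT_PERSONA.format(persona_prompt=persona_prompt)
    return ASSISTANT_SYSTEM_PROMPT_DEFAULT


print(build_system_prompt("You specialize in internal engineering docs."))
```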

View File

@@ -8,7 +8,7 @@ AGENT_DEFAULT_RERANKING_HITS = 10
 AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
 AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
 AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
-AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 3
+AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
 AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
@@ -59,7 +59,7 @@ AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
 AGENT_EXPLORATORY_SEARCH_RESULTS = int(
     os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
     or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
-)  # 3
+)  # 5
 AGENT_MIN_ORIG_QUESTION_DOCS = int(
     os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")

View File

@@ -1,55 +1,53 @@
+# The prompts for the agentic framework. The order follows approximately the order
+# of the actions in the graph
 # Standards
+SEPARATOR_LINE = "-------"
 UNKNOWN_ANSWER = "I do not have enough information to answer this question."
 NO_RECOVERED_DOCS = "No relevant information recovered"
-DATE_PROMPT = """Today is {date}.\n\n"""
+DATE_PROMPT = "Today is {date}.\n\n"
 SUB_CHECK_YES = "yes"
 SUB_CHECK_NO = "no"
 # Framing/Support/Template Prompts
-HISTORY_FRAMING_PROMPT = """\n
+HISTORY_FRAMING_PROMPT = f"""
 For more context, here is the history of the conversation so far that preceded this question:
-\n-------\n
-{history}
-\n-------\n\n
-"""
+{SEPARATOR_LINE}
+{{history}}
+{SEPARATOR_LINE}
+""".strip()
-ASSISTANT_SYSTEM_PROMPT_DEFAULT = """
-You are an assistant for question-answering tasks."""
-ASSISTANT_SYSTEM_PROMPT_PERSONA = """
+ASSISTANT_SYSTEM_PROMPT_DEFAULT = (
+    """You are an assistant for question-answering tasks."""
+)
+ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
 You are an assistant for question-answering tasks. Here is more information about you:
-\n-------\n
-{persona_prompt}
-\n-------\n
-"""
+{SEPARATOR_LINE}
+{{persona_prompt}}
+{SEPARATOR_LINE}
+""".strip()
 SUB_QUESTION_ANSWER_TEMPLATE = """\n
 Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
 """
-SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n
-Sub-Question: Q{sub_question_num}\n
+SUB_QUESTION_ANSWER_TEMPLATE_REFINED = f"""
+Sub-Question: Q{{sub_question_num}}\n
 Type:
-\n----\n
-{sub_question_type}
-\n----\n
+{SEPARATOR_LINE}
+{{sub_question_type}}
+{SEPARATOR_LINE}
 Sub-Question:
-\n----\n
-{sub_question}
-\n----\n
-\nAnswer:
-\n----\n
-{sub_answer}
-\n----\n
-\n
-"""
+{SEPARATOR_LINE}
+{{sub_question}}
+{SEPARATOR_LINE}
+Answer:
+{SEPARATOR_LINE}
+{{sub_answer}}
+{SEPARATOR_LINE}
+""".strip()
 SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n
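The reworked templates are module-level f-strings: `{SEPARATOR_LINE}` is substituted once at import time, while doubled braces such as `{{history}}` survive as `{history}` placeholders for a later `.format()` call. A self-contained sketch of that two-stage rendering (constants copied from the hunk above):

```python
SEPARATOR_LINE = "-------"

# At module import time the f-string fills in SEPARATOR_LINE; the doubled braces
# are left behind as a plain {history} placeholder.
HISTORY_FRAMING_PROMPT = f"""
For more context, here is the history of the conversation so far that preceded this question:
{SEPARATOR_LINE}
{{history}}
{SEPARATOR_LINE}
""".strip()

# At call time the remaining placeholder is filled with the actual conversation history.
print(HISTORY_FRAMING_PROMPT.format(history="User asked about Q3 revenue."))
```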
@@ -58,230 +56,206 @@ Sub-Question: Q{sub_question_num}\n Type: {sub_question_type}\n Sub-Question:\n
 """
-# Setap/Util Prompts
-ENTITY_TERM_EXTRACTION_PROMPT = """ \n
+# Step/Utility Prompts
+ENTITY_TERM_EXTRACTION_PROMPT = f"""
 Based on the original question and some context retrieved from a dataset, please generate a list of
 entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
 (e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.
-\n\n
 Here is the original question:
-\n-------\n
-{question}
-\n-------\n
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
 And here is the context retrieved:
-\n-------\n
-{context}
-\n-------\n
+{SEPARATOR_LINE}
+{{context}}
+{SEPARATOR_LINE}
 Please format your answer as a json object in the following format:
-{{"retrieved_entities_relationships": {{
-    "entities": [{{
-        "entity_name": <assign a name for the entity>,
-        "entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
-    }}],
-    "relationships": [{{
-        "relationship_name": <assign a name for the relationship>,
-        "relationship_type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
-        "relationship_entities": [<related entity name 1>, <related entity name 2>, ...]
-    }}],
-    "terms": [{{
-        "term_name": <assign a name for the term>,
-        "term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
-        "term_similar_to": <list terms that are similar to this term>
-    }}]
+{{
+    "retrieved_entities_relationships": {{
+        "entities": [
+            {{
+                "entity_name": "<assign a name for the entity>",
+                "entity_type": "<specify a short type name for the entity, such as 'company', 'location',...>"
+            }}
+        ],
+        "relationships": [
+            {{
+                "relationship_name": "<assign a name for the relationship>",
+                "relationship_type": "<specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>",
+                "relationship_entities": ["<related entity name 1>", "<related entity name 2>", "..."]
+            }}
+        ],
+        "terms": [
+            {{
+                "term_name": "<assign a name for the term>",
+                "term_type": "<specify a short type name for the term, such as 'revenue', 'market_share',...>",
+                "term_similar_to": ["<list terms that are similar to this term>"]
+            }}
+        ]
+    }}
 }}
-}}
-"""
+""".strip()
-HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
-{persona_specification}
-Your task now is to summarize the key parts of the history of a conversation between a user and an agent. The
-summary has two purposes:
-1) providing the suitable context for a new question, and
-2) To capture the key information that was discussed and that the user may have a follow-up question about.
-\n-------\n
-{question}
-\n-------\n
-And here is the history:
-\n-------\n
-{history}
-\n-------\n
-Please provide a summarized context from the history so that the question makes sense and can - with
-suitable extra information - be answered.
-Please do not use more than three or four sentences.
-History summary:
-"""
+HISTORY_CONTEXT_SUMMARY_PROMPT = (
+    "{persona_specification}\n\n"
+    "Your task now is to summarize the key parts of the history of a conversation between a user and an agent."
+    " The summary has two purposes:\n"
+    " 1) providing the suitable context for a new question, and\n"
+    " 2) To capture the key information that was discussed and that the user may have a follow-up question about.\n\n"
+    "Here is the question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the history:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{history}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please provide a summarized context from the history so that the question makes sense and can"
+    " - with suitable extra information - be answered.\n\n"
+    "Do not use more than three or four sentences.\n\n"
+    "History summary:"
+).strip()
 # INITIAL PHASE
-## Sub-question
-INITIAL_QUESTION_DECOMPOSITION_PROMPT = """
-If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
-answer the original question. The purpose for this decomposition may be to
-1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
-'what are sales for company B')]
-2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
-'what is our market share with company A', 'is company A a reference customer for us', etc.])
-3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
-familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
-(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
-'what do we do to improve stability of product X', ...])
-4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
-If you think that a decomposition is not needed or helpful, please just return an empty string. That is ok too.
-Here is the initial question:
-\n-------\n
-{question}
-\n-------\n
-{history}
-Please formulate your answer as a newline-separated list of questions like so:
-<sub-question>
-<sub-question>
-<sub-question>
-Answer:"""
-INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = """
-If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
-answer the original question. The purpose for this decomposition may be to
-1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
-'what are sales for company B')]
-2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
-'what is our market share with company A', 'is company A a reference customer for us', etc.])
-3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
-familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
-(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
-'what do we do to improve stability of product X', ...])
-4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
-Here are some other rules:
-1) To give you some context, you will see below also some documents that relate to the question. Please only
-use this information to learn what the question is approximately asking about, but do not focus on the details
-to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may
-not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! Decomposition and
-disambiguations are most important!
-2) If you think that a decomposition is not needed or helpful, please just return an empty string. That is very much ok too.
-Here are the sample docs to give you some context:
-\n-------\n
-{sample_doc_str}
-\n-------\n
-And here is the initial question that you should think about decomposing:
-\n-------\n
-{question}
-\n-------\n
-{history}
-Please formulate your answer as a newline-separated list of questions like so:
-<sub-question>
-<sub-question>
-<sub-question>
-...
-Answer:"""
+# Sub-question
+# Intentionally left a copy in case we want to modify this one differently
+INITIAL_QUESTION_DECOMPOSITION_PROMPT = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve"
+    " scalability of product X', 'what do we do to improve stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "Here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " ...\n\n"
+    "Answer:"
+).strip()
+INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve"
+    " scalability of product X', 'what do we do to improve stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "To give you some context, you will see below also some documents that may relate to the question. Please only"
+    " use this information to learn what the question is approximately asking about, but do not focus on the details"
+    " to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may"
+    " not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions!"
+    " Decomposition and disambiguations are most important!\n\n"
+    "Here are the sample docs to give you some context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{sample_doc_str}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " ...\n\n"
+    "Answer:"
+).strip()
 # Retrieval
-QUERY_REWRITING_PROMPT = """ \n
-Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrivel from a
-document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
-enabling the system to search more broadly.
-Also, try to make the search queries not redundant, i.e. not too similar! \n\n
-Here is the initial question:
-\n-------\n
-{question}
-\n-------\n
-Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:
-<query 1>
-<query 2>
-...
-queries: """
+QUERY_REWRITING_PROMPT = (
+    "Please convert the initial user question into a 2-3 more appropriate short and pointed search queries for"
+    " retrieval from a document store. Particularly, try to think about resolving ambiguities and make the search"
+    " queries more specific, enabling the system to search more broadly.\n"
+    "Also, try to make the search queries not redundant, i.e. not too similar!\n\n"
+    "Here is the initial question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:\n"
+    "<query 1>\n"
+    "<query 2>\n"
+    "...\n\n"
+    "Queries:"
+)
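Since the rewriting prompt asks for plain newline-separated queries without numbering, the caller can split the completion directly. A small illustrative sketch of that parsing step (not the actual `expand_queries` node):

```python
def parse_rewritten_queries(llm_output: str) -> list[str]:
    # One query per non-empty line, as the prompt's output format requests.
    return [line.strip() for line in llm_output.splitlines() if line.strip()]


sample_output = "Q3 revenue by region\nEMEA sales figures 2024\n"
print(parse_rewritten_queries(sample_output))  # -> ['Q3 revenue by region', 'EMEA sales figures 2024']
```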
-DOCUMENT_VERIFICATION_PROMPT = """
-You are supposed to judge whether a document text contains data or information that is potentially relevant
-for a question. It does not have to be fully relevant, but check whether it has some information that
-would help - possibly in conjunction with other documents - to address the question.
-Be careful that you do not use a document where you are not sure whether the text applies to the objects
-or entities that are relevant for the question. For example, a book about chess could have long passage
-discussing the psychology of chess without - within the passage - mentioning chess. If now a question
-is asked about the psychology of football, one could be tempted to use the document as it does discuss
-psychology in sports. However, it is NOT about football and should not be deemed relevant. Please
-consider this logic.
-Here is a document text that you can take as a fact:
-DOCUMENT INFORMATION:
-\n-------\n
-{document_content}
-\n-------\n
-Do you think that this document text is useful and relevant to answer the following question?
-QUESTION:
-\n-------\n
-{question}
-\n-------\n
-Please answer with 'yes' or 'no':
-Answer:
-"""
+DOCUMENT_VERIFICATION_PROMPT = (
+    "Determine whether the following document text contains data or information that is potentially relevant "
+    "for a question. It does not have to be fully relevant, but check whether it has some information that "
+    "would help - possibly in conjunction with other documents - to address the question.\n\n"
+    "Be careful that you do not use a document where you are not sure whether the text applies to the objects "
+    "or entities that are relevant for the question. For example, a book about chess could have long passage "
+    "discussing the psychology of chess without - within the passage - mentioning chess. If now a question "
+    "is asked about the psychology of football, one could be tempted to use the document as it does discuss "
+    "psychology in sports. However, it is NOT about football and should not be deemed relevant. Please "
+    "consider this logic.\n\n"
+    "DOCUMENT TEXT:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{document_content}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Do you think that this document text is useful and relevant to answer the following question?\n\n"
+    "QUESTION:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please answer with exactly and only a 'yes' or 'no':\n\n"
+    "Answer:"
+).strip()
 # Sub-Question Anser Generation
 SUB_QUESTION_RAG_PROMPT = (
-    """ \n
-{date_prompt}
-Use the context provided below - and only the
-provided context - to answer the given question. (Note that the answer is in service of answering a broader
-question, given below as 'motivation'.)
-Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
-question based on the context, say """
-    + f'"{UNKNOWN_ANSWER}"'
-    + """. It is a matter of life and death that you do NOT
-use your internal knowledge, just the provided information!
-Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal.
-(But keep other details as well.)
-It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
-It is important that the citation is close to the information it supports.
-Proper citations are very important to the user!\n\n\n
-For your general information, here is the ultimate motivation:
-\n--\n
-{original_question}
-\n--\n
-\n
-And here is the actual question I want you to answer based on the context above (with the motivation in mind):
-\n--\n {question} \n--\n
-Here is the context:
-\n\n\n--\n {context} \n--\n
-Please keep your answer brief and concise, and focus on facts and data.
-Answer:
-"""
-)
+    "Use the context provided below - and only the provided context - to answer the given question. "
+    "(Note that the answer is in service of answering a broader question, given below as 'motivation'.)\n\n"
+    "Again, only use the provided context and do not use your internal knowledge! If you cannot answer the "
+    f'question based on the context, say "{UNKNOWN_ANSWER}". It is a matter of life and death that you do NOT '
+    "use your internal knowledge, just the provided information!\n\n"
+    "Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. "
+    "(But keep other details as well.)\n\n"
+    "It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! "
+    "It is important that the citation is close to the information it supports. "
+    "Proper citations are very important to the user!\n\n"
+    "For your general information, here is the ultimate motivation:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{original_question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the actual question I want you to answer based on the context above (with the motivation in mind):\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Here is the context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{context}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please keep your answer brief and concise, and focus on facts and data.\n\n"
+    "Answer:"
+).strip()
 SUB_ANSWER_CHECK_PROMPT = (
@@ -301,8 +275,8 @@ Does the suggested answer address the question? Please answer with """
     + f'"{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
 )
 # Initial Answer Generation
 INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS = (
     """ \n
 {persona_specification}