From f2aeeb7b3c98157fea06a5b1c1a809f865c3dfe3 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Mon, 27 Jan 2025 14:02:19 -0800 Subject: [PATCH] Optimizations: docs for context & history - summarize history if long - introduced cited_docs from SQ as those must be provided to answer generations - limit number of docs TODO: same for refined flow --- .../nodes/answer_generation.py | 7 +- .../nodes/format_answer.py | 1 + .../answer_initial_sub_question/states.py | 1 + .../nodes/generate_initial_answer.py | 26 +++++- .../ingest_initial_sub_question_answers.py | 4 +- .../agent_search/deep_search_a/main/states.py | 1 + .../agent_search/shared_graph_utils/models.py | 1 + .../shared_graph_utils/prompts.py | 82 ++++++++++++------- .../agent_search/shared_graph_utils/utils.py | 25 ++++++ backend/onyx/configs/agent_configs.py | 33 ++++++++ 10 files changed, 148 insertions(+), 33 deletions(-) diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py index 930f0cc82..de26d2cdf 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py @@ -23,6 +23,7 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import ( ASSISTANT_SYSTEM_PROMPT_PERSONA, ) from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS +from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id from onyx.chat.models import AgentAnswerPiece @@ -71,7 +72,7 @@ def answer_generation( msg = build_sub_question_answer_prompt( question=question, 
original_question=agent_search_config.search_request.query, - docs=docs, + docs=context_docs, persona_specification=persona_specification, config=fast_llm.config, ) @@ -99,6 +100,9 @@ def answer_generation( answer_str = merge_message_runs(response, chunk_separator="")[0].content + answer_citation_ids = get_answer_citation_ids(answer_str) + cited_docs = [context_docs[id] for id in answer_citation_ids] + stop_event = StreamStopInfo( stop_reason=StreamStopReason.FINISHED, stream_type="sub_answer", @@ -110,6 +114,7 @@ def answer_generation( now_end = datetime.now() return QAGenerationUpdate( answer=answer_str, + cited_docs=cited_docs, log_messages=[ f"{now_start} -- Answer generation SQ-{level} - Q{question_nr} - Time taken: {now_end - now_start}" ], diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py index 454953836..aa97b4d2b 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py @@ -22,6 +22,7 @@ def format_answer(state: AnswerQuestionState) -> AnswerQuestionOutput: expanded_retrieval_results=state.expanded_retrieval_results, documents=state.documents, context_documents=state.context_documents, + cited_docs=state.cited_docs, sub_question_retrieval_stats=state.sub_question_retrieval_stats, ) ], diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py index fff26b70c..4c03aa43f 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py @@ -24,6 +24,7 @@ class QACheckUpdate(BaseModel): class 
QAGenerationUpdate(BaseModel): answer: str = "" log_messages: list[str] = [] + cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = [] # answer_stat: AnswerStats diff --git a/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py b/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py index d40829142..662c2c8f2 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py +++ b/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py @@ -52,8 +52,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id +from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history from onyx.chat.models import AgentAnswerPiece from onyx.chat.models import ExtendedToolResponse +from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS +from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH +from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS +from onyx.context.search.models import InferenceSection from onyx.tools.tool_implementations.search.search_tool import yield_search_responses @@ -69,14 +74,29 @@ def generate_initial_answer( persona_prompt = get_persona_prompt(agent_a_config.search_request.persona) history = build_history_prompt(agent_a_config.prompt_builder) + date_str = get_today_prompt() sub_question_docs = state.context_documents + sub_questions_cited_docs = state.cited_docs all_original_question_documents = state.all_original_question_documents + consolidated_context_docs: list[InferenceSection] = [] + counter = 0 + for 
original_doc_number, original_doc in enumerate(all_original_question_documents): + if original_doc_number not in sub_questions_cited_docs: + if ( + counter <= AGENT_MIN_ORIG_QUESTION_DOCS + or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS + ): + consolidated_context_docs.append(original_doc) + counter += 1 + + # sort docs by their scores - though the scores refer to different questions relevant_docs = dedup_inference_sections( - sub_question_docs, all_original_question_documents + consolidated_context_docs, consolidated_context_docs ) + decomp_questions = [] # Use the query info from the base document retrieval @@ -171,6 +191,10 @@ def generate_initial_answer( model = agent_a_config.fast_llm + # summarize the history iff too long + if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH: + history = summarize_history(history, question, persona_specification, model) + doc_context = format_docs(relevant_docs) doc_context = trim_prompt_piece( model.config, diff --git a/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py b/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py index e8b937f5f..1ba2755ad 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py +++ b/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py @@ -18,11 +18,12 @@ def ingest_initial_sub_question_answers( logger.debug(f"--------{now_start}--------INGEST ANSWERS---") documents = [] context_documents = [] + cited_docs = [] answer_results = state.answer_results if hasattr(state, "answer_results") else [] for answer_result in answer_results: documents.extend(answer_result.documents) context_documents.extend(answer_result.context_documents) - + cited_docs.extend(answer_result.cited_docs) now_end = 
datetime.now() logger.debug( @@ -34,6 +35,7 @@ def ingest_initial_sub_question_answers( # so we might not need to dedup here documents=dedup_inference_sections(documents, []), context_documents=dedup_inference_sections(context_documents, []), + cited_docs=dedup_inference_sections(cited_docs, []), decomp_answer_results=answer_results, log_messages=[ f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}" diff --git a/backend/onyx/agents/agent_search/deep_search_a/main/states.py b/backend/onyx/agents/agent_search/deep_search_a/main/states.py index c55a6ceca..dc10e8fc8 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/main/states.py +++ b/backend/onyx/agents/agent_search/deep_search_a/main/states.py @@ -98,6 +98,7 @@ class RequireRefinedAnswerUpdate(LoggerUpdate): class DecompAnswersUpdate(LoggerUpdate): documents: Annotated[list[InferenceSection], dedup_inference_sections] = [] context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = [] + cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = [] decomp_answer_results: Annotated[ list[QuestionAnswerResults], dedup_question_answer_results ] = [] diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/models.py b/backend/onyx/agents/agent_search/shared_graph_utils/models.py index bae6191e6..10ec49172 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py @@ -103,6 +103,7 @@ class QuestionAnswerResults(BaseModel): expanded_retrieval_results: list[QueryResult] documents: list[InferenceSection] context_documents: list[InferenceSection] + cited_docs: list[InferenceSection] sub_question_retrieval_stats: AgentChunkStats diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py index b91e468a9..bbee25bd0 100644 --- 
a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py @@ -65,36 +65,36 @@ BASE_RAG_PROMPT = ( BASE_RAG_PROMPT_v2 = ( """ \n - {date_prompt} - Use the context provided below - and only the - provided context - to answer the given question. (Note that the answer is in service of answering a broader - question, given below as 'motivation'.) +{date_prompt} +Use the context provided below - and only the +provided context - to answer the given question. (Note that the answer is in service of answering a broader +question, given below as 'motivation'.) - Again, only use the provided context and do not use your internal knowledge! If you cannot answer the - question based on the context, say """ +Again, only use the provided context and do not use your internal knowledge! If you cannot answer the +question based on the context, say """ + f'"{UNKNOWN_ANSWER}"' + """. It is a matter of life and death that you do NOT - use your internal knowledge, just the provided information! +use your internal knowledge, just the provided information! - Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. - (But keep other details as well.) +Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. +(But keep other details as well.) - Please remember to provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! - It is important that the citation is close to the information it supports. - Proper citations are very important to the user!\n\n\n +It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is important that the citation is close to the information it supports. 
+Proper citations are very important to the user!\n\n\n - For your general information, here is the ultimate motivation: - \n--\n {original_question} \n--\n - \n\n - And here is the actual question I want you to answer based on the context above (with the motivation in mind): - \n--\n {question} \n--\n +For your general information, here is the ultimate motivation: +\n--\n {original_question} \n--\n +\n\n +And here is the actual question I want you to answer based on the context above (with the motivation in mind): +\n--\n {question} \n--\n - Here is the context: - \n\n\n--\n {context} \n--\n - Please keep your answer brief and concise, and focus on facts and data. +Here is the context: +\n\n\n--\n {context} \n--\n +Please keep your answer brief and concise, and focus on facts and data. - Answer: - """ +Answer: +""" ) SUB_CHECK_YES = "yes" @@ -759,7 +759,7 @@ IMPORTANT RULES: - If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why. - The answers to the subquestions should help you to structure your thoughts in order to answer the question. -Please provide inline citations of documentsin the format [[D1]](), [[D2]](), [[D3]](), etc.! +It is critical that you provide proper inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to cite documents in addition to the sub-questions! Proper citations are important for the final answer to be verifiable! \n\n\n @@ -810,9 +810,9 @@ answer {history} -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation -is close to the information it supports. If you have multiple citations that support a fact, please cite for example -as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. 
+It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! +It is important that the citation is close to the information it supports. If you have multiple citations that support +a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc. Again, please NEVER cite sub-questions without a document citation! @@ -872,8 +872,8 @@ IMPORTANT RULES: Again, you should be sure that the answer is supported by the information provided! -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation -is close to the information it supports. If you have multiple +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user! @@ -909,7 +909,7 @@ the main question. Note that the sub-questions have a type, 'initial' and 'revised'. 3) a number of documents that were deemed relevant for the question. This the is the context that you use largey for citations (see below). -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. 
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question @@ -974,7 +974,7 @@ The information provided below consists of: 1) an initial answer that was given but found to be lacking in some way. 2) a number of documents that were also deemed relevant for the question. -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user!\n\n @@ -1084,3 +1084,25 @@ With these criteria in mind, is the refined answer substantially better than the Please answer with a simple 'yes' or 'no'. """ +HISTORY_CONTEXT_SUMMARY_PROMPT = """\n +{persona_specification} +You need to summarize the key parts of the history of a conversation between a user and an agent +strictly for the purpose of providing the suitable context for a question. + +Here is the question: +\n--\n +{question} +\n--\n + +And here is the history: +\n--\n +{history} +\n--\n + +Please provide a summarized context from the history so that the question makes sense and can - with +suitable extra information - be answered. + +Please do not use more than three or four sentences. 
+ +Context summary: +""" diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py index 2cd71488b..1697daa7c 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py @@ -20,6 +20,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import ( EntityRelationshipTermExtraction, ) from onyx.agents.agent_search.shared_graph_utils.prompts import DATE_PROMPT +from onyx.agents.agent_search.shared_graph_utils.prompts import ( + HISTORY_CONTEXT_SUMMARY_PROMPT, +) from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import CitationConfig from onyx.chat.models import DocumentPruningConfig @@ -325,3 +328,25 @@ def retrieve_search_docs( break return retrieved_docs + + +def get_answer_citation_ids(answer_str: str) -> list[int]: + citation_ids = re.findall(r"\[\[D(\d+)\]\]", answer_str) + return list(set([(int(id) - 1) for id in citation_ids])) + + +def summarize_history( + history: str, question: str, persona_specification: str, model: LLM +) -> str: + history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format( + persona_specification=persona_specification, question=question, history=history + ) + + history_response = model.invoke(history_context_prompt) + + if isinstance(history_response.content, str): + history_context_response_str = history_response.content + else: + history_context_response_str = "" + + return history_context_response_str diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 0e8ad1d6a..25f78e52f 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -92,3 +92,36 @@ except ValueError: raise ValueError( f"AGENT_EXPLORATORY_SEARCH_RESULTS must be an integer, got {AGENT_EXPLORATORY_SEARCH_RESULTS_OS}" ) + +AGENT_MIN_ORIG_QUESTION_DOCS_OS: int | str = os.environ.get( + 
"AGENT_MIN_ORIG_QUESTION_DOCS", "5" ) + +try: + AGENT_MIN_ORIG_QUESTION_DOCS = int(AGENT_MIN_ORIG_QUESTION_DOCS_OS) +except ValueError: + raise ValueError( + f"AGENT_MIN_ORIG_QUESTION_DOCS must be an integer, got {AGENT_MIN_ORIG_QUESTION_DOCS_OS}" + ) + +AGENT_MAX_ANSWER_CONTEXT_DOCS_OS: int | str = os.environ.get( + "AGENT_MAX_ANSWER_CONTEXT_DOCS", "30" +) + +try: + AGENT_MAX_ANSWER_CONTEXT_DOCS = int(AGENT_MAX_ANSWER_CONTEXT_DOCS_OS) +except ValueError: + raise ValueError( + f"AGENT_MAX_ANSWER_CONTEXT_DOCS must be an integer, got {AGENT_MAX_ANSWER_CONTEXT_DOCS_OS}" + ) + +AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS: int | str = os.environ.get( + "AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH", "100" +) + +try: + AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS) +except ValueError: + raise ValueError( + f"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH must be an integer, got {AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS}" + )