Optimizations: docs for context & history

- summarize history if it is too long
- introduce cited_docs from sub-questions (SQ), since those must be provided to answer generation
- limit the number of docs

TODO: same for the refined flow
joachim-danswer 2025-01-27 14:02:19 -08:00 committed by Evan Lohn
parent 110c9f7e1b
commit f2aeeb7b3c
10 changed files with 148 additions and 33 deletions

View File

@ -23,6 +23,7 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import (
ASSISTANT_SYSTEM_PROMPT_PERSONA,
)
from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.chat.models import AgentAnswerPiece
@ -71,7 +72,7 @@ def answer_generation(
msg = build_sub_question_answer_prompt(
question=question,
original_question=agent_search_config.search_request.query,
docs=docs,
docs=context_docs,
persona_specification=persona_specification,
config=fast_llm.config,
)
@ -99,6 +100,9 @@ def answer_generation(
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_docs = [context_docs[id] for id in answer_citation_ids]
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type="sub_answer",
@ -110,6 +114,7 @@ def answer_generation(
now_end = datetime.now()
return QAGenerationUpdate(
answer=answer_str,
cited_docs=cited_docs,
log_messages=[
f"{now_start} -- Answer generation SQ-{level} - Q{question_nr} - Time taken: {now_end - now_start}"
],

View File

@ -22,6 +22,7 @@ def format_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
expanded_retrieval_results=state.expanded_retrieval_results,
documents=state.documents,
context_documents=state.context_documents,
cited_docs=state.cited_docs,
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
)
],

View File

@ -24,6 +24,7 @@ class QACheckUpdate(BaseModel):
class QAGenerationUpdate(BaseModel):
answer: str = ""
log_messages: list[str] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
# answer_stat: AnswerStats
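The new `cited_docs` field uses the same `Annotated[..., dedup_inference_sections]` pattern as the other document lists. If this follows the usual LangGraph convention, the second argument of `Annotated` is a reducer that merges partial state updates from parallel sub-question branches instead of overwriting them. A toy illustration of that merge behavior (the reducer below is a simplified stand-in for `dedup_inference_sections`):

```python
from typing import Annotated

from pydantic import BaseModel


def dedup_merge(existing: list[str], new: list[str]) -> list[str]:
    # Simplified stand-in: the real dedup_inference_sections keys on chunk/document ids
    merged = list(existing)
    for item in new:
        if item not in merged:
            merged.append(item)
    return merged


class QAGenerationUpdate(BaseModel):
    answer: str = ""
    log_messages: list[str] = []
    cited_docs: Annotated[list[str], dedup_merge] = []


# Two branches emitting overlapping citations end up merged, not clobbered:
print(dedup_merge(["doc_a", "doc_b"], ["doc_b", "doc_c"]))  # ['doc_a', 'doc_b', 'doc_c']
```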

View File

@ -52,8 +52,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.context.search.models import InferenceSection
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
@ -69,14 +74,29 @@ def generate_initial_answer(
persona_prompt = get_persona_prompt(agent_a_config.search_request.persona)
history = build_history_prompt(agent_a_config.prompt_builder)
date_str = get_today_prompt()
sub_question_docs = state.context_documents
sub_questions_cited_docs = state.cited_docs
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = []
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
):
consolidated_context_docs.append(original_doc)
counter += 1
# sort docs by their scores - though the scores refer to different questions
relevant_docs = dedup_inference_sections(
sub_question_docs, all_original_question_documents
consolidated_context_docs, consolidated_context_docs
)
decomp_questions = []
# Use the query info from the base document retrieval
@ -171,6 +191,10 @@ def generate_initial_answer(
model = agent_a_config.fast_llm
# summarize the history iff too long
if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
history = summarize_history(history, question, persona_specification, model)
doc_context = format_docs(relevant_docs)
doc_context = trim_prompt_piece(
model.config,
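This hunk is the "limit number of docs" optimization from the commit message: documents cited by the sub-answers are kept, and original-question documents are appended only while the minimum original-question quota or the overall context cap allows it. A simplified sketch of that intent, with an explicit id-based "already cited" check (the check and the doc representation are assumptions made for clarity; the defaults match the new configs below):

```python
AGENT_MIN_ORIG_QUESTION_DOCS = 5     # always take at least a few original-question docs
AGENT_MAX_ANSWER_CONTEXT_DOCS = 30   # cap on total context docs for the initial answer


def consolidate_context_docs(
    cited_docs: list[dict], orig_question_docs: list[dict]
) -> list[dict]:
    """Keep everything the sub-answers cited, then top up from the original-question retrieval."""
    cited_ids = {doc["id"] for doc in cited_docs}
    consolidated = list(cited_docs)
    added_from_orig = 0
    for doc in orig_question_docs:
        if doc["id"] in cited_ids:
            continue  # already in context via a sub-question citation
        if (
            added_from_orig < AGENT_MIN_ORIG_QUESTION_DOCS
            or len(consolidated) < AGENT_MAX_ANSWER_CONTEXT_DOCS
        ):
            consolidated.append(doc)
            added_from_orig += 1
    return consolidated
```

The history handling in the same hunk is simpler: `history` is passed to `summarize_history` only when it exceeds `AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH` characters, so short histories reach the prompt unchanged.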

View File

@ -18,11 +18,12 @@ def ingest_initial_sub_question_answers(
logger.debug(f"--------{now_start}--------INGEST ANSWERS---")
documents = []
context_documents = []
cited_docs = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
now_end = datetime.now()
logger.debug(
@ -34,6 +35,7 @@ def ingest_initial_sub_question_answers(
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_docs=dedup_inference_sections(cited_docs, []),
decomp_answer_results=answer_results,
log_messages=[
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"

View File

@ -98,6 +98,7 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
class DecompAnswersUpdate(LoggerUpdate):
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
decomp_answer_results: Annotated[
list[QuestionAnswerResults], dedup_question_answer_results
] = []

View File

@ -103,6 +103,7 @@ class QuestionAnswerResults(BaseModel):
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
context_documents: list[InferenceSection]
cited_docs: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@ -65,36 +65,36 @@ BASE_RAG_PROMPT = (
BASE_RAG_PROMPT_v2 = (
""" \n
{date_prompt}
Use the context provided below - and only the
provided context - to answer the given question. (Note that the answer is in service of answering a broader
question, given below as 'motivation'.)
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say """
+ f'"{UNKNOWN_ANSWER}"'
+ """. It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!
Make sure that you keep all relevant information, specifically as it concerns the ultimate goal.
(But keep other details as well.)
Please remember to provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports.
Proper citations are very important to the user!\n\n\n
For your general information, here is the ultimate motivation:
\n--\n {original_question} \n--\n
\n\n
And here is the actual question I want you to answer based on the context above (with the motivation in mind):
\n--\n {question} \n--\n
Here is the context:
\n\n\n--\n {context} \n--\n
Please keep your answer brief and concise, and focus on facts and data.
Answer:
"""
)
SUB_CHECK_YES = "yes"
@ -759,7 +759,7 @@ IMPORTANT RULES:
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
- The answers to the subquestions should help you to structure your thoughts in order to answer the question.
Please provide inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is critical that you provide proper inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is important that the citation is close to the information it supports. If you have multiple citations,
please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to cite documents in addition
to the sub-questions! Proper citations are important for the final answer to be verifiable! \n\n\n
@ -810,9 +810,9 @@ answer
{history}
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation
is close to the information it supports. If you have multiple citations that support a fact, please cite for example
as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is important that the citation is close to the information it supports. If you have multiple citations that support
a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc.
Again, please NEVER cite sub-questions without a document citation!
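The rule stated above - a sub-question citation `[[Qn]]()` is only allowed alongside at least one document citation `[[Dn]]()` - can be checked mechanically. This validator is purely illustrative (not part of the commit; the names and regexes are assumptions), and it checks the rule at the answer level rather than per citation cluster:

```python
import re

DOC_CITE = re.compile(r"\[\[D\d+\]\]\(\)")
SUBQ_CITE = re.compile(r"\[\[Q\d+\]\]\(\)")


def citations_look_valid(answer: str) -> bool:
    """False if the answer cites a sub-question without citing any document."""
    return not (SUBQ_CITE.search(answer) and not DOC_CITE.search(answer))


print(citations_look_valid("Growth was 12% [[D2]]()[[Q1]]()."))  # True
print(citations_look_valid("Growth was 12% [[Q1]]()."))          # False
```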
@ -872,8 +872,8 @@ IMPORTANT RULES:
Again, you should be sure that the answer is supported by the information provided!
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation
is close to the information it supports. If you have multiple
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the
user!
@ -909,7 +909,7 @@ the main question. Note that the sub-questions have a type, 'initial' and 'revis
3) a number of documents that were deemed relevant for the question. This is the context that you use largely for
citations (see below).
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
@ -974,7 +974,7 @@ The information provided below consists of:
1) an initial answer that was given but found to be lacking in some way.
2) a number of documents that were also deemed relevant for the question.
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user!\n\n
@ -1084,3 +1084,25 @@ With these criteria in mind, is the refined answer substantially better than the
Please answer with a simple 'yes' or 'no'.
"""
HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
{persona_specification}
You need to summarize the key parts of the history of a conversation between a user and an agent
strictly for the purpose of providing suitable context for a question.
Here is the question:
\n--\n
{question}
\n--\n
And here is the history:
\n--\n
{history}
\n--\n
Please provide a summarized context from the history so that the question makes sense and can - with
suitable extra information - be answered.
Please do not use more than three or four sentences.
Context summary:
"""

View File

@ -20,6 +20,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.prompts import DATE_PROMPT
from onyx.agents.agent_search.shared_graph_utils.prompts import (
HISTORY_CONTEXT_SUMMARY_PROMPT,
)
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import CitationConfig
from onyx.chat.models import DocumentPruningConfig
@ -325,3 +328,25 @@ def retrieve_search_docs(
break
return retrieved_docs
def get_answer_citation_ids(answer_str: str) -> list[int]:
citation_ids = re.findall(r"\[\[D(\d+)\]\]", answer_str)
return list(set([(int(id) - 1) for id in citation_ids]))
def summarize_history(
history: str, question: str, persona_specification: str, model: LLM
) -> str:
history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format(
persona_specification=persona_specification, question=question, history=history
)
history_response = model.invoke(history_context_prompt)
if isinstance(history_response.content, str):
history_context_response_str = history_response.content
else:
history_context_response_str = ""
return history_context_response_str
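Tying the utility to its call site in `generate_initial_answer`: the summary is only requested when the static history exceeds the configured character budget, otherwise the raw history is used unchanged. A sketch of that guard with the LLM stubbed out (`StubLLM` and `build_history_for_prompt` are illustrative names, not part of the commit):

```python
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = 100  # default from the config hunk below


class StubLLM:
    """Stand-in for the graph's LLM; invoke() returns an object exposing .content."""

    def invoke(self, prompt: str):
        return type("Resp", (), {"content": "User is following up on last week's revenue question."})()


def summarize_history(history: str, question: str, persona_specification: str, model) -> str:
    # Same shape as the utility above, with the prompt abbreviated for the sketch
    response = model.invoke(f"{persona_specification}\nQuestion: {question}\nHistory: {history}\nSummary:")
    return response.content if isinstance(response.content, str) else ""


def build_history_for_prompt(history: str, question: str, persona: str, model) -> str:
    if len(history) <= AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
        return history  # short histories are forwarded verbatim
    return summarize_history(history, question, persona, model)


print(build_history_for_prompt("hi" * 200, "How did Q3 compare?", "", StubLLM()))
```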

View File

@ -92,3 +92,36 @@ except ValueError:
raise ValueError(
f"AGENT_EXPLORATORY_SEARCH_RESULTS must be an integer, got {AGENT_EXPLORATORY_SEARCH_RESULTS_OS}"
)
AGENT_MIN_ORIG_QUESTION_DOCS_OS: int | str = os.environ.get(
"AGENT_MIN_ORIG_QUESTION_DOCS", "5"
)
try:
AGENT_MIN_ORIG_QUESTION_DOCS = int(AGENT_MIN_ORIG_QUESTION_DOCS_OS)
except ValueError:
raise ValueError(
f"AGENT_MIN_ORIG_QUESTION_DOCS must be an integer, got {AGENT_MIN_ORIG_QUESTION_DOCS_OS}"
)
AGENT_MAX_ANSWER_CONTEXT_DOCS_OS: int | str = os.environ.get(
"AGENT_MAX_ANSWER_CONTEXT_DOCS", "30"
)
try:
AGENT_MAX_ANSWER_CONTEXT_DOCS = int(AGENT_MAX_ANSWER_CONTEXT_DOCS_OS)
except ValueError:
raise ValueError(
f"AGENT_MAX_ANSWER_CONTEXT_DOCS must be an integer, got {AGENT_MAX_ANSWER_CONTEXT_DOCS_OS}"
)
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS: int | str = os.environ.get(
"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS", "100"
)
try:
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS)
except ValueError:
raise ValueError(
f"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH must be an integer, got {AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS}"
)
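All three new settings repeat the same read-then-parse-then-raise pattern. If more agent knobs get added, a small helper would keep the behavior identical in one place; this is only a refactoring sketch under that assumption, not something the commit does:

```python
import os


def int_from_env(name: str, default: int) -> int:
    """Read an integer setting from the environment, failing loudly on non-integer values."""
    raw = os.environ.get(name, str(default))
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer, got {raw}")


AGENT_MIN_ORIG_QUESTION_DOCS = int_from_env("AGENT_MIN_ORIG_QUESTION_DOCS", 5)
AGENT_MAX_ANSWER_CONTEXT_DOCS = int_from_env("AGENT_MAX_ANSWER_CONTEXT_DOCS", 30)
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int_from_env("AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH", 100)
```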