Optimizations: docs for context & history

- summarize history if it is too long
- introduce cited_docs from sub-questions (SQ), since those must be provided to answer generation
- limit the number of docs

TODO: same for the refined flow
joachim-danswer 2025-01-27 14:02:19 -08:00 committed by Evan Lohn
parent 110c9f7e1b
commit f2aeeb7b3c
10 changed files with 148 additions and 33 deletions

View File

@ -23,6 +23,7 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import (
ASSISTANT_SYSTEM_PROMPT_PERSONA,
)
from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS
from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.chat.models import AgentAnswerPiece
@ -71,7 +72,7 @@ def answer_generation(
msg = build_sub_question_answer_prompt(
question=question,
original_question=agent_search_config.search_request.query,
docs=docs,
docs=context_docs,
persona_specification=persona_specification,
config=fast_llm.config,
)
@ -99,6 +100,9 @@ def answer_generation(
answer_str = merge_message_runs(response, chunk_separator="")[0].content
answer_citation_ids = get_answer_citation_ids(answer_str)
cited_docs = [context_docs[id] for id in answer_citation_ids]
stop_event = StreamStopInfo(
stop_reason=StreamStopReason.FINISHED,
stream_type="sub_answer",
@ -110,6 +114,7 @@ def answer_generation(
now_end = datetime.now()
return QAGenerationUpdate(
answer=answer_str,
cited_docs=cited_docs,
log_messages=[
f"{now_start} -- Answer generation SQ-{level} - Q{question_nr} - Time taken: {now_end - now_start}"
],

View File

@ -22,6 +22,7 @@ def format_answer(state: AnswerQuestionState) -> AnswerQuestionOutput:
expanded_retrieval_results=state.expanded_retrieval_results,
documents=state.documents,
context_documents=state.context_documents,
cited_docs=state.cited_docs,
sub_question_retrieval_stats=state.sub_question_retrieval_stats,
)
],

View File

@ -24,6 +24,7 @@ class QACheckUpdate(BaseModel):
class QAGenerationUpdate(BaseModel):
answer: str = ""
log_messages: list[str] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
# answer_stat: AnswerStats
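The new `cited_docs` field uses the same `Annotated[..., dedup_inference_sections]` pattern as the other document lists. If this follows the usual LangGraph convention, the second argument of `Annotated` is a reducer that merges partial state updates from parallel sub-question branches instead of overwriting them. A toy illustration of that merge behavior (the reducer below is a simplified stand-in for `dedup_inference_sections`):

```python
from typing import Annotated

from pydantic import BaseModel


def dedup_merge(existing: list[str], new: list[str]) -> list[str]:
    # Simplified stand-in: the real dedup_inference_sections keys on chunk/document ids
    merged = list(existing)
    for item in new:
        if item not in merged:
            merged.append(item)
    return merged


class QAGenerationUpdate(BaseModel):
    answer: str = ""
    log_messages: list[str] = []
    cited_docs: Annotated[list[str], dedup_merge] = []


# Two branches emitting overlapping citations end up merged, not clobbered:
print(dedup_merge(["doc_a", "doc_b"], ["doc_b", "doc_c"]))  # ['doc_a', 'doc_b', 'doc_c']
```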

View File

@ -52,8 +52,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt
from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
from onyx.chat.models import AgentAnswerPiece
from onyx.chat.models import ExtendedToolResponse
from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH
from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
from onyx.context.search.models import InferenceSection
from onyx.tools.tool_implementations.search.search_tool import yield_search_responses
@ -69,14 +74,29 @@ def generate_initial_answer(
persona_prompt = get_persona_prompt(agent_a_config.search_request.persona)
history = build_history_prompt(agent_a_config.prompt_builder)
date_str = get_today_prompt()
sub_question_docs = state.context_documents
sub_questions_cited_docs = state.cited_docs
all_original_question_documents = state.all_original_question_documents
consolidated_context_docs: list[InferenceSection] = []
counter = 0
for original_doc_number, original_doc in enumerate(all_original_question_documents):
if original_doc_number not in sub_questions_cited_docs:
if (
counter <= AGENT_MIN_ORIG_QUESTION_DOCS
or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS
):
consolidated_context_docs.append(original_doc)
counter += 1
# sort docs by their scores - though the scores refer to different questions
relevant_docs = dedup_inference_sections(
sub_question_docs, all_original_question_documents
consolidated_context_docs, consolidated_context_docs
)
decomp_questions = []
# Use the query info from the base document retrieval
@ -171,6 +191,10 @@ def generate_initial_answer(
model = agent_a_config.fast_llm
# summarize the history iff too long
if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
history = summarize_history(history, question, persona_specification, model)
doc_context = format_docs(relevant_docs)
doc_context = trim_prompt_piece(
model.config,
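This hunk is the "limit number of docs" optimization from the commit message: documents cited by the sub-answers are kept, and original-question documents are appended only while the minimum original-question quota or the overall context cap allows it. A simplified sketch of that intent, with an explicit id-based "already cited" check (the check and the doc representation are assumptions made for clarity; the defaults match the new configs below):

```python
AGENT_MIN_ORIG_QUESTION_DOCS = 5     # always take at least a few original-question docs
AGENT_MAX_ANSWER_CONTEXT_DOCS = 30   # cap on total context docs for the initial answer


def consolidate_context_docs(
    cited_docs: list[dict], orig_question_docs: list[dict]
) -> list[dict]:
    """Keep everything the sub-answers cited, then top up from the original-question retrieval."""
    cited_ids = {doc["id"] for doc in cited_docs}
    consolidated = list(cited_docs)
    added_from_orig = 0
    for doc in orig_question_docs:
        if doc["id"] in cited_ids:
            continue  # already in context via a sub-question citation
        if (
            added_from_orig < AGENT_MIN_ORIG_QUESTION_DOCS
            or len(consolidated) < AGENT_MAX_ANSWER_CONTEXT_DOCS
        ):
            consolidated.append(doc)
            added_from_orig += 1
    return consolidated
```

The history handling in the same hunk is simpler: `history` is passed to `summarize_history` only when it exceeds `AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH` characters, so short histories reach the prompt unchanged.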

View File

@ -18,11 +18,12 @@ def ingest_initial_sub_question_answers(
logger.debug(f"--------{now_start}--------INGEST ANSWERS---")
documents = []
context_documents = []
cited_docs = []
answer_results = state.answer_results if hasattr(state, "answer_results") else []
for answer_result in answer_results:
documents.extend(answer_result.documents)
context_documents.extend(answer_result.context_documents)
cited_docs.extend(answer_result.cited_docs)
now_end = datetime.now()
logger.debug(
@ -34,6 +35,7 @@ def ingest_initial_sub_question_answers(
# so we might not need to dedup here
documents=dedup_inference_sections(documents, []),
context_documents=dedup_inference_sections(context_documents, []),
cited_docs=dedup_inference_sections(cited_docs, []),
decomp_answer_results=answer_results,
log_messages=[
f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}"

View File

@ -98,6 +98,7 @@ class RequireRefinedAnswerUpdate(LoggerUpdate):
class DecompAnswersUpdate(LoggerUpdate):
documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = []
cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = []
decomp_answer_results: Annotated[
list[QuestionAnswerResults], dedup_question_answer_results
] = []

View File

@ -103,6 +103,7 @@ class QuestionAnswerResults(BaseModel):
expanded_retrieval_results: list[QueryResult]
documents: list[InferenceSection]
context_documents: list[InferenceSection]
cited_docs: list[InferenceSection]
sub_question_retrieval_stats: AgentChunkStats

View File

@ -65,36 +65,36 @@ BASE_RAG_PROMPT = (
BASE_RAG_PROMPT_v2 = (
""" \n
{date_prompt}
Use the context provided below - and only the
provided context - to answer the given question. (Note that the answer is in service of answering a broader
question, given below as 'motivation'.)
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
question based on the context, say """
+ f'"{UNKNOWN_ANSWER}"'
+ """. It is a matter of life and death that you do NOT
use your internal knowledge, just the provided information!
Make sure that you keep all relevant information, specifically as it concerns the ultimate goal.
(But keep other details as well.)
Please remember to provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports.
Proper citations are very important to the user!\n\n\n
For your general information, here is the ultimate motivation:
\n--\n {original_question} \n--\n
\n\n
And here is the actual question I want you to answer based on the context above (with the motivation in mind):
\n--\n {question} \n--\n
Here is the context:
\n\n\n--\n {context} \n--\n
Please keep your answer brief and concise, and focus on facts and data.
Answer:
"""
)
SUB_CHECK_YES = "yes"
@ -759,7 +759,7 @@ IMPORTANT RULES:
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
- The answers to the subquestions should help you to structure your thoughts in order to answer the question.
Please provide inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is critical that you provide proper inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is important that the citation is close to the information it supports. If you have multiple citations,
please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to cite documents in addition
to the sub-questions! Proper citations are important for the final answer to be verifiable! \n\n\n
@ -810,9 +810,9 @@ answer
{history}
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation
is close to the information it supports. If you have multiple citations that support a fact, please cite for example
as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
It is important that the citation is close to the information it supports. If you have multiple citations that support
a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc.
Again, please NEVER cite sub-questions without a document citation!
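The rule stated above - a sub-question citation `[[Qn]]()` is only allowed alongside at least one document citation `[[Dn]]()` - can be checked mechanically. This validator is purely illustrative (not part of the commit; the names and regexes are assumptions), and it checks the rule at the answer level rather than per citation cluster:

```python
import re

DOC_CITE = re.compile(r"\[\[D\d+\]\]\(\)")
SUBQ_CITE = re.compile(r"\[\[Q\d+\]\]\(\)")


def citations_look_valid(answer: str) -> bool:
    """False if the answer cites a sub-question without citing any document."""
    return not (SUBQ_CITE.search(answer) and not DOC_CITE.search(answer))


print(citations_look_valid("Growth was 12% [[D2]]()[[Q1]]()."))  # True
print(citations_look_valid("Growth was 12% [[Q1]]()."))          # False
```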
@ -872,8 +872,8 @@ IMPORTANT RULES:
Again, you should be sure that the answer is supported by the information provided!
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation
is close to the information it supports. If you have multiple
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the
user!
@ -909,7 +909,7 @@ the main question. Note that the sub-questions have a type, 'initial' and 'revis
3) a number of documents that were deemed relevant for the question. This is the context that you use largely for
citations (see below).
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
@ -974,7 +974,7 @@ The information provided below consists of:
1) an initial answer that was given but found to be lacking in some way.
2) a number of documents that were also deemed relevant for the question.
Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
It is important that the citation is close to the information it supports. If you have multiple
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user!\n\n
@ -1084,3 +1084,25 @@ With these criteria in mind, is the refined answer substantially better than the
Please answer with a simple 'yes' or 'no'.
"""
HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
{persona_specification}
You need to summarize the key parts of the history of a conversation between a user and an agent
strictly for the purpose of providing suitable context for a question.
Here is the question:
\n--\n
{question}
\n--\n
And here is the history:
\n--\n
{history}
\n--\n
Please provide a summarized context from the history so that the question makes sense and can - with
suitable extra information - be answered.
Please do not use more than three or four sentences.
Context summary:
"""

View File

@ -20,6 +20,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
EntityRelationshipTermExtraction,
)
from onyx.agents.agent_search.shared_graph_utils.prompts import DATE_PROMPT
from onyx.agents.agent_search.shared_graph_utils.prompts import (
HISTORY_CONTEXT_SUMMARY_PROMPT,
)
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import CitationConfig
from onyx.chat.models import DocumentPruningConfig
@ -325,3 +328,25 @@ def retrieve_search_docs(
break
return retrieved_docs
def get_answer_citation_ids(answer_str: str) -> list[int]:
citation_ids = re.findall(r"\[\[D(\d+)\]\]", answer_str)
return list(set([(int(id) - 1) for id in citation_ids]))
def summarize_history(
history: str, question: str, persona_specification: str, model: LLM
) -> str:
history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format(
persona_specification=persona_specification, question=question, history=history
)
history_response = model.invoke(history_context_prompt)
if isinstance(history_response.content, str):
history_context_response_str = history_response.content
else:
history_context_response_str = ""
return history_context_response_str
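Tying the utility to its call site in `generate_initial_answer`: the summary is only requested when the static history exceeds the configured character budget, otherwise the raw history is used unchanged. A sketch of that guard with the LLM stubbed out (`StubLLM` and `build_history_for_prompt` are illustrative names, not part of the commit):

```python
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = 100  # default from the config hunk below


class StubLLM:
    """Stand-in for the graph's LLM; invoke() returns an object exposing .content."""

    def invoke(self, prompt: str):
        return type("Resp", (), {"content": "User is following up on last week's revenue question."})()


def summarize_history(history: str, question: str, persona_specification: str, model) -> str:
    # Same shape as the utility above, with the prompt abbreviated for the sketch
    response = model.invoke(f"{persona_specification}\nQuestion: {question}\nHistory: {history}\nSummary:")
    return response.content if isinstance(response.content, str) else ""


def build_history_for_prompt(history: str, question: str, persona: str, model) -> str:
    if len(history) <= AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH:
        return history  # short histories are forwarded verbatim
    return summarize_history(history, question, persona, model)


print(build_history_for_prompt("hi" * 200, "How did Q3 compare?", "", StubLLM()))
```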

View File

@ -92,3 +92,36 @@ except ValueError:
raise ValueError(
f"AGENT_EXPLORATORY_SEARCH_RESULTS must be an integer, got {AGENT_EXPLORATORY_SEARCH_RESULTS_OS}"
)
AGENT_MIN_ORIG_QUESTION_DOCS_OS: int | str = os.environ.get(
"AGENT_MIN_ORIG_QUESTION_DOCS", "5"
)
try:
AGENT_MIN_ORIG_QUESTION_DOCS = int(AGENT_MIN_ORIG_QUESTION_DOCS_OS)
except ValueError:
raise ValueError(
f"AGENT_MIN_ORIG_QUESTION_DOCS must be an integer, got {AGENT_MIN_ORIG_QUESTION_DOCS_OS}"
)
AGENT_MAX_ANSWER_CONTEXT_DOCS_OS: int | str = os.environ.get(
"AGENT_MAX_ANSWER_CONTEXT_DOCS", "30"
)
try:
AGENT_MAX_ANSWER_CONTEXT_DOCS = int(AGENT_MAX_ANSWER_CONTEXT_DOCS_OS)
except ValueError:
raise ValueError(
f"AGENT_MAX_ANSWER_CONTEXT_DOCS must be an integer, got {AGENT_MAX_ANSWER_CONTEXT_DOCS_OS}"
)
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS: int | str = os.environ.get(
"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS", "100"
)
try:
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS)
except ValueError:
raise ValueError(
f"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH must be an integer, got {AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS}"
)
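All three new settings repeat the same read-then-parse-then-raise pattern. If more agent knobs get added, a small helper would keep the behavior identical in one place; this is only a refactoring sketch under that assumption, not something the commit does:

```python
import os


def int_from_env(name: str, default: int) -> int:
    """Read an integer setting from the environment, failing loudly on non-integer values."""
    raw = os.environ.get(name, str(default))
    try:
        return int(raw)
    except ValueError:
        raise ValueError(f"{name} must be an integer, got {raw}")


AGENT_MIN_ORIG_QUESTION_DOCS = int_from_env("AGENT_MIN_ORIG_QUESTION_DOCS", 5)
AGENT_MAX_ANSWER_CONTEXT_DOCS = int_from_env("AGENT_MAX_ANSWER_CONTEXT_DOCS", 30)
AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int_from_env("AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH", 100)
```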