From f2aeeb7b3c98157fea06a5b1c1a809f865c3dfe3 Mon Sep 17 00:00:00 2001 From: joachim-danswer Date: Mon, 27 Jan 2025 14:02:19 -0800 Subject: [PATCH] Optimizations: docs for context & history - summarize history if long - introduced cited_docs from SQ as those must be provided to answer generations - limit number of docs TODO: same for refined flow --- .../nodes/answer_generation.py | 7 +- .../nodes/format_answer.py | 1 + .../answer_initial_sub_question/states.py | 1 + .../nodes/generate_initial_answer.py | 26 +++++- .../ingest_initial_sub_question_answers.py | 4 +- .../agent_search/deep_search_a/main/states.py | 1 + .../agent_search/shared_graph_utils/models.py | 1 + .../shared_graph_utils/prompts.py | 82 ++++++++++++------- .../agent_search/shared_graph_utils/utils.py | 25 ++++++ backend/onyx/configs/agent_configs.py | 33 ++++++++ 10 files changed, 148 insertions(+), 33 deletions(-) diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py index 930f0cc82..de26d2cdf 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/answer_generation.py @@ -23,6 +23,7 @@ from onyx.agents.agent_search.shared_graph_utils.prompts import ( ASSISTANT_SYSTEM_PROMPT_PERSONA, ) from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS +from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id from onyx.chat.models import AgentAnswerPiece @@ -71,7 +72,7 @@ def answer_generation( msg = build_sub_question_answer_prompt( question=question, 
original_question=agent_search_config.search_request.query, - docs=docs, + docs=context_docs, persona_specification=persona_specification, config=fast_llm.config, ) @@ -99,6 +100,9 @@ def answer_generation( answer_str = merge_message_runs(response, chunk_separator="")[0].content + answer_citation_ids = get_answer_citation_ids(answer_str) + cited_docs = [context_docs[id] for id in answer_citation_ids] + stop_event = StreamStopInfo( stop_reason=StreamStopReason.FINISHED, stream_type="sub_answer", @@ -110,6 +114,7 @@ def answer_generation( now_end = datetime.now() return QAGenerationUpdate( answer=answer_str, + cited_docs=cited_docs, log_messages=[ f"{now_start} -- Answer generation SQ-{level} - Q{question_nr} - Time taken: {now_end - now_start}" ], diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py index 454953836..aa97b4d2b 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/nodes/format_answer.py @@ -22,6 +22,7 @@ def format_answer(state: AnswerQuestionState) -> AnswerQuestionOutput: expanded_retrieval_results=state.expanded_retrieval_results, documents=state.documents, context_documents=state.context_documents, + cited_docs=state.cited_docs, sub_question_retrieval_stats=state.sub_question_retrieval_stats, ) ], diff --git a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py index fff26b70c..4c03aa43f 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py +++ b/backend/onyx/agents/agent_search/deep_search_a/answer_initial_sub_question/states.py @@ -24,6 +24,7 @@ class QACheckUpdate(BaseModel): class 
QAGenerationUpdate(BaseModel): answer: str = "" log_messages: list[str] = [] + cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = [] # answer_stat: AnswerStats diff --git a/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py b/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py index d40829142..662c2c8f2 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py +++ b/backend/onyx/agents/agent_search/deep_search_a/initial_search_sq_subgraph/nodes/generate_initial_answer.py @@ -52,8 +52,13 @@ from onyx.agents.agent_search.shared_graph_utils.utils import format_docs from onyx.agents.agent_search.shared_graph_utils.utils import get_persona_prompt from onyx.agents.agent_search.shared_graph_utils.utils import get_today_prompt from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id +from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history from onyx.chat.models import AgentAnswerPiece from onyx.chat.models import ExtendedToolResponse +from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS +from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH +from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS +from onyx.context.search.models import InferenceSection from onyx.tools.tool_implementations.search.search_tool import yield_search_responses @@ -69,14 +74,29 @@ def generate_initial_answer( persona_prompt = get_persona_prompt(agent_a_config.search_request.persona) history = build_history_prompt(agent_a_config.prompt_builder) + date_str = get_today_prompt() sub_question_docs = state.context_documents + sub_questions_cited_docs = state.cited_docs all_original_question_documents = state.all_original_question_documents + consolidated_context_docs: list[InferenceSection] = [] + counter = 0 + for 
original_doc_number, original_doc in enumerate(all_original_question_documents): + if original_doc_number not in sub_questions_cited_docs: + if ( + counter <= AGENT_MIN_ORIG_QUESTION_DOCS + or len(consolidated_context_docs) < AGENT_MAX_ANSWER_CONTEXT_DOCS + ): + consolidated_context_docs.append(original_doc) + counter += 1 + + # sort docs by their scores - though the scores refer to different questions relevant_docs = dedup_inference_sections( - sub_question_docs, all_original_question_documents + consolidated_context_docs, consolidated_context_docs ) + decomp_questions = [] # Use the query info from the base document retrieval @@ -171,6 +191,10 @@ def generate_initial_answer( model = agent_a_config.fast_llm + # summarize the history iff too long + if len(history) > AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH: + history = summarize_history(history, question, persona_specification, model) + doc_context = format_docs(relevant_docs) doc_context = trim_prompt_piece( model.config, diff --git a/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py b/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py index e8b937f5f..1ba2755ad 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py +++ b/backend/onyx/agents/agent_search/deep_search_a/initial_sub_question_answering/nodes/ingest_initial_sub_question_answers.py @@ -18,11 +18,12 @@ def ingest_initial_sub_question_answers( logger.debug(f"--------{now_start}--------INGEST ANSWERS---") documents = [] context_documents = [] + cited_docs = [] answer_results = state.answer_results if hasattr(state, "answer_results") else [] for answer_result in answer_results: documents.extend(answer_result.documents) context_documents.extend(answer_result.context_documents) - + cited_docs.extend(answer_result.cited_docs) now_end = 
datetime.now() logger.debug( @@ -34,6 +35,7 @@ def ingest_initial_sub_question_answers( # so we might not need to dedup here documents=dedup_inference_sections(documents, []), context_documents=dedup_inference_sections(context_documents, []), + cited_docs=dedup_inference_sections(cited_docs, []), decomp_answer_results=answer_results, log_messages=[ f"{now_start} -- Main - Ingest initial processed sub questions, Time taken: {now_end - now_start}" diff --git a/backend/onyx/agents/agent_search/deep_search_a/main/states.py b/backend/onyx/agents/agent_search/deep_search_a/main/states.py index c55a6ceca..dc10e8fc8 100644 --- a/backend/onyx/agents/agent_search/deep_search_a/main/states.py +++ b/backend/onyx/agents/agent_search/deep_search_a/main/states.py @@ -98,6 +98,7 @@ class RequireRefinedAnswerUpdate(LoggerUpdate): class DecompAnswersUpdate(LoggerUpdate): documents: Annotated[list[InferenceSection], dedup_inference_sections] = [] context_documents: Annotated[list[InferenceSection], dedup_inference_sections] = [] + cited_docs: Annotated[list[InferenceSection], dedup_inference_sections] = [] decomp_answer_results: Annotated[ list[QuestionAnswerResults], dedup_question_answer_results ] = [] diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/models.py b/backend/onyx/agents/agent_search/shared_graph_utils/models.py index bae6191e6..10ec49172 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/models.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/models.py @@ -103,6 +103,7 @@ class QuestionAnswerResults(BaseModel): expanded_retrieval_results: list[QueryResult] documents: list[InferenceSection] context_documents: list[InferenceSection] + cited_docs: list[InferenceSection] sub_question_retrieval_stats: AgentChunkStats diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py index b91e468a9..bbee25bd0 100644 --- 
a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py @@ -65,36 +65,36 @@ BASE_RAG_PROMPT = ( BASE_RAG_PROMPT_v2 = ( """ \n - {date_prompt} - Use the context provided below - and only the - provided context - to answer the given question. (Note that the answer is in service of answering a broader - question, given below as 'motivation'.) +{date_prompt} +Use the context provided below - and only the +provided context - to answer the given question. (Note that the answer is in service of answering a broader +question, given below as 'motivation'.) - Again, only use the provided context and do not use your internal knowledge! If you cannot answer the - question based on the context, say """ +Again, only use the provided context and do not use your internal knowledge! If you cannot answer the +question based on the context, say """ + f'"{UNKNOWN_ANSWER}"' + """. It is a matter of life and death that you do NOT - use your internal knowledge, just the provided information! +use your internal knowledge, just the provided information! - Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. - (But keep other details as well.) +Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. +(But keep other details as well.) - Please remember to provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! - It is important that the citation is close to the information it supports. - Proper citations are very important to the user!\n\n\n +It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is important that the citation is close to the information it supports. 
+Proper citations are very important to the user!\n\n\n - For your general information, here is the ultimate motivation: - \n--\n {original_question} \n--\n - \n\n - And here is the actual question I want you to answer based on the context above (with the motivation in mind): - \n--\n {question} \n--\n +For your general information, here is the ultimate motivation: +\n--\n {original_question} \n--\n +\n\n +And here is the actual question I want you to answer based on the context above (with the motivation in mind): +\n--\n {question} \n--\n - Here is the context: - \n\n\n--\n {context} \n--\n - Please keep your answer brief and concise, and focus on facts and data. +Here is the context: +\n\n\n--\n {context} \n--\n +Please keep your answer brief and concise, and focus on facts and data. - Answer: - """ +Answer: +""" ) SUB_CHECK_YES = "yes" @@ -759,7 +759,7 @@ IMPORTANT RULES: - If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why. - The answers to the subquestions should help you to structure your thoughts in order to answer the question. -Please provide inline citations of documentsin the format [[D1]](), [[D2]](), [[D3]](), etc.! +It is critical that you provide proper inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to cite documents in addition to the sub-questions! Proper citations are important for the final answer to be verifiable! \n\n\n @@ -810,9 +810,9 @@ answer {history} -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! It is important that the citation -is close to the information it supports. If you have multiple citations that support a fact, please cite for example -as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. 
+It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.! +It is important that the citation is close to the information it supports. If you have multiple citations that support +a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc. Again, please NEVER cite sub-questions without a document citation! @@ -872,8 +872,8 @@ IMPORTANT RULES: Again, you should be sure that the answer is supported by the information provided! -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation -is close to the information it supports. If you have multiple +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user! @@ -909,7 +909,7 @@ the main question. Note that the sub-questions have a type, 'initial' and 'revised'. 3) a number of documents that were deemed relevant for the question. This the is the context that you use largey for citations (see below). -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. 
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question @@ -974,7 +974,7 @@ The information provided below consists of: 1) an initial answer that was given but found to be lacking in some way. 2) a number of documents that were also deemed relevant for the question. -Please provide inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! +It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc! It is important that the citation is close to the information it supports. If you have multiple citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user!\n\n @@ -1084,3 +1084,25 @@ With these criteria in mind, is the refined answer substantially better than the Please answer with a simple 'yes' or 'no'. """ +HISTORY_CONTEXT_SUMMARY_PROMPT = """\n +{persona_specification} +You need to summarize the key parts of the history of a conversation between a user and an agent +strictly for the purpose of providing the suitable context for a question. + +Here is the question: +\n--\n +{question} +\n--\n + +And here is the history: +\n--\n +{history} +\n--\n + +Please provide a summarized context from the history so that the question makes sense and can - with +suitable extra information - be answered. + +Please do not use more than three or four sentences. 
+ +Context summary: +""" diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py index 2cd71488b..1697daa7c 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py @@ -20,6 +20,9 @@ from onyx.agents.agent_search.shared_graph_utils.models import ( EntityRelationshipTermExtraction, ) from onyx.agents.agent_search.shared_graph_utils.prompts import DATE_PROMPT +from onyx.agents.agent_search.shared_graph_utils.prompts import ( + HISTORY_CONTEXT_SUMMARY_PROMPT, +) from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import CitationConfig from onyx.chat.models import DocumentPruningConfig @@ -325,3 +328,25 @@ def retrieve_search_docs( break return retrieved_docs + + +def get_answer_citation_ids(answer_str: str) -> list[int]: + citation_ids = re.findall(r"\[\[D(\d+)\]\]", answer_str) + return list(set([(int(id) - 1) for id in citation_ids])) + + +def summarize_history( + history: str, question: str, persona_specification: str, model: LLM +) -> str: + history_context_prompt = HISTORY_CONTEXT_SUMMARY_PROMPT.format( + persona_specification=persona_specification, question=question, history=history + ) + + history_response = model.invoke(history_context_prompt) + + if isinstance(history_response.content, str): + history_context_response_str = history_response.content + else: + history_context_response_str = "" + + return history_context_response_str diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 0e8ad1d6a..25f78e52f 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -92,3 +92,36 @@ except ValueError: raise ValueError( f"AGENT_EXPLORATORY_SEARCH_RESULTS must be an integer, got {AGENT_EXPLORATORY_SEARCH_RESULTS_OS}" ) + +AGENT_MIN_ORIG_QUESTION_DOCS_OS: int | str = os.environ.get( + 
"AGENT_MIN_ORIG_QUESTION_DOCS", "5" ) + +try: + AGENT_MIN_ORIG_QUESTION_DOCS = int(AGENT_MIN_ORIG_QUESTION_DOCS_OS) +except ValueError: + raise ValueError( + f"AGENT_MIN_ORIG_QUESTION_DOCS must be an integer, got {AGENT_MIN_ORIG_QUESTION_DOCS_OS}" + ) + +AGENT_MAX_ANSWER_CONTEXT_DOCS_OS: int | str = os.environ.get( + "AGENT_MAX_ANSWER_CONTEXT_DOCS", "30" +) + +try: + AGENT_MAX_ANSWER_CONTEXT_DOCS = int(AGENT_MAX_ANSWER_CONTEXT_DOCS_OS) +except ValueError: + raise ValueError( + f"AGENT_MAX_ANSWER_CONTEXT_DOCS must be an integer, got {AGENT_MAX_ANSWER_CONTEXT_DOCS_OS}" + ) + +AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS: int | str = os.environ.get( + "AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH", "100" +) + +try: + AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH = int(AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS) +except ValueError: + raise ValueError( + f"AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH must be an integer, got {AGENT_MAX_STATIC_HISTORY_CHAR_LENGTH_OS}" + )