From a067b324673e7a0b1a7b58f6167722782195bb9c Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 2 Feb 2025 14:21:23 -0800 Subject: [PATCH] Partial Prompt Updates (#3880) --- .../nodes/check_sub_answer.py | 4 +- .../nodes/generate_sub_answer.py | 2 +- .../nodes/generate_initial_answer.py | 20 +- .../initial/generate_initial_answer/states.py | 12 +- .../nodes/decompose_orig_question.py | 21 +- .../initial/generate_sub_answers/states.py | 12 +- .../deep_search/main/nodes/compare_answers.py | 6 +- .../nodes/create_refined_sub_questions.py | 6 +- .../main/nodes/extract_entities_terms.py | 11 +- .../main/nodes/generate_refined_answer.py | 20 +- .../nodes/expand_queries.py | 6 +- .../nodes/verify_documents.py | 2 +- .../shared_graph_utils/agent_prompt_ops.py | 4 +- .../agent_search/shared_graph_utils/utils.py | 18 +- backend/onyx/configs/agent_configs.py | 4 +- .../prompts.py => prompts/agent_search.py} | 442 +++++++++--------- 16 files changed, 280 insertions(+), 310 deletions(-) rename backend/onyx/{agents/agent_search/shared_graph_utils/prompts.py => prompts/agent_search.py} (61%) diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py index bc64f237b6f..fc35cefab97 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/check_sub_answer.py @@ -12,12 +12,12 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer SubQuestionAnswerCheckUpdate, ) from onyx.agents.agent_search.models import GraphConfig -from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_ANSWER_CHECK_PROMPT -from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id +from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT +from onyx.prompts.agent_search import UNKNOWN_ANSWER def check_sub_answer( diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py index 9e1f7024754..b9a5345efca 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_individual_sub_answer/nodes/generate_sub_answer.py @@ -16,7 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import ( build_sub_question_answer_prompt, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, @@ -31,6 +30,7 @@ from onyx.chat.models import StreamStopInfo from onyx.chat.models import StreamStopReason from onyx.chat.models import StreamType from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS +from onyx.prompts.agent_search import NO_RECOVERED_DOCS from onyx.utils.logger import 
setup_logger logger = setup_logger() diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py index f8cc41ba193..cfc908bd2f4 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/nodes/generate_initial_answer.py @@ -30,16 +30,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResul from onyx.agents.agent_search.shared_graph_utils.operators import ( dedup_inference_sections, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - SUB_QUESTION_ANSWER_TEMPLATE, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER from onyx.agents.agent_search.shared_graph_utils.utils import ( dispatch_main_answer_stop_info, ) @@ -57,6 +47,16 @@ from onyx.chat.models import ExtendedToolResponse from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS from onyx.context.search.models import InferenceSection +from onyx.prompts.agent_search import ( + INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS, +) +from onyx.prompts.agent_search import ( + INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS, +) +from onyx.prompts.agent_search import ( + SUB_QUESTION_ANSWER_TEMPLATE, +) +from onyx.prompts.agent_search import UNKNOWN_ANSWER from onyx.tools.tool_implementations.search.search_tool import yield_search_responses diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/states.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/states.py index bb699f0123a..3852756018c 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/states.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_initial_answer/states.py @@ -24,17 +24,15 @@ from onyx.agents.agent_search.deep_search.main.states import ( from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.models import ( QuestionRetrievalResult, ) +from onyx.context.search.models import InferenceSection + ### States ### - - class SubQuestionRetrievalInput(CoreState): - pass + exploratory_search_results: list[InferenceSection] ## Graph State - - class SubQuestionRetrievalState( # This includes the core state SubQuestionRetrievalInput, @@ -48,8 +46,6 @@ class SubQuestionRetrievalState( base_raw_search_result: Annotated[list[QuestionRetrievalResult], add] -## Graph Output State - presently not used - - +## Graph Output State class SubQuestionRetrievalOutput(TypedDict): log_messages: list[str] diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py index 8be68dba518..12e0f7e3dd4 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/nodes/decompose_orig_question.py @@ -22,12 
+22,6 @@ from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import ( build_history_prompt, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - INITIAL_QUESTION_DECOMPOSITION_PROMPT, -) from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, @@ -38,6 +32,15 @@ from onyx.chat.models import StreamStopReason from onyx.chat.models import StreamType from onyx.chat.models import SubQuestionPiece from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION +from onyx.prompts.agent_search import ( + INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH, +) +from onyx.prompts.agent_search import ( + INITIAL_QUESTION_DECOMPOSITION_PROMPT, +) +from onyx.utils.logger import setup_logger + +logger = setup_logger() def decompose_orig_question( @@ -63,6 +66,12 @@ def decompose_orig_question( # Initial search to inform decomposition. Just get top 3 fits if perform_initial_search_decomposition: + # Due to unfortunate state representation in LangGraph, we need here to double check that the retrieval has + # happened prior to this point, allowing silent failure here since it is not critical for decomposition in + # all queries. + if not state.exploratory_search_results: + logger.error("Initial search for decomposition failed") + sample_doc_str = "\n\n".join( [ doc.combined_content diff --git a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/states.py b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/states.py index 29d98b3eedb..c24e2f0e005 100644 --- a/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/states.py +++ b/backend/onyx/agents/agent_search/deep_search/initial/generate_sub_answers/states.py @@ -10,17 +10,15 @@ from onyx.agents.agent_search.deep_search.main.states import ( from onyx.agents.agent_search.deep_search.main.states import ( SubQuestionResultsUpdate, ) +from onyx.context.search.models import InferenceSection + ### States ### - - class SubQuestionAnsweringInput(CoreState): - pass + exploratory_search_results: list[InferenceSection] ## Graph State - - class SubQuestionAnsweringState( # This includes the core state SubQuestionAnsweringInput, @@ -31,8 +29,6 @@ class SubQuestionAnsweringState( pass -## Graph Output State - presently not used - - +## Graph Output State class SubQuestionAnsweringOutput(TypedDict): log_messages: list[str] diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py index 4d2bde7ce3c..5222cec0fc8 100644 --- a/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py +++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/compare_answers.py @@ -10,14 +10,14 @@ from onyx.agents.agent_search.deep_search.main.states import ( ) from onyx.agents.agent_search.deep_search.main.states import MainState from onyx.agents.agent_search.models import GraphConfig -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - INITIAL_REFINED_ANSWER_COMPARISON_PROMPT, -) from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) from onyx.agents.agent_search.shared_graph_utils.utils import 
write_custom_event from onyx.chat.models import RefinedAnswerImprovement +from onyx.prompts.agent_search import ( + INITIAL_REFINED_ANSWER_COMPARISON_PROMPT, +) def compare_answers( diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py index dc87cc00a9b..33a5c270dce 100644 --- a/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py +++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/create_refined_sub_questions.py @@ -20,9 +20,6 @@ from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import ( build_history_prompt, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - REFINEMENT_QUESTION_DECOMPOSITION_PROMPT, -) from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated from onyx.agents.agent_search.shared_graph_utils.utils import ( format_entity_term_extraction, @@ -32,6 +29,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import ( ) from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event +from onyx.prompts.agent_search import ( + REFINEMENT_QUESTION_DECOMPOSITION_PROMPT, +) from onyx.tools.models import ToolCallKickoff diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py index ddcc3ed8a88..6575819ba56 100644 --- a/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py +++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/extract_entities_terms.py @@ -18,19 +18,14 @@ from onyx.agents.agent_search.shared_graph_utils.models import EntityExtractionR from onyx.agents.agent_search.shared_graph_utils.models import ( EntityRelationshipTermExtraction, ) - - -from onyx.agents.agent_search.shared_graph_utils.models import Relationship -from onyx.agents.agent_search.shared_graph_utils.models import Term -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - ENTITY_TERM_EXTRACTION_PROMPT, -) - from onyx.agents.agent_search.shared_graph_utils.utils import format_docs from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) from onyx.configs.constants import NUM_EXPLORATORY_DOCS +from onyx.prompts.agent_search import ( + ENTITY_TERM_EXTRACTION_PROMPT, +) def extract_entities_terms( diff --git a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py index cb629359948..0e5fda15397 100644 --- a/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py +++ b/backend/onyx/agents/agent_search/deep_search/main/nodes/generate_refined_answer.py @@ -28,16 +28,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats from onyx.agents.agent_search.shared_graph_utils.operators import ( dedup_inference_sections, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - SUB_QUESTION_ANSWER_TEMPLATE_REFINED, -) -from 
onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER from onyx.agents.agent_search.shared_graph_utils.utils import ( dispatch_main_answer_stop_info, ) @@ -55,6 +45,16 @@ from onyx.chat.models import AgentAnswerPiece from onyx.chat.models import ExtendedToolResponse from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS +from onyx.prompts.agent_search import ( + REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS, +) +from onyx.prompts.agent_search import ( + REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS, +) +from onyx.prompts.agent_search import ( + SUB_QUESTION_ANSWER_TEMPLATE_REFINED, +) +from onyx.prompts.agent_search import UNKNOWN_ANSWER from onyx.tools.tool_implementations.search.search_tool import yield_search_responses diff --git a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py index 57184cc6d3a..b01be14218f 100644 --- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py +++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/expand_queries.py @@ -16,14 +16,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor QueryExpansionUpdate, ) from onyx.agents.agent_search.models import GraphConfig -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - QUERY_REWRITING_PROMPT, -) from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated from onyx.agents.agent_search.shared_graph_utils.utils import ( get_langgraph_node_log_string, ) from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id +from onyx.prompts.agent_search import ( + QUERY_REWRITING_PROMPT, +) def expand_queries( diff --git a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py index c670865fd86..2709e688f82 100644 --- a/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py +++ b/backend/onyx/agents/agent_search/deep_search/shared/expanded_retrieval/nodes/verify_documents.py @@ -13,7 +13,7 @@ from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import ( trim_prompt_piece, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( +from onyx.prompts.agent_search import ( DOCUMENT_VERIFICATION_PROMPT, ) diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py index bd68e7f2971..ec1840b586d 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/agent_prompt_ops.py @@ -7,8 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig from onyx.agents.agent_search.shared_graph_utils.models import ( AgentPromptEnrichmentComponents, ) -from onyx.agents.agent_search.shared_graph_utils.prompts import HISTORY_FRAMING_PROMPT -from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_QUESTION_RAG_PROMPT from onyx.agents.agent_search.shared_graph_utils.utils import ( get_persona_agent_prompt_expressions, ) @@ -20,6 +18,8 @@ from onyx.llm.interfaces import LLMConfig from onyx.llm.utils import 
get_max_input_tokens from onyx.natural_language_processing.utils import get_tokenizer from onyx.natural_language_processing.utils import tokenizer_trim_content +from onyx.prompts.agent_search import HISTORY_FRAMING_PROMPT +from onyx.prompts.agent_search import SUB_QUESTION_RAG_PROMPT from onyx.prompts.prompt_utils import build_date_time_string diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py index a9a44c053aa..6b3cf259363 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/utils.py +++ b/backend/onyx/agents/agent_search/shared_graph_utils/utils.py @@ -24,15 +24,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import ( EntityRelationshipTermExtraction, ) from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - ASSISTANT_SYSTEM_PROMPT_DEFAULT, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - ASSISTANT_SYSTEM_PROMPT_PERSONA, -) -from onyx.agents.agent_search.shared_graph_utils.prompts import ( - HISTORY_CONTEXT_SUMMARY_PROMPT, -) from onyx.chat.models import AnswerPacket from onyx.chat.models import AnswerStyleConfig from onyx.chat.models import CitationConfig @@ -56,6 +47,15 @@ from onyx.db.engine import get_session_context_manager from onyx.db.persona import get_persona_by_id from onyx.db.persona import Persona from onyx.llm.interfaces import LLM +from onyx.prompts.agent_search import ( + ASSISTANT_SYSTEM_PROMPT_DEFAULT, +) +from onyx.prompts.agent_search import ( + ASSISTANT_SYSTEM_PROMPT_PERSONA, +) +from onyx.prompts.agent_search import ( + HISTORY_CONTEXT_SUMMARY_PROMPT, +) from onyx.tools.force import ForceUseTool from onyx.tools.tool_constructor import SearchToolConfig from onyx.tools.tool_implementations.search.search_tool import ( diff --git a/backend/onyx/configs/agent_configs.py b/backend/onyx/configs/agent_configs.py index 8685e67d867..87fa8b80b09 100644 --- a/backend/onyx/configs/agent_configs.py +++ b/backend/onyx/configs/agent_configs.py @@ -8,7 +8,7 @@ AGENT_DEFAULT_RERANKING_HITS = 10 AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8 AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3 AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5 -AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 3 +AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5 AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000 @@ -59,7 +59,7 @@ AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int( AGENT_EXPLORATORY_SEARCH_RESULTS = int( os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS") or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS -) # 3 +) # 5 AGENT_MIN_ORIG_QUESTION_DOCS = int( os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS") diff --git a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py b/backend/onyx/prompts/agent_search.py similarity index 61% rename from backend/onyx/agents/agent_search/shared_graph_utils/prompts.py rename to backend/onyx/prompts/agent_search.py index 758874db577..f05c8b60217 100644 --- a/backend/onyx/agents/agent_search/shared_graph_utils/prompts.py +++ b/backend/onyx/prompts/agent_search.py @@ -1,55 +1,53 @@ -# The prompts for the agentic framework. The order follows approximately the order -# of the actions in the graph # Standards - +SEPARATOR_LINE = "-------" UNKNOWN_ANSWER = "I do not have enough information to answer this question." 
- NO_RECOVERED_DOCS = "No relevant information recovered" - -DATE_PROMPT = """Today is {date}.\n\n""" - +DATE_PROMPT = "Today is {date}.\n\n" SUB_CHECK_YES = "yes" SUB_CHECK_NO = "no" + # Framing/Support/Template Prompts - -HISTORY_FRAMING_PROMPT = """\n +HISTORY_FRAMING_PROMPT = f""" For more context, here is the history of the conversation so far that preceded this question: -\n-------\n -{history} -\n-------\n\n -""" +{SEPARATOR_LINE} +{{history}} +{SEPARATOR_LINE} +""".strip() -ASSISTANT_SYSTEM_PROMPT_DEFAULT = """ -You are an assistant for question-answering tasks.""" -ASSISTANT_SYSTEM_PROMPT_PERSONA = """ +ASSISTANT_SYSTEM_PROMPT_DEFAULT = ( + """You are an assistant for question-answering tasks.""" +) + +ASSISTANT_SYSTEM_PROMPT_PERSONA = f""" You are an assistant for question-answering tasks. Here is more information about you: -\n-------\n -{persona_prompt} -\n-------\n -""" +{SEPARATOR_LINE} +{{persona_prompt}} +{SEPARATOR_LINE} +""".strip() + SUB_QUESTION_ANSWER_TEMPLATE = """\n Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n """ -SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n -Sub-Question: Q{sub_question_num}\n + +SUB_QUESTION_ANSWER_TEMPLATE_REFINED = f""" +Sub-Question: Q{{sub_question_num}}\n Type: -\n----\n -{sub_question_type} -\n----\n +{SEPARATOR_LINE} +{{sub_question_type}} +{SEPARATOR_LINE} Sub-Question: -\n----\n -{sub_question} -\n----\n -\nAnswer: -\n----\n -{sub_answer} -\n----\n -\n -""" +{SEPARATOR_LINE} +{{sub_question}} +{SEPARATOR_LINE} +Answer: +{SEPARATOR_LINE} +{{sub_answer}} +{SEPARATOR_LINE} +""".strip() SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n @@ -58,230 +56,206 @@ Sub-Question: Q{sub_question_num}\n Type: {sub_question_type}\n Sub-Question:\n """ -# Setap/Util Prompts - -ENTITY_TERM_EXTRACTION_PROMPT = """ \n +# Step/Utility Prompts +ENTITY_TERM_EXTRACTION_PROMPT = f""" Based on the original question and some context retrieved from a dataset, please generate a list of entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts (e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other. -\n\n Here is the original question: -\n-------\n -{question} -\n-------\n +{SEPARATOR_LINE} +{{question}} +{SEPARATOR_LINE} + And here is the context retrieved: -\n-------\n -{context} -\n-------\n +{SEPARATOR_LINE} +{{context}} +{SEPARATOR_LINE} Please format your answer as a json object in the following format: - -{{"retrieved_entities_relationships": {{ - "entities": [{{ - "entity_name": , - "entity_type": - }}], - "relationships": [{{ - "relationship_name": , - "relationship_type": , - "relationship_entities": [, , ...] - }}], - "terms": [{{ - "term_name": , - "term_type": , - "term_similar_to": - }}] +{{ + "retrieved_entities_relationships": {{ + "entities": [ + {{ + "entity_name": "", + "entity_type": "" + }} + ], + "relationships": [ + {{ + "relationship_name": "", + "relationship_type": "", + "relationship_entities": ["", "", "..."] + }} + ], + "terms": [ + {{ + "term_name": "", + "term_type": "", + "term_similar_to": [""] + }} + ] + }} }} -}} - """ +""".strip() -HISTORY_CONTEXT_SUMMARY_PROMPT = """\n -{persona_specification} -Your task now is to summarize the key parts of the history of a conversation between a user and an agent. 
The
-summary has two purposes:
- 1) providing the suitable context for a new question, and
- 2) To capture the key information that was discussed and that the user may have a follow-up question about.
-\n-------\n
-{question}
-\n-------\n
+HISTORY_CONTEXT_SUMMARY_PROMPT = (
+    "{persona_specification}\n\n"
+    "Your task now is to summarize the key parts of the history of a conversation between a user and an agent."
+    " The summary has two purposes:\n"
+    " 1) providing the suitable context for a new question, and\n"
+    " 2) To capture the key information that was discussed and that the user may have a follow-up question about.\n\n"
+    "Here is the question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the history:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{history}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please provide a summarized context from the history so that the question makes sense and can"
+    " - with suitable extra information - be answered.\n\n"
+    "Do not use more than three or four sentences.\n\n"
+    "History summary:"
+).strip()

-And here is the history:
-\n-------\n
-{history}
-\n-------\n
-
-Please provide a summarized context from the history so that the question makes sense and can - with
-suitable extra information - be answered.
-
-Please do not use more than three or four sentences.
-
-History summary:
-"""

 # INITIAL PHASE

+# Sub-question
+# Intentionally left a copy in case we want to modify this one differently
+INITIAL_QUESTION_DECOMPOSITION_PROMPT = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we do to improve"
+    " stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "Here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " \n"
+    " \n"
+    " \n"
+    " ...\n\n"
+    "Answer:"
+).strip()

-## Sub-question

+INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. 
The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we do to improve"
+    " stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "To give you some context, you will see below also some documents that may relate to the question. Please only"
+    " use this information to learn what the question is approximately asking about, but do not focus on the details"
+    " to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may"
+    " not be in these few documents, so DO NOT focus too much on the documents when constructing the sub-questions!"
+    " Decomposition and disambiguations are most important!\n\n"
+    "Here are the sample docs to give you some context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{sample_doc_str}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " \n"
+    " \n"
+    " \n"
+    " ...\n\n"
+    "Answer:"
+).strip()

-INITIAL_QUESTION_DECOMPOSITION_PROMPT = """
-If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
-answer the original question. The purpose for this decomposition may be to
- 1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
- 'what are sales for company B')]
- 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
- 'what is our market share with company A', 'is company A a reference customer for us', etc.])
- 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
- familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
- (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
- 'what do we do to improve stability of product X', ...])
- 4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
-
-If you think that a decomposition is not needed or helpful, please just return an empty string. That is ok too. 
- -Here is the initial question: -\n-------\n -{question} -\n-------\n -{history} - -Please formulate your answer as a newline-separated list of questions like so: - - - - -Answer:""" - - -INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = """ -If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to -answer the original question. The purpose for this decomposition may be to - 1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A', - 'what are sales for company B')] - 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A', - 'what is our market share with company A', 'is company A a reference customer for us', etc.]) - 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally - familiar with the entity, then you can decompose the question into sub-questions that are more specific to components - (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X', - 'what do we do to improve stability of product X', ...]) - 4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.) - -Here are some other rules: - -1) To give you some context, you will see below also some documents that relate to the question. Please only -use this information to learn what the question is approximately asking about, but do not focus on the details -to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may -not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! Decomposition and -disambiguations are most important! -2) If you think that a decomposition is not needed or helpful, please just return an empty string. That is very much ok too. - -Here are the sample docs to give you some context: -\n-------\n -{sample_doc_str} -\n-------\n - -And here is the initial question that you should think about decomposing: -\n-------\n -{question} -\n-------\n - -{history} - -Please formulate your answer as a newline-separated list of questions like so: - - - - ... - -Answer:""" # Retrieval +QUERY_REWRITING_PROMPT = ( + "Please convert the initial user question into a 2-3 more appropriate short and pointed search queries for" + " retrieval from a document store. Particularly, try to think about resolving ambiguities and make the search" + " queries more specific, enabling the system to search more broadly.\n" + "Also, try to make the search queries not redundant, i.e. not too similar!\n\n" + "Here is the initial question:\n" + f"{SEPARATOR_LINE}\n" + "{question}\n" + f"{SEPARATOR_LINE}\n\n" + "Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:\n" + "\n" + "\n" + "...\n\n" + "Queries:" +) -QUERY_REWRITING_PROMPT = """ \n -Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrivel from a -document store. Particularly, try to think about resolving ambiguities and make the search queries more specific, -enabling the system to search more broadly. -Also, try to make the search queries not redundant, i.e. not too similar! 
\n\n -Here is the initial question: -\n-------\n -{question} -\n-------\n -Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows: - - -... -queries: """ -DOCUMENT_VERIFICATION_PROMPT = """ -You are supposed to judge whether a document text contains data or information that is potentially relevant -for a question. It does not have to be fully relevant, but check whether it has some information that -would help - possibly in conjunction with other documents - to address the question. +DOCUMENT_VERIFICATION_PROMPT = ( + "Determine whether the following document text contains data or information that is potentially relevant " + "for a question. It does not have to be fully relevant, but check whether it has some information that " + "would help - possibly in conjunction with other documents - to address the question.\n\n" + "Be careful that you do not use a document where you are not sure whether the text applies to the objects " + "or entities that are relevant for the question. For example, a book about chess could have long passage " + "discussing the psychology of chess without - within the passage - mentioning chess. If now a question " + "is asked about the psychology of football, one could be tempted to use the document as it does discuss " + "psychology in sports. However, it is NOT about football and should not be deemed relevant. Please " + "consider this logic.\n\n" + "DOCUMENT TEXT:\n" + f"{SEPARATOR_LINE}\n" + "{document_content}\n" + f"{SEPARATOR_LINE}\n\n" + "Do you think that this document text is useful and relevant to answer the following question?\n\n" + "QUESTION:\n" + f"{SEPARATOR_LINE}\n" + "{question}\n" + f"{SEPARATOR_LINE}\n\n" + "Please answer with exactly and only a 'yes' or 'no':\n\n" + "Answer:" +).strip() -Be careful that you do not use a document where you are not sure whether the text applies to the objects -or entities that are relevant for the question. For example, a book about chess could have long passage -discussing the psychology of chess without - within the passage - mentioning chess. If now a question -is asked about the psychology of football, one could be tempted to use the document as it does discuss -psychology in sports. However, it is NOT about football and should not be deemed relevant. Please -consider this logic. - -Here is a document text that you can take as a fact: - -DOCUMENT INFORMATION: -\n-------\n -{document_content} -\n-------\n - -Do you think that this document text is useful and relevant to answer the following question? - -QUESTION: -\n-------\n -{question} -\n-------\n - -Please answer with 'yes' or 'no': - -Answer: - -""" # Sub-Question Anser Generation - SUB_QUESTION_RAG_PROMPT = ( - """ \n -{date_prompt} -Use the context provided below - and only the -provided context - to answer the given question. (Note that the answer is in service of answering a broader -question, given below as 'motivation'.) - -Again, only use the provided context and do not use your internal knowledge! If you cannot answer the -question based on the context, say """ - + f'"{UNKNOWN_ANSWER}"' - + """. It is a matter of life and death that you do NOT -use your internal knowledge, just the provided information! - -Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. -(But keep other details as well.) - -It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! 
-It is important that the citation is close to the information it supports.
-Proper citations are very important to the user!\n\n\n
-
-For your general information, here is the ultimate motivation:
-\n--\n
-{original_question}
-\n--\n
-\n
-And here is the actual question I want you to answer based on the context above (with the motivation in mind):
-\n--\n {question} \n--\n
-
-Here is the context:
-\n\n\n--\n {context} \n--\n
-Please keep your answer brief and concise, and focus on facts and data.
-
-Answer:
-"""
-)
+    "Use the context provided below - and only the provided context - to answer the given question. "
+    "(Note that the answer is in service of answering a broader question, given below as 'motivation'.)\n\n"
+    "Again, only use the provided context and do not use your internal knowledge! If you cannot answer the "
+    f'question based on the context, say "{UNKNOWN_ANSWER}". It is a matter of life and death that you do NOT '
+    "use your internal knowledge, just the provided information!\n\n"
+    "Make sure that you keep all relevant information, specifically as it concerns the ultimate goal. "
+    "(But keep other details as well.)\n\n"
+    "It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! "
+    "It is important that the citation is close to the information it supports. "
+    "Proper citations are very important to the user!\n\n"
+    "For your general information, here is the ultimate motivation:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{original_question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the actual question I want you to answer based on the context above (with the motivation in mind):\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Here is the context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{context}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please keep your answer brief and concise, and focus on facts and data.\n\n"
+    "Answer:"
+).strip()

 SUB_ANSWER_CHECK_PROMPT = (
@@ -301,8 +275,8 @@ Does the suggested answer address the question? Please answer with """
     + f'"{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
 )

-# Initial Answer Generation

+# Initial Answer Generation
 INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS = (
     """ \n
{persona_specification}