Partial Prompt Updates (#3880)

Yuhong Sun 2025-02-02 14:21:23 -08:00 committed by Evan Lohn
parent 9b6e51b4fe
commit a067b32467
16 changed files with 280 additions and 310 deletions

View File

@@ -12,12 +12,12 @@ from onyx.agents.agent_search.deep_search.initial.generate_individual_sub_answer
     SubQuestionAnswerCheckUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_ANSWER_CHECK_PROMPT
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 def check_sub_answer(
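The pattern repeated across the files in this commit: prompt constants that previously lived in `onyx.agents.agent_search.shared_graph_utils.prompts` are now imported from the central `onyx.prompts.agent_search` module. A minimal sketch of the change at a call site (paths taken from the hunk above; only importable with the Onyx backend on the path):

```python
# Old import path (removed in this commit):
# from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_ANSWER_CHECK_PROMPT
# from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER

# New import path (added in this commit):
from onyx.prompts.agent_search import SUB_ANSWER_CHECK_PROMPT
from onyx.prompts.agent_search import UNKNOWN_ANSWER
```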

View File

@@ -16,7 +16,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_sub_question_answer_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import NO_RECOVERED_DOCS
 from onyx.agents.agent_search.shared_graph_utils.utils import get_answer_citation_ids
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
@@ -31,6 +30,7 @@ from onyx.chat.models import StreamStopInfo
 from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
+from onyx.prompts.agent_search import NO_RECOVERED_DOCS
 from onyx.utils.logger import setup_logger
 logger = setup_logger()

View File

@@ -30,16 +30,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import InitialAgentResul
 from onyx.agents.agent_search.shared_graph_utils.operators import (
     dedup_inference_sections,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    SUB_QUESTION_ANSWER_TEMPLATE,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     dispatch_main_answer_stop_info,
 )
@@ -57,6 +47,16 @@ from onyx.chat.models import ExtendedToolResponse
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
 from onyx.context.search.models import InferenceSection
+from onyx.prompts.agent_search import (
+    INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    SUB_QUESTION_ANSWER_TEMPLATE,
+)
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses

View File

@@ -24,17 +24,15 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.models import (
     QuestionRetrievalResult,
 )
+from onyx.context.search.models import InferenceSection
 ### States ###
 class SubQuestionRetrievalInput(CoreState):
-    pass
+    exploratory_search_results: list[InferenceSection]
 ## Graph State
 class SubQuestionRetrievalState(
     # This includes the core state
     SubQuestionRetrievalInput,
@@ -48,8 +46,6 @@ class SubQuestionRetrievalState(
     base_raw_search_result: Annotated[list[QuestionRetrievalResult], add]
-## Graph Output State - presently not used
+## Graph Output State
 class SubQuestionRetrievalOutput(TypedDict):
     log_messages: list[str]
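The `pass` placeholder in the graph input state is replaced by an explicit `exploratory_search_results` field, so the exploratory search output is carried through the graph state instead of being dropped. A rough, self-contained sketch of the idea (stub dataclasses, not the actual Onyx models):

```python
from dataclasses import dataclass, field


@dataclass
class InferenceSectionStub:
    # Stand-in for onyx.context.search.models.InferenceSection
    combined_content: str


@dataclass
class CoreStateStub:
    # Stand-in for the shared CoreState fields
    log_messages: list[str] = field(default_factory=list)


@dataclass
class SubQuestionRetrievalInputStub(CoreStateStub):
    # Previously just `pass`; the exploratory results are now part of the input state
    exploratory_search_results: list[InferenceSectionStub] = field(default_factory=list)


state = SubQuestionRetrievalInputStub(
    exploratory_search_results=[InferenceSectionStub("sample snippet used for decomposition")]
)
print(len(state.exploratory_search_results))  # -> 1
```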

View File

@@ -22,12 +22,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
@@ -38,6 +32,15 @@ from onyx.chat.models import StreamStopReason
 from onyx.chat.models import StreamType
 from onyx.chat.models import SubQuestionPiece
 from onyx.configs.agent_configs import AGENT_NUM_DOCS_FOR_DECOMPOSITION
+from onyx.prompts.agent_search import (
+    INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH,
+)
+from onyx.prompts.agent_search import (
+    INITIAL_QUESTION_DECOMPOSITION_PROMPT,
+)
+from onyx.utils.logger import setup_logger
+logger = setup_logger()
 def decompose_orig_question(
@@ -63,6 +66,12 @@ def decompose_orig_question(
     # Initial search to inform decomposition. Just get top 3 fits
     if perform_initial_search_decomposition:
+        # Due to unfortunate state representation in LangGraph, we need here to double check that the retrieval has
+        # happened prior to this point, allowing silent failure here since it is not critical for decomposition in
+        # all queries.
+        if not state.exploratory_search_results:
+            logger.error("Initial search for decomposition failed")
+
         sample_doc_str = "\n\n".join(
             [
                 doc.combined_content
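The new guard only logs when the exploratory results are missing, since the sample documents are helpful but not required for decomposition. A simplified, self-contained sketch of that behavior (stub types and names, not the actual node code):

```python
import logging
from dataclasses import dataclass

logger = logging.getLogger(__name__)


@dataclass
class DocStub:
    combined_content: str


def build_sample_doc_str(exploratory_search_results: list[DocStub], num_docs: int) -> str:
    # Mirrors the added check: a missing initial search is logged, not raised,
    # because decomposition can still proceed without the sample documents.
    if not exploratory_search_results:
        logger.error("Initial search for decomposition failed")
    return "\n\n".join(doc.combined_content for doc in exploratory_search_results[:num_docs])


print(build_sample_doc_str([DocStub("doc A"), DocStub("doc B")], num_docs=1))
```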

View File

@@ -10,17 +10,15 @@ from onyx.agents.agent_search.deep_search.main.states import (
 from onyx.agents.agent_search.deep_search.main.states import (
     SubQuestionResultsUpdate,
 )
+from onyx.context.search.models import InferenceSection
 ### States ###
 class SubQuestionAnsweringInput(CoreState):
-    pass
+    exploratory_search_results: list[InferenceSection]
 ## Graph State
 class SubQuestionAnsweringState(
     # This includes the core state
     SubQuestionAnsweringInput,
@@ -31,8 +29,6 @@ class SubQuestionAnsweringState(
     pass
-## Graph Output State - presently not used
+## Graph Output State
 class SubQuestionAnsweringOutput(TypedDict):
     log_messages: list[str]

View File

@@ -10,14 +10,14 @@ from onyx.agents.agent_search.deep_search.main.states import (
 )
 from onyx.agents.agent_search.deep_search.main.states import MainState
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
 from onyx.chat.models import RefinedAnswerImprovement
+from onyx.prompts.agent_search import (
+    INITIAL_REFINED_ANSWER_COMPARISON_PROMPT,
+)
 def compare_answers(

View File

@@ -20,9 +20,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     build_history_prompt,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     format_entity_term_extraction,
@@ -32,6 +29,9 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import make_question_id
 from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
+from onyx.prompts.agent_search import (
+    REFINEMENT_QUESTION_DECOMPOSITION_PROMPT,
+)
 from onyx.tools.models import ToolCallKickoff

View File

@@ -18,19 +18,14 @@ from onyx.agents.agent_search.shared_graph_utils.models import EntityExtractionR
 from onyx.agents.agent_search.shared_graph_utils.models import (
     EntityRelationshipTermExtraction,
 )
-from onyx.agents.agent_search.shared_graph_utils.models import Relationship
-from onyx.agents.agent_search.shared_graph_utils.models import Term
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ENTITY_TERM_EXTRACTION_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import format_docs
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
+from onyx.prompts.agent_search import (
+    ENTITY_TERM_EXTRACTION_PROMPT,
+)
 def extract_entities_terms(

View File

@@ -28,16 +28,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import RefinedAgentStats
 from onyx.agents.agent_search.shared_graph_utils.operators import (
     dedup_inference_sections,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import UNKNOWN_ANSWER
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     dispatch_main_answer_stop_info,
 )
@@ -55,6 +45,16 @@ from onyx.chat.models import AgentAnswerPiece
 from onyx.chat.models import ExtendedToolResponse
 from onyx.configs.agent_configs import AGENT_MAX_ANSWER_CONTEXT_DOCS
 from onyx.configs.agent_configs import AGENT_MIN_ORIG_QUESTION_DOCS
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS,
+)
+from onyx.prompts.agent_search import (
+    SUB_QUESTION_ANSWER_TEMPLATE_REFINED,
+)
+from onyx.prompts.agent_search import UNKNOWN_ANSWER
 from onyx.tools.tool_implementations.search.search_tool import yield_search_responses

View File

@@ -16,14 +16,14 @@ from onyx.agents.agent_search.deep_search.shared.expanded_retrieval.states impor
     QueryExpansionUpdate,
 )
 from onyx.agents.agent_search.models import GraphConfig
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    QUERY_REWRITING_PROMPT,
-)
 from onyx.agents.agent_search.shared_graph_utils.utils import dispatch_separated
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.agents.agent_search.shared_graph_utils.utils import parse_question_id
+from onyx.prompts.agent_search import (
+    QUERY_REWRITING_PROMPT,
+)
 def expand_queries(

View File

@@ -13,7 +13,7 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.agent_prompt_ops import (
     trim_prompt_piece,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
+from onyx.prompts.agent_search import (
     DOCUMENT_VERIFICATION_PROMPT,
 )

View File

@@ -7,8 +7,6 @@ from onyx.agents.agent_search.models import GraphConfig
 from onyx.agents.agent_search.shared_graph_utils.models import (
     AgentPromptEnrichmentComponents,
 )
-from onyx.agents.agent_search.shared_graph_utils.prompts import HISTORY_FRAMING_PROMPT
-from onyx.agents.agent_search.shared_graph_utils.prompts import SUB_QUESTION_RAG_PROMPT
 from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_persona_agent_prompt_expressions,
 )
@@ -20,6 +18,8 @@ from onyx.llm.interfaces import LLMConfig
 from onyx.llm.utils import get_max_input_tokens
 from onyx.natural_language_processing.utils import get_tokenizer
 from onyx.natural_language_processing.utils import tokenizer_trim_content
+from onyx.prompts.agent_search import HISTORY_FRAMING_PROMPT
+from onyx.prompts.agent_search import SUB_QUESTION_RAG_PROMPT
 from onyx.prompts.prompt_utils import build_date_time_string

View File

@@ -24,15 +24,6 @@ from onyx.agents.agent_search.shared_graph_utils.models import (
     EntityRelationshipTermExtraction,
 )
 from onyx.agents.agent_search.shared_graph_utils.models import PersonaPromptExpressions
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    ASSISTANT_SYSTEM_PROMPT_PERSONA,
-)
-from onyx.agents.agent_search.shared_graph_utils.prompts import (
-    HISTORY_CONTEXT_SUMMARY_PROMPT,
-)
 from onyx.chat.models import AnswerPacket
 from onyx.chat.models import AnswerStyleConfig
 from onyx.chat.models import CitationConfig
@@ -56,6 +47,15 @@ from onyx.db.engine import get_session_context_manager
 from onyx.db.persona import get_persona_by_id
 from onyx.db.persona import Persona
 from onyx.llm.interfaces import LLM
+from onyx.prompts.agent_search import (
+    ASSISTANT_SYSTEM_PROMPT_DEFAULT,
+)
+from onyx.prompts.agent_search import (
+    ASSISTANT_SYSTEM_PROMPT_PERSONA,
+)
+from onyx.prompts.agent_search import (
+    HISTORY_CONTEXT_SUMMARY_PROMPT,
+)
 from onyx.tools.force import ForceUseTool
 from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
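This module consumes the assistant system prompts; which one applies depends on whether a persona prompt is configured. A small, self-contained sketch of that selection (the constants are copied from the prompt hunks later in this diff, while the `build_system_prompt` helper is illustrative, not the Onyx function):

```python
SEPARATOR_LINE = "-------"

ASSISTANT_SYSTEM_PROMPT_DEFAULT = """You are an assistant for question-answering tasks."""

ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
You are an assistant for question-answering tasks. Here is more information about you:
{SEPARATOR_LINE}
{{persona_prompt}}
{SEPARATOR_LINE}
""".strip()


def build_system_prompt(persona_prompt: str | None) -> str:
    # Illustrative helper: fall back to the default prompt when no persona is configured.
    if persona_prompt:
        return ASSISTANT_SYSTEM_PROMPT_PERSONA.format(persona_prompt=persona_prompt)
    return ASSISTANT_SYSTEM_PROMPT_DEFAULT


print(build_system_prompt("You specialize in internal engineering docs."))
```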

View File

@@ -8,7 +8,7 @@ AGENT_DEFAULT_RERANKING_HITS = 10
 AGENT_DEFAULT_SUB_QUESTION_MAX_CONTEXT_HITS = 8
 AGENT_DEFAULT_NUM_DOCS_FOR_INITIAL_DECOMPOSITION = 3
 AGENT_DEFAULT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = 5
-AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 3
+AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS = 5
 AGENT_DEFAULT_MIN_ORIG_QUESTION_DOCS = 3
 AGENT_DEFAULT_MAX_ANSWER_CONTEXT_DOCS = 10
 AGENT_DEFAULT_MAX_STATIC_HISTORY_WORD_LENGTH = 2000
@@ -59,7 +59,7 @@ AGENT_NUM_DOCS_FOR_REFINED_DECOMPOSITION = int(
 AGENT_EXPLORATORY_SEARCH_RESULTS = int(
     os.environ.get("AGENT_EXPLORATORY_SEARCH_RESULTS")
     or AGENT_DEFAULT_EXPLORATORY_SEARCH_RESULTS
-)  # 3
+)  # 5
 AGENT_MIN_ORIG_QUESTION_DOCS = int(
     os.environ.get("AGENT_MIN_ORIG_QUESTION_DOCS")

View File

@@ -1,55 +1,53 @@
+# The prompts for the agentic framework. The order follows approximately the order
+# of the actions in the graph
 # Standards
+SEPARATOR_LINE = "-------"
 UNKNOWN_ANSWER = "I do not have enough information to answer this question."
 NO_RECOVERED_DOCS = "No relevant information recovered"
-DATE_PROMPT = """Today is {date}.\n\n"""
+DATE_PROMPT = "Today is {date}.\n\n"
 SUB_CHECK_YES = "yes"
 SUB_CHECK_NO = "no"
 # Framing/Support/Template Prompts
-HISTORY_FRAMING_PROMPT = """\n
+HISTORY_FRAMING_PROMPT = f"""
 For more context, here is the history of the conversation so far that preceded this question:
-\n-------\n
-{history}
-\n-------\n\n
-"""
+{SEPARATOR_LINE}
+{{history}}
+{SEPARATOR_LINE}
+""".strip()
-ASSISTANT_SYSTEM_PROMPT_DEFAULT = """
-You are an assistant for question-answering tasks."""
-ASSISTANT_SYSTEM_PROMPT_PERSONA = """
+ASSISTANT_SYSTEM_PROMPT_DEFAULT = (
+    """You are an assistant for question-answering tasks."""
+)
+ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
 You are an assistant for question-answering tasks. Here is more information about you:
-\n-------\n
-{persona_prompt}
-\n-------\n
-"""
+{SEPARATOR_LINE}
+{{persona_prompt}}
+{SEPARATOR_LINE}
+""".strip()
 SUB_QUESTION_ANSWER_TEMPLATE = """\n
 Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
 """
-SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n
-Sub-Question: Q{sub_question_num}\n
+SUB_QUESTION_ANSWER_TEMPLATE_REFINED = f"""
+Sub-Question: Q{{sub_question_num}}\n
 Type:
-\n----\n
-{sub_question_type}
-\n----\n
+{SEPARATOR_LINE}
+{{sub_question_type}}
+{SEPARATOR_LINE}
 Sub-Question:
-\n----\n
-{sub_question}
-\n----\n
-\nAnswer:
-\n----\n
-{sub_answer}
-\n----\n
-\n
-"""
+{SEPARATOR_LINE}
+{{sub_question}}
+{SEPARATOR_LINE}
+Answer:
+{SEPARATOR_LINE}
+{{sub_answer}}
+{SEPARATOR_LINE}
+""".strip()
 SUB_QUESTION_ANSWER_TEMPLATE_REFINED = """\n
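The reworked templates are module-level f-strings: `{SEPARATOR_LINE}` is substituted once at import time, while doubled braces such as `{{history}}` survive as `{history}` placeholders for a later `.format()` call. A self-contained sketch of that two-stage rendering (constants copied from the hunk above):

```python
SEPARATOR_LINE = "-------"

# At module import time the f-string fills in SEPARATOR_LINE; the doubled braces
# are left behind as a plain {history} placeholder.
HISTORY_FRAMING_PROMPT = f"""
For more context, here is the history of the conversation so far that preceded this question:
{SEPARATOR_LINE}
{{history}}
{SEPARATOR_LINE}
""".strip()

# At call time the remaining placeholder is filled with the actual conversation history.
print(HISTORY_FRAMING_PROMPT.format(history="User asked about Q3 revenue."))
```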
@@ -58,230 +56,206 @@ Sub-Question: Q{sub_question_num}\n Type: {sub_question_type}\n Sub-Question:\n
 """
-# Setap/Util Prompts
-ENTITY_TERM_EXTRACTION_PROMPT = """ \n
+# Step/Utility Prompts
+ENTITY_TERM_EXTRACTION_PROMPT = f"""
 Based on the original question and some context retrieved from a dataset, please generate a list of
 entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
 (e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.
-\n\n
 Here is the original question:
-\n-------\n
-{question}
-\n-------\n
+{SEPARATOR_LINE}
+{{question}}
+{SEPARATOR_LINE}
 And here is the context retrieved:
-\n-------\n
-{context}
-\n-------\n
+{SEPARATOR_LINE}
+{{context}}
+{SEPARATOR_LINE}
 Please format your answer as a json object in the following format:
-{{"retrieved_entities_relationships": {{
-    "entities": [{{
-        "entity_name": <assign a name for the entity>,
-        "entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
-    }}],
-    "relationships": [{{
-        "relationship_name": <assign a name for the relationship>,
-        "relationship_type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
-        "relationship_entities": [<related entity name 1>, <related entity name 2>, ...]
-    }}],
-    "terms": [{{
-        "term_name": <assign a name for the term>,
-        "term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
-        "term_similar_to": <list terms that are similar to this term>
-    }}]
+{{
+    "retrieved_entities_relationships": {{
+        "entities": [
+            {{
+                "entity_name": "<assign a name for the entity>",
+                "entity_type": "<specify a short type name for the entity, such as 'company', 'location',...>"
+            }}
+        ],
+        "relationships": [
+            {{
+                "relationship_name": "<assign a name for the relationship>",
+                "relationship_type": "<specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>",
+                "relationship_entities": ["<related entity name 1>", "<related entity name 2>", "..."]
+            }}
+        ],
+        "terms": [
+            {{
+                "term_name": "<assign a name for the term>",
+                "term_type": "<specify a short type name for the term, such as 'revenue', 'market_share',...>",
+                "term_similar_to": ["<list terms that are similar to this term>"]
+            }}
+        ]
+    }}
 }}
-}}
-"""
+""".strip()
-HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
-{persona_specification}
-Your task now is to summarize the key parts of the history of a conversation between a user and an agent. The
-summary has two purposes:
-1) providing the suitable context for a new question, and
-2) To capture the key information that was discussed and that the user may have a follow-up question about.
-\n-------\n
-{question}
-\n-------\n
-And here is the history:
-\n-------\n
-{history}
-\n-------\n
-Please provide a summarized context from the history so that the question makes sense and can - with
-suitable extra information - be answered.
-Please do not use more than three or four sentences.
-History summary:
-"""
+HISTORY_CONTEXT_SUMMARY_PROMPT = (
+    "{persona_specification}\n\n"
+    "Your task now is to summarize the key parts of the history of a conversation between a user and an agent."
+    " The summary has two purposes:\n"
+    " 1) providing the suitable context for a new question, and\n"
+    " 2) To capture the key information that was discussed and that the user may have a follow-up question about.\n\n"
+    "Here is the question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the history:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{history}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please provide a summarized context from the history so that the question makes sense and can"
+    " - with suitable extra information - be answered.\n\n"
+    "Do not use more than three or four sentences.\n\n"
+    "History summary:"
+).strip()
 # INITIAL PHASE
-## Sub-question
-INITIAL_QUESTION_DECOMPOSITION_PROMPT = """
-If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
-answer the original question. The purpose for this decomposition may be to
-1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
-'what are sales for company B')]
-2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
-'what is our market share with company A', 'is company A a reference customer for us', etc.])
-3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
-familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
-(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
-'what do we do to improve stability of product X', ...])
-4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
-If you think that a decomposition is not needed or helpful, please just return an empty string. That is ok too.
-Here is the initial question:
-\n-------\n
-{question}
-\n-------\n
-{history}
-Please formulate your answer as a newline-separated list of questions like so:
-<sub-question>
-<sub-question>
-<sub-question>
-Answer:"""
-INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = """
-If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
-answer the original question. The purpose for this decomposition may be to
-1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
-'what are sales for company B')]
-2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
-'what is our market share with company A', 'is company A a reference customer for us', etc.])
-3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
-familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
-(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
-'what do we do to improve stability of product X', ...])
-4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
-Here are some other rules:
-1) To give you some context, you will see below also some documents that relate to the question. Please only
-use this information to learn what the question is approximately asking about, but do not focus on the details
-to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may
-not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! Decomposition and
-disambiguations are most important!
-2) If you think that a decomposition is not needed or helpful, please just return an empty string. That is very much ok too.
-Here are the sample docs to give you some context:
-\n-------\n
-{sample_doc_str}
-\n-------\n
-And here is the initial question that you should think about decomposing:
-\n-------\n
-{question}
-\n-------\n
-{history}
-Please formulate your answer as a newline-separated list of questions like so:
-<sub-question>
-<sub-question>
-<sub-question>
-...
-Answer:"""
+# Sub-question
+# Intentionally left a copy in case we want to modify this one differently
+INITIAL_QUESTION_DECOMPOSITION_PROMPT = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve"
+    " scalability of product X', 'what do we do to improve stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "Here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " ...\n\n"
+    "Answer:"
+).strip()
+INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = (
+    "Decompose the initial user question into no more than 3 appropriate sub-questions that help to answer the"
+    " original question. The purpose for this decomposition may be to:\n"
+    " 1) isolate individual entities (i.e., 'compare sales of company A and company B' ->"
+    " ['what are sales for company A', 'what are sales for company B'])\n"
+    " 2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' ->"
+    " ['what are our sales with company A','what is our market share with company A',"
+    " 'is company A a reference customer for us', etc.])\n"
+    " 3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you"
+    " are generally familiar with the entity, then you can decompose the question into sub-questions that are more"
+    " specific to components (i.e., 'what do we do to improve scalability of product X', 'what do we to to improve"
+    " scalability of product X', 'what do we do to improve stability of product X', ...])\n"
+    " 4) research an area that could really help to answer the question.\n\n"
+    "To give you some context, you will see below also some documents that may relate to the question. Please only"
+    " use this information to learn what the question is approximately asking about, but do not focus on the details"
+    " to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may"
+    " not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions!"
+    " Decomposition and disambiguations are most important!\n\n"
+    "Here are the sample docs to give you some context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{sample_doc_str}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the initial question to decompose:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "{history}\n\n"
+    "Please formulate your answer as a newline-separated list of questions like so:\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " <sub-question>\n"
+    " ...\n\n"
+    "Answer:"
+).strip()
 # Retrieval
-QUERY_REWRITING_PROMPT = """ \n
-Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrivel from a
-document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
-enabling the system to search more broadly.
-Also, try to make the search queries not redundant, i.e. not too similar! \n\n
-Here is the initial question:
-\n-------\n
-{question}
-\n-------\n
-Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:
-<query 1>
-<query 2>
-...
-queries: """
+QUERY_REWRITING_PROMPT = (
+    "Please convert the initial user question into a 2-3 more appropriate short and pointed search queries for"
+    " retrieval from a document store. Particularly, try to think about resolving ambiguities and make the search"
+    " queries more specific, enabling the system to search more broadly.\n"
+    "Also, try to make the search queries not redundant, i.e. not too similar!\n\n"
+    "Here is the initial question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:\n"
+    "<query 1>\n"
+    "<query 2>\n"
+    "...\n\n"
+    "Queries:"
+)
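Since the rewriting prompt asks for plain newline-separated queries without numbering, the caller can split the completion directly. A small illustrative sketch of that parsing step (not the actual `expand_queries` node):

```python
def parse_rewritten_queries(llm_output: str) -> list[str]:
    # One query per non-empty line, as the prompt's output format requests.
    return [line.strip() for line in llm_output.splitlines() if line.strip()]


sample_output = "Q3 revenue by region\nEMEA sales figures 2024\n"
print(parse_rewritten_queries(sample_output))  # -> ['Q3 revenue by region', 'EMEA sales figures 2024']
```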
-DOCUMENT_VERIFICATION_PROMPT = """
-You are supposed to judge whether a document text contains data or information that is potentially relevant
-for a question. It does not have to be fully relevant, but check whether it has some information that
-would help - possibly in conjunction with other documents - to address the question.
-Be careful that you do not use a document where you are not sure whether the text applies to the objects
-or entities that are relevant for the question. For example, a book about chess could have long passage
-discussing the psychology of chess without - within the passage - mentioning chess. If now a question
-is asked about the psychology of football, one could be tempted to use the document as it does discuss
-psychology in sports. However, it is NOT about football and should not be deemed relevant. Please
-consider this logic.
-Here is a document text that you can take as a fact:
-DOCUMENT INFORMATION:
-\n-------\n
-{document_content}
-\n-------\n
-Do you think that this document text is useful and relevant to answer the following question?
-QUESTION:
-\n-------\n
-{question}
-\n-------\n
-Please answer with 'yes' or 'no':
-Answer:
-"""
+DOCUMENT_VERIFICATION_PROMPT = (
+    "Determine whether the following document text contains data or information that is potentially relevant "
+    "for a question. It does not have to be fully relevant, but check whether it has some information that "
+    "would help - possibly in conjunction with other documents - to address the question.\n\n"
+    "Be careful that you do not use a document where you are not sure whether the text applies to the objects "
+    "or entities that are relevant for the question. For example, a book about chess could have long passage "
+    "discussing the psychology of chess without - within the passage - mentioning chess. If now a question "
+    "is asked about the psychology of football, one could be tempted to use the document as it does discuss "
+    "psychology in sports. However, it is NOT about football and should not be deemed relevant. Please "
+    "consider this logic.\n\n"
+    "DOCUMENT TEXT:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{document_content}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Do you think that this document text is useful and relevant to answer the following question?\n\n"
+    "QUESTION:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please answer with exactly and only a 'yes' or 'no':\n\n"
+    "Answer:"
+).strip()
 # Sub-Question Anser Generation
 SUB_QUESTION_RAG_PROMPT = (
-    """ \n
-{date_prompt}
-Use the context provided below - and only the
-provided context - to answer the given question. (Note that the answer is in service of answering a broader
-question, given below as 'motivation'.)
-Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
-question based on the context, say """
-    + f'"{UNKNOWN_ANSWER}"'
-    + """. It is a matter of life and death that you do NOT
-use your internal knowledge, just the provided information!
-Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal.
-(But keep other details as well.)
-It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
-It is important that the citation is close to the information it supports.
-Proper citations are very important to the user!\n\n\n
-For your general information, here is the ultimate motivation:
-\n--\n
-{original_question}
-\n--\n
-\n
-And here is the actual question I want you to answer based on the context above (with the motivation in mind):
-\n--\n {question} \n--\n
-Here is the context:
-\n\n\n--\n {context} \n--\n
-Please keep your answer brief and concise, and focus on facts and data.
-Answer:
-"""
-)
+    "Use the context provided below - and only the provided context - to answer the given question. "
+    "(Note that the answer is in service of answering a broader question, given below as 'motivation'.)\n\n"
+    "Again, only use the provided context and do not use your internal knowledge! If you cannot answer the "
+    f'question based on the context, say "{UNKNOWN_ANSWER}". It is a matter of life and death that you do NOT '
+    "use your internal knowledge, just the provided information!\n\n"
+    "Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal. "
+    "(But keep other details as well.)\n\n"
+    "It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc! "
+    "It is important that the citation is close to the information it supports. "
+    "Proper citations are very important to the user!\n\n"
+    "For your general information, here is the ultimate motivation:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{original_question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the actual question I want you to answer based on the context above (with the motivation in mind):\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Here is the context:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{context}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please keep your answer brief and concise, and focus on facts and data.\n\n"
+    "Answer:"
+).strip()
 SUB_ANSWER_CHECK_PROMPT = (
@@ -301,8 +275,8 @@ Does the suggested answer address the question? Please answer with """
     + f'"{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
 )
 # Initial Answer Generation
 INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS = (
     """ \n
 {persona_specification}