Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-03-17 21:32:36 +01:00)

Author: Yuhong
Commit: 506a9f1b94
Parent: a067b32467
@@ -23,9 +23,8 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
     get_langgraph_node_log_string,
 )
 from onyx.configs.constants import NUM_EXPLORATORY_DOCS
-from onyx.prompts.agent_search import (
-    ENTITY_TERM_EXTRACTION_PROMPT,
-)
+from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT
+from onyx.prompts.agent_search import ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE


 def extract_entities_terms(
@@ -58,16 +57,21 @@ def extract_entities_terms(
     # start with the entity/term/extraction
     doc_context = format_docs(initial_search_docs)

     # Calculation here is only approximate
     doc_context = trim_prompt_piece(
         graph_config.tooling.fast_llm.config,
         doc_context,
-        ENTITY_TERM_EXTRACTION_PROMPT + question,
+        ENTITY_TERM_EXTRACTION_PROMPT
+        + question
+        + ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE,
     )

     msg = [
         HumanMessage(
             content=ENTITY_TERM_EXTRACTION_PROMPT.format(
                 question=question, context=doc_context
-            ),
+            )
+            + ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE,
         )
     ]
     fast_llm = graph_config.tooling.fast_llm
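Note: with this change, the string reserved against the context window must cover every piece later concatenated around the documents, including the JSON example. A minimal sketch of that reservation idea (word-count tokenization and the max_tokens value are simplifications; the real code measures tokens via the fast LLM's config):

def trim_to_fit(doc_context: str, reserved: str, max_tokens: int) -> str:
    # Rough stand-in for trim_prompt_piece: shrink doc_context so that the
    # reserved prompt pieces plus the documents fit the model context window.
    budget = max_tokens - len(reserved.split())  # approximate tokens by words
    return " ".join(doc_context.split()[: max(budget, 0)])


trimmed = trim_to_fit(
    doc_context="long document text ...",
    reserved="prompt template" + " user question" + " json example",
    max_tokens=4096,
)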
@@ -13,6 +13,7 @@ from onyx.agents.agent_search.shared_graph_utils.utils import (
 from onyx.agents.agent_search.shared_graph_utils.utils import remove_document_citations
 from onyx.agents.agent_search.shared_graph_utils.utils import summarize_history
 from onyx.configs.agent_configs import AGENT_MAX_STATIC_HISTORY_WORD_LENGTH
+from onyx.configs.constants import MessageType
 from onyx.context.search.models import InferenceSection
 from onyx.llm.interfaces import LLMConfig
 from onyx.llm.utils import get_max_input_tokens
@@ -21,6 +22,9 @@ from onyx.natural_language_processing.utils import tokenizer_trim_content
 from onyx.prompts.agent_search import HISTORY_FRAMING_PROMPT
 from onyx.prompts.agent_search import SUB_QUESTION_RAG_PROMPT
 from onyx.prompts.prompt_utils import build_date_time_string
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()


 def build_sub_question_answer_prompt(
@@ -36,9 +40,9 @@ def build_sub_question_answer_prompt(

     date_str = build_date_time_string()

+    # TODO: This should include document metadata and title
     docs_format_list = [
-        f"""Document Number: [D{doc_num + 1}]\n
-             Content: {doc.combined_content}\n\n"""
+        f"Document Number: [D{doc_num + 1}]\nContent: {doc.combined_content}\n\n"
         for doc_num, doc in enumerate(docs)
     ]

@@ -88,7 +92,6 @@ def trim_prompt_piece(config: LLMConfig, prompt_piece: str, reserved_str: str) -

 def build_history_prompt(config: GraphConfig, question: str) -> str:
     prompt_builder = config.inputs.prompt_builder
-    model = config.tooling.fast_llm
     persona_base = get_persona_agent_prompt_expressions(
         config.inputs.search_request.persona
     ).base_prompt
@@ -102,23 +105,31 @@ def build_history_prompt(config: GraphConfig, question: str) -> str:
     history_components = []
     previous_message_type = None
     for message in prompt_builder.raw_message_history:
-        if "user" in message.message_type:
+        if message.message_type == MessageType.USER:
             history_components.append(f"User: {message.message}\n")
-            previous_message_type = "user"
-        elif "assistant" in message.message_type:
-            # only use the last agent answer for the history
-            if previous_message_type != "assistant":
-                history_components.append(f"You/Agent: {message.message}\n")
-            else:
-                history_components = history_components[:-1]
-                history_components.append(f"You/Agent: {message.message}\n")
-            previous_message_type = "assistant"
+            previous_message_type = MessageType.USER
+        elif message.message_type == MessageType.ASSISTANT:
+            # Previously there could be multiple assistant messages in a row
+            # Now this is handled at the message history construction
+            assert previous_message_type is not MessageType.ASSISTANT
+            history_components.append(f"You/Agent: {message.message}\n")
+            previous_message_type = MessageType.ASSISTANT
+        else:
+            # Other message types are not included here, currently there should be no other message types
+            logger.error(
+                f"Unhandled message type: {message.message_type} with message: {message.message}"
+            )
+            continue

     history = "\n".join(history_components)
     history = remove_document_citations(history)
     if len(history.split()) > AGENT_MAX_STATIC_HISTORY_WORD_LENGTH:
-        history = summarize_history(history, question, persona_base, model)
+        history = summarize_history(
+            history=history,
+            question=question,
+            persona_specification=persona_base,
+            llm=config.tooling.fast_llm,
+        )

     return HISTORY_FRAMING_PROMPT.format(history=history) if history else ""
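Note: the loop above renders the raw message history into a plain-text transcript with fixed role labels before the length check. A rough illustration of the intermediate value (hypothetical conversation; the real input is prompt_builder.raw_message_history):

history_components = [
    "User: What changed in Q3 revenue?\n",
    "You/Agent: Revenue grew 12% quarter over quarter [[D1]]().\n",
    "User: Why?\n",
]
history = "\n".join(history_components)
# remove_document_citations strips the [[D1]]() markers, and only then is the
# word count compared against AGENT_MAX_STATIC_HISTORY_WORD_LENGTH.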
@@ -119,7 +119,7 @@ class CombinedAgentMetrics(BaseModel):

 class PersonaPromptExpressions(BaseModel):
     contextualized_prompt: str
-    base_prompt: str
+    base_prompt: str | None


 class AgentPromptEnrichmentComponents(BaseModel):
@@ -56,6 +56,7 @@ from onyx.prompts.agent_search import (
 from onyx.prompts.agent_search import (
     HISTORY_CONTEXT_SUMMARY_PROMPT,
 )
+from onyx.prompts.prompt_utils import handle_onyx_date_awareness
 from onyx.tools.force import ForceUseTool
 from onyx.tools.tool_constructor import SearchToolConfig
 from onyx.tools.tool_implementations.search.search_tool import (
@@ -227,16 +228,26 @@ def get_test_config(
 def get_persona_agent_prompt_expressions(
     persona: Persona | None,
 ) -> PersonaPromptExpressions:
-    if persona is None:
-        persona_base = ""
-        persona_prompt = ASSISTANT_SYSTEM_PROMPT_DEFAULT
-    else:
-        persona_base = "\n".join([x.system_prompt for x in persona.prompts])
-        persona_prompt = ASSISTANT_SYSTEM_PROMPT_PERSONA.format(
-            persona_prompt=persona_base
+    if persona is None or len(persona.prompts) == 0:
+        # TODO base_prompt should be None, but no time to properly fix
+        return PersonaPromptExpressions(
+            contextualized_prompt=ASSISTANT_SYSTEM_PROMPT_DEFAULT, base_prompt=""
         )

+    # Only a 1:1 mapping between personas and prompts currently
+    prompt = persona.prompts[0]
+    prompt_config = PromptConfig.from_model(prompt)
+    datetime_aware_system_prompt = handle_onyx_date_awareness(
+        prompt_str=prompt_config.system_prompt,
+        prompt_config=prompt_config,
+        add_additional_info_if_no_tag=prompt.datetime_aware,
+    )
+
     return PersonaPromptExpressions(
-        contextualized_prompt=persona_prompt, base_prompt=persona_base
+        contextualized_prompt=ASSISTANT_SYSTEM_PROMPT_PERSONA.format(
+            persona_prompt=datetime_aware_system_prompt
+        ),
+        base_prompt=datetime_aware_system_prompt,
     )
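Note: the persona's system prompt now passes through handle_onyx_date_awareness before being wrapped. A toy sketch of the assumed behavior (the tag name and fallback wording here are illustrative placeholders, not the real onyx values):

from datetime import datetime


def date_aware_sketch(prompt_str: str, add_additional_info_if_no_tag: bool) -> str:
    # Hypothetical tag; the actual logic lives in onyx.prompts.prompt_utils.
    tag = "[[CURRENT_DATETIME]]"
    if tag in prompt_str:
        return prompt_str.replace(tag, datetime.now().isoformat())
    if add_additional_info_if_no_tag:
        return f"{prompt_str}\n\nToday is {datetime.now().date()}."
    return prompt_str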
@@ -322,7 +333,7 @@ def get_answer_citation_ids(answer_str: str) -> list[int]:


 def summarize_history(
-    history: str, question: str, persona_specification: str, model: LLM
+    history: str, question: str, persona_specification: str | None, llm: LLM
 ) -> str:
     history_context_prompt = remove_document_citations(
         HISTORY_CONTEXT_SUMMARY_PROMPT.format(
@@ -332,7 +343,7 @@ def summarize_history(
         )
     )

-    history_response = model.invoke(history_context_prompt)
+    history_response = llm.invoke(history_context_prompt)
     assert isinstance(history_response.content, str)
     return history_response.content
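Note: renaming model to llm is safe because the call site in build_history_prompt passes every argument by keyword. A self-contained sketch of the new call shape (toy LLM class, not the onyx LLM interface):

class ToyLLM:
    def invoke(self, prompt: str) -> str:
        return f"summary of {len(prompt)} chars"


def summarize_history_sketch(
    history: str, question: str, persona_specification: str | None, llm: ToyLLM
) -> str:
    persona = persona_specification or ""
    return llm.invoke(f"{persona}\n{question}\n{history}")


print(
    summarize_history_sketch(
        history="User: hi\n\nYou/Agent: hello",
        question="What was discussed?",
        persona_specification=None,  # now permitted by the str | None annotation
        llm=ToyLLM(),
    )
)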
@@ -166,6 +166,7 @@ def create_chat_chain(
     )

     current_message: ChatMessage | None = root_message
+    previous_message: ChatMessage | None = None
     while current_message is not None:
         child_msg = current_message.latest_child_message
@@ -183,7 +184,17 @@
                 "could not find next message in the same session"
             )

-        mainline_messages.append(current_message)
+        if (
+            current_message.message_type == MessageType.ASSISTANT
+            and previous_message is not None
+            and previous_message.message_type == MessageType.ASSISTANT
+            and mainline_messages
+        ):
+            mainline_messages[-1] = current_message
+        else:
+            mainline_messages.append(current_message)
+
+        previous_message = current_message

     if not mainline_messages:
         raise RuntimeError("Could not trace chat message history")
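Note: the new branch collapses consecutive assistant messages so only the most recent one survives in the mainline chain. A self-contained sketch of the rule (local stand-ins, not the onyx ChatMessage model):

from dataclasses import dataclass
from enum import Enum


class MessageType(Enum):
    USER = "user"
    ASSISTANT = "assistant"


@dataclass
class Msg:
    message_type: MessageType
    text: str


def collapse(messages: list[Msg]) -> list[Msg]:
    mainline: list[Msg] = []
    previous: Msg | None = None
    for current in messages:
        if (
            current.message_type == MessageType.ASSISTANT
            and previous is not None
            and previous.message_type == MessageType.ASSISTANT
            and mainline
        ):
            mainline[-1] = current  # keep only the latest assistant turn
        else:
            mainline.append(current)
        previous = current
    return mainline


msgs = [
    Msg(MessageType.USER, "q"),
    Msg(MessageType.ASSISTANT, "draft"),
    Msg(MessageType.ASSISTANT, "final"),
]
assert [m.text for m in collapse(msgs)] == ["q", "final"]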
@@ -2,7 +2,6 @@
 SEPARATOR_LINE = "-------"
 UNKNOWN_ANSWER = "I do not have enough information to answer this question."
 NO_RECOVERED_DOCS = "No relevant information recovered"
-DATE_PROMPT = "Today is {date}.\n\n"
 SUB_CHECK_YES = "yes"
 SUB_CHECK_NO = "no"
@@ -16,9 +15,7 @@ For more context, here is the history of the conversation so far that preceded t
 """.strip()


-ASSISTANT_SYSTEM_PROMPT_DEFAULT = (
-    """You are an assistant for question-answering tasks."""
-)
+ASSISTANT_SYSTEM_PROMPT_DEFAULT = "You are an assistant for question-answering tasks."

 ASSISTANT_SYSTEM_PROMPT_PERSONA = f"""
 You are an assistant for question-answering tasks. Here is more information about you:
@@ -28,21 +25,25 @@ You are an assistant for question-answering tasks. Here is more information abou
 """.strip()


-SUB_QUESTION_ANSWER_TEMPLATE = """\n
-Sub-Question: Q{sub_question_num}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
-"""
+SUB_QUESTION_ANSWER_TEMPLATE = f"""
+Sub-Question: Q{{sub_question_num}}
+Question:
+{{sub_question}}
+{SEPARATOR_LINE}
+Answer:
+{{sub_answer}}
+""".strip()


 SUB_QUESTION_ANSWER_TEMPLATE_REFINED = f"""
-Sub-Question: Q{{sub_question_num}}\n
-Type:
-{SEPARATOR_LINE}
-{{sub_question_type}}
-{SEPARATOR_LINE}
+Sub-Question: Q{{sub_question_num}}
+Type: {{sub_question_type}}

 Sub-Question:
 {SEPARATOR_LINE}
 {{sub_question}}
 {SEPARATOR_LINE}

 Answer:
 {SEPARATOR_LINE}
 {{sub_answer}}
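Note: these templates are f-strings evaluated at import time, so {SEPARATOR_LINE} is baked in immediately while doubled braces survive as .format() placeholders for later. A minimal demonstration of the escaping:

SEPARATOR_LINE = "-------"

TEMPLATE = f"""
Sub-Question: Q{{sub_question_num}}
{SEPARATOR_LINE}
Answer:
{{sub_answer}}
""".strip()

# The doubled braces became single-brace placeholders:
print(TEMPLATE.format(sub_question_num=1, sub_answer="42"))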
@@ -73,30 +74,33 @@ And here is the context retrieved:
 {SEPARATOR_LINE}

 Please format your answer as a json object in the following format:
-{{
-  "retrieved_entities_relationships": {{
+""".strip()
+
+ENTITY_TERM_EXTRACTION_PROMPT_JSON_EXAMPLE = """
+{
+  "retrieved_entities_relationships": {
     "entities": [
-      {{
+      {
         "entity_name": "<assign a name for the entity>",
         "entity_type": "<specify a short type name for the entity, such as 'company', 'location',...>"
-      }}
+      }
     ],
     "relationships": [
-      {{
+      {
         "relationship_name": "<assign a name for the relationship>",
         "relationship_type": "<specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>",
         "relationship_entities": ["<related entity name 1>", "<related entity name 2>", "..."]
-      }}
+      }
     ],
     "terms": [
-      {{
+      {
         "term_name": "<assign a name for the term>",
         "term_type": "<specify a short type name for the term, such as 'revenue', 'market_share',...>",
         "term_similar_to": ["<list terms that are similar to this term>"]
-      }}
+      }
     ]
-  }}
-}}
+  }
+}
 """.strip()
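Note: moving the JSON example out of the formatted prompt removes the need for brace escaping: as the extract_entities_terms hunk above shows, the example is appended after .format() runs, so its braces stay literal. A small demonstration of the pattern:

template = "Question: {question}\nPlease format your answer as a json object:\n"
json_example = '{"retrieved_entities_relationships": {"entities": []}}'

# format() only sees the template; the appended example needs no brace doubling.
prompt = template.format(question="Who supplies Acme?") + json_example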
@@ -259,79 +263,63 @@ SUB_QUESTION_RAG_PROMPT = (


 SUB_ANSWER_CHECK_PROMPT = (
-    """\n
-Your task is to see whether a given answer addresses a given question.
-Please do not use any internal knowledge you may have - just focus on whether the answer
-as given seems to largely address the question as given, or at least addresses part of the question.
-Here is the question:
-\n-------\n
-{question}
-\n-------\n
-Here is the suggested answer:
-\n-------\n
-{base_answer}
-\n-------\n
-Does the suggested answer address the question? Please answer with """
-    + f'"{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
-)
+    "Determine whether the given answer addresses the given question. "
+    "Please do not use any internal knowledge you may have - just focus on whether the answer "
+    "as given seems to largely address the question as given, or at least addresses part of the question.\n\n"
+    "Here is the question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Here is the suggested answer:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{base_answer}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    f'Does the suggested answer address the question? Please answer with "{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
+).strip()


 # Initial Answer Generation
 INITIAL_ANSWER_PROMPT_W_SUB_QUESTIONS = (
-    """ \n
-{persona_specification}
-{date_prompt}
-Use the information provided below - and only the provided information - to answer the provided main question.
-
-The information provided below consists of:
-1) a number of answered sub-questions - these are very important to help you organize your thoughts and your answer
-2) a number of documents that deemed relevant for the question.
-
-{history}
-
-It is critical that you provide prover inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
-It is important that the citation is close to the information it supports. If you have multiple citations that support
-a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
-Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
-citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc.
-Again, please NEVER cite sub-questions without a document citation!
-Proper citations are very important for the user!
-
-IMPORTANT RULES:
-- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
-You may give some additional facts you learned, but do not try to invent an answer.
-- If the information is empty or irrelevant, just say """
-    + f'"{UNKNOWN_ANSWER}"'
-    + """.
-- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
-
-Again, you should be sure that the answer is supported by the information provided!
-
-Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
-or assumptions you made.
-
-Here is the contextual information:
----------------
-
-*Answered Sub-questions (these should really matter!):
-\n-------\n
-{answered_sub_questions}
-\n-------\n
-
-And here are relevant document information that support the sub-question answers, or that are relevant for the actual question:\n
-\n-------\n
-{relevant_docs}
-\n-------\n
-
-And here is the question I want you to answer based on the information above:
-\n-------\n
-{question}
-\n-------\n\n
-
-Please keep your answer brief and concise, and focus on facts and data.
-
-Answer:"""
-)
+    "{persona_specification}\n\n"
+    "Use the information provided below - and only the provided information - to answer the provided main question.\n\n"
+    "The information provided below consists of:\n"
+    " 1) a number of answered sub-questions - these are very important to help you organize your thoughts and your answer\n"
+    " 2) a number of documents that are deemed relevant for the question.\n\n"
+    "{history}\n\n"
+    "It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!\n"
+    "It is important that the citation is close to the information it supports. If you have multiple citations that support\n"
+    "a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.\n"
+    "Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the "
+    "sub-question citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or "
+    "[[D2]]()[[D7]]()[[Q4]](), etc.\n"
+    "Again, please NEVER cite sub-questions without a document citation! "
+    "Proper citations are very important for the user!\n\n"
+    "IMPORTANT RULES:\n"
+    " - If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.\n"
+    "   You may give some additional facts you learned, but do not try to invent an answer.\n"
+    f' - If the information is empty or irrelevant, just say "{UNKNOWN_ANSWER}".\n'
+    " - If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.\n\n"
+    "Again, you should be sure that the answer is supported by the information provided!\n\n"
+    "Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,\n"
+    "or assumptions you made.\n\n"
+    "Here is the contextual information:\n"
+    "---------------\n\n"
+    "*Answered Sub-questions (these should really matter!):\n"
+    f"{SEPARATOR_LINE}\n"
+    "{answered_sub_questions}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is relevant document information that supports the sub-question answers, "
+    "or that is relevant for the actual question:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{relevant_docs}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "And here is the question I want you to answer based on the information above:\n"
+    f"{SEPARATOR_LINE}\n"
+    "{question}\n"
+    f"{SEPARATOR_LINE}\n\n"
+    "Please keep your answer brief and concise, and focus on facts and data.\n\n"
+    "Answer:"
+).strip()


 # used if sub_question_answer_str is empty
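Note: the rewritten prompts rely on implicit concatenation of adjacent string literals inside parentheses, mixing f-string pieces (which bake in SEPARATOR_LINE and the yes/no constants at import time) with plain pieces (which keep {question}-style placeholders for a later .format()). A compact demonstration:

SEPARATOR_LINE = "-------"

PROMPT = (
    "Here is the question:\n"
    f"{SEPARATOR_LINE}\n"
    "{question}\n"  # stays a placeholder: this piece is not an f-string
    f"{SEPARATOR_LINE}"
).strip()

print(PROMPT.format(question="What changed?"))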
@@ -339,7 +327,6 @@ INITIAL_ANSWER_PROMPT_WO_SUB_QUESTIONS = (
     """\n
 {answered_sub_questions}
 {persona_specification}
-{date_prompt}

 Use the information provided below - and only the provided information - to answer the provided question.
 The information provided below consists of a number of documents that were deemed relevant for the question.
@@ -465,7 +452,7 @@ Generate the list of questions separated by one new line like this:
 REFINED_ANSWER_PROMPT_W_SUB_QUESTIONS = (
     """\n
 {persona_specification}
-{date_prompt}

 Your task is to improve on a given answer to a question, as the initial answer was found to be lacking in some way.

 Use the information provided below - and only the provided information - to write your new and improved answer.
@@ -542,7 +529,7 @@ REFINED_ANSWER_PROMPT_WO_SUB_QUESTIONS = (
     """\n
 {answered_sub_questions}\n
 {persona_specification}
-{date_prompt}

 Use the information provided below - and only the provided information - to answer the provided question.