mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-19 08:10:13 +02:00
1114 lines
46 KiB
Python
1114 lines
46 KiB
Python
UNKNOWN_ANSWER = "I do not have enough information to answer this question."
|
|
|
|
NO_RECOVERED_DOCS = "No relevant information recovered"
|
|
|
|
DATE_PROMPT = """Today is {date}.\n\n"""
|
|
|
|
HISTORY_PROMPT = """\n
|
|
For more context, here is the history of the conversation so far that preceded this question:
|
|
\n ------- \n
|
|
{history}
|
|
\n ------- \n\n
|
|
"""
|
|
|
|
REWRITE_PROMPT_MULTI_ORIGINAL = """ \n
|
|
Please convert an initial user question into a 2-3 more appropriate short and pointed search queries for retrivel from a
|
|
document store. Particularly, try to think about resolving ambiguities and make the search queries more specific,
|
|
enabling the system to search more broadly.
|
|
Also, try to make the search queries not redundant, i.e. not too similar! \n\n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
Formulate the queries separated by newlines (Do not say 'Query 1: ...', just write the querytext) as follows:
|
|
<query 1>
|
|
<query 2>
|
|
...
|
|
queries: """
|
|
|
|
REWRITE_PROMPT_MULTI = """ \n
|
|
Please create a list of 2-3 sample documents that could answer an original question. Each document
|
|
should be about as long as the original question. \n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
Formulate the sample documents separated by '--' (Do not say 'Document 1: ...', just write the text): """
|
|
|
|
# The prompt is only used if there is no persona prompt, so the placeholder is ''
|
|
BASE_RAG_PROMPT = (
|
|
""" \n
|
|
{persona_specification}
|
|
{date_prompt}
|
|
Use the context provided below - and only the
|
|
provided context - to answer the given question. (Note that the answer is in service of ansering a broader
|
|
question, given below as 'motivation'.)
|
|
|
|
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
|
|
question based on the context, say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
It is a matter of life and death that you do NOT use your internal knowledge, just the provided`
|
|
information!
|
|
|
|
Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal.
|
|
(But keep other details as well.)
|
|
|
|
\nContext:\n {context} \n
|
|
|
|
Motivation:\n {original_question} \n\n
|
|
\n\n
|
|
And here is the question I want you to answer based on the context above (with the motivation in mind):
|
|
\n--\n {question} \n--\n
|
|
"""
|
|
)
|
|
|
|
BASE_RAG_PROMPT_v2 = (
|
|
""" \n
|
|
{date_prompt}
|
|
Use the context provided below - and only the
|
|
provided context - to answer the given question. (Note that the answer is in service of answering a broader
|
|
question, given below as 'motivation'.)
|
|
|
|
Again, only use the provided context and do not use your internal knowledge! If you cannot answer the
|
|
question based on the context, say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """. It is a matter of life and death that you do NOT
|
|
use your internal knowledge, just the provided information!
|
|
|
|
Make sure that you keep all relevant information, specifically as it concerns to the ultimate goal.
|
|
(But keep other details as well.)
|
|
|
|
It is critical that you provide inline citations in the format [[D1]](), [[D2]](), [[D3]](), etc!
|
|
It is important that the citation is close to the information it supports.
|
|
Proper citations are very important to the user!\n\n\n
|
|
|
|
For your general information, here is the ultimate motivation:
|
|
\n--\n {original_question} \n--\n
|
|
\n\n
|
|
And here is the actual question I want you to answer based on the context above (with the motivation in mind):
|
|
\n--\n {question} \n--\n
|
|
|
|
Here is the context:
|
|
\n\n\n--\n {context} \n--\n
|
|
Please keep your answer brief and concise, and focus on facts and data.
|
|
|
|
Answer:
|
|
"""
|
|
)
|
|
|
|
SUB_CHECK_YES = "yes"
|
|
SUB_CHECK_NO = "no"
|
|
|
|
SUB_CHECK_PROMPT = (
|
|
"""
|
|
Your task is to see whether a given answer addresses a given question.
|
|
Please do not use any internal knowledge you may have - just focus on whether the answer
|
|
as given seems to largely address the question as given, or at least addresses part of the question.
|
|
Here is the question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
Here is the suggested answer:
|
|
\n ------- \n
|
|
{base_answer}
|
|
\n ------- \n
|
|
Does the suggested answer address the question? Please answer with """
|
|
+ f'"{SUB_CHECK_YES}" or "{SUB_CHECK_NO}".'
|
|
)
|
|
|
|
|
|
BASE_CHECK_PROMPT = """ \n
|
|
Please check whether 1) the suggested answer seems to fully address the original question AND 2)the
|
|
original question requests a simple, factual answer, and there are no ambiguities, judgements,
|
|
aggregations, or any other complications that may require extra context. (I.e., if the question is
|
|
somewhat addressed, but the answer would benefit from more context, then answer with 'no'.)
|
|
|
|
Please only answer with 'yes' or 'no' \n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
Here is the proposed answer:
|
|
\n ------- \n
|
|
{initial_answer}
|
|
\n ------- \n
|
|
Please answer with yes or no:"""
|
|
|
|
VERIFIER_PROMPT = """
|
|
You are supposed to judge whether a document text contains data or information that is potentially relevant
|
|
for a question. It does not have to be fully relevant, but check whether it has some information that
|
|
would help - possibly in conjunction with other documents - to address the question.
|
|
|
|
Here is a document text that you can take as a fact:
|
|
--
|
|
DOCUMENT INFORMATION:
|
|
{document_content}
|
|
--
|
|
|
|
Do you think that this document text is useful and relevant to answer the following question?
|
|
|
|
--
|
|
QUESTION:
|
|
{question}
|
|
--
|
|
|
|
Please answer with 'yes' or 'no':
|
|
|
|
Answer:
|
|
|
|
"""
|
|
|
|
INITIAL_DECOMPOSITION_PROMPT_BASIC = """ \n
|
|
If you think it is helpful, please decompose an initial user question into not more
|
|
than 4 appropriate sub-questions that help to answer the original question.
|
|
The purpose for this decomposition is to isolate individual entities
|
|
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
|
|
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
|
|
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
|
|
for us'), etc. Each sub-question should realistically be answerable by a good RAG system.
|
|
|
|
Importantly, if you think it is not needed or helpful, please just return an empty list. That is ok too.
|
|
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
|
|
Please formulate your answer as a list of subquestions:
|
|
|
|
Answer:
|
|
"""
|
|
|
|
REWRITE_PROMPT_SINGLE = """ \n
|
|
Please convert an initial user question into a more appropriate search query for retrieval from a
|
|
document store. \n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
|
|
Formulate the query: """
|
|
|
|
MODIFIED_RAG_PROMPT = (
|
|
"""You are an assistant for question-answering tasks. Use the context provided below
|
|
- and only this context - to answer the question. It is a matter of life and death that you do NOT
|
|
use your internal knowledge, just the provided information!
|
|
If you don't have enough information to generate an answer, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
Use three sentences maximum and keep the answer concise.
|
|
Pay also particular attention to the sub-questions and their answers, at least it may enrich the answer.
|
|
Again, only use the provided context and do not use your internal knowledge!
|
|
|
|
\nQuestion: {question}
|
|
\nContext: {combined_context} \n
|
|
|
|
Answer:"""
|
|
)
|
|
|
|
ERT_INFORMED_DEEP_DECOMPOSE_PROMPT = """ \n
|
|
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
|
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
|
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
|
were not directly answerable. Also, some entities, relationships and terms are givenm to you so that
|
|
you have an idea of how the available data looks like.
|
|
|
|
Your role is to generate 3-5 new sub-questions that would help to answer the initial question,
|
|
considering:
|
|
|
|
1) The initial question
|
|
2) The initial answer that was found to be unsatisfactory
|
|
3) The sub-questions that were answered
|
|
4) The sub-questions that were suggested but not answered
|
|
5) The entities, relationships and terms that were extracted from the context
|
|
|
|
The individual questions should be answerable by a good RAG system.
|
|
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
|
question for different entities that may be involved in the original question, but in a way that does
|
|
not duplicate questions that were already tried.
|
|
|
|
Additional Guidelines:
|
|
- The sub-questions should be specific to the question and provide richer context for the question,
|
|
resolve ambiguities, or address shortcoming of the initial answer
|
|
- Each sub-question - when answered - should be relevant for the answer to the original question
|
|
- The sub-questions should be free from comparisons, ambiguities,judgements, aggregations, or any
|
|
other complications that may require extra context.
|
|
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
|
a RAG system independently without the original question available
|
|
(Example:
|
|
- initial question: "What is the capital of France?"
|
|
- bad sub-question: "What is the name of the river there?"
|
|
- good sub-question: "What is the name of the river that flows through Paris?"
|
|
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
|
|
generate a list of dictionaries with the following format:
|
|
[{{"sub_question": <sub-question>, "explanation": <explanation>, "search_term": <rewrite the
|
|
sub-question using as a search phrase for the document store>}}, ...]
|
|
|
|
\n\n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
|
|
Here is the initial sub-optimal answer:
|
|
\n ------- \n
|
|
{base_answer}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were answered:
|
|
\n ------- \n
|
|
{answered_sub_questions}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were suggested but not answered:
|
|
\n ------- \n
|
|
{failed_sub_questions}
|
|
\n ------- \n
|
|
|
|
And here are the entities, relationships and terms extracted from the context:
|
|
\n ------- \n
|
|
{entity_term_extraction_str}
|
|
\n ------- \n
|
|
|
|
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
|
main question. Again, please find questions that are NOT overlapping too much with the already answered
|
|
sub-questions or those that already were suggested and failed.
|
|
In other words - what can we try in addition to what has been tried so far?
|
|
|
|
Please think through it step by step and then generate the list of json dictionaries with the following
|
|
format:
|
|
|
|
{{"sub_questions": [{{"sub_question": <sub-question>,
|
|
"explanation": <explanation>,
|
|
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
|
|
...]}} """
|
|
|
|
DOC_INFORMED_DEEP_DECOMPOSE_PROMPT = """ \n
|
|
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
|
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
|
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
|
were not directly answerable. Also, some entities, relationships and terms are given to you so that
|
|
you have an idea of how the avaiolable data looks like.
|
|
|
|
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
|
|
considering:
|
|
|
|
1) The initial question
|
|
2) The initial answer that was found to be unsatisfactory
|
|
3) The sub-questions that were answered
|
|
4) The sub-questions that were suggested but not answered
|
|
5) A sample of the TYPE of documents that may be in the database in order to inform
|
|
you what type of entities, relationships, and terms you may want to consider asking about.
|
|
(But do not build the questions strictly on these documents! They are only examples!
|
|
Take the, as illustrations.)
|
|
|
|
The individual questions should be answerable by a good RAG system.
|
|
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
|
question for different entities that may be involved in the original question, but in a way that does
|
|
not duplicate questions that were already tried.
|
|
|
|
Additional Guidelines:
|
|
- The sub-questions should be specific to the question and provide richer context for the question,
|
|
resolve ambiguities, or address shortcoming of the initial answer
|
|
- Each sub-question - when answered - should be relevant for the answer to the original question
|
|
- The sub-questions should be free from comparisions, ambiguities,judgements, aggregations, or any
|
|
other complications that may require extra context.
|
|
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
|
a RAG system independently without the original question available
|
|
(Example:
|
|
- initial question: "What is the capital of France?"
|
|
- bad sub-question: "What is the name of the river there?"
|
|
- good sub-question: "What is the name of the river that flows through Paris?"
|
|
- For each sub-question, please also provide a search term that can be used to retrieve relevant
|
|
documents from a document store.
|
|
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not
|
|
answerable with the available context, and you should not ask similar questions.
|
|
\n\n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
{history}
|
|
|
|
Here is the initial sub-optimal answer:
|
|
\n ------- \n
|
|
{base_answer}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were answered:
|
|
\n ------- \n
|
|
{answered_sub_questions}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were suggested but not answered:
|
|
\n ------- \n
|
|
{failed_sub_questions}
|
|
\n ------- \n
|
|
|
|
And here some reference documents that show you what type of entities, relationships,
|
|
and terms you may want to consider to be as relevant to your initial question.
|
|
\n ------- \n
|
|
{docs_str}
|
|
\n ------- \n
|
|
|
|
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
|
main question.
|
|
|
|
Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of
|
|
objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not
|
|
mentioned in the 'entities, relationships and terms' section.
|
|
|
|
Again, please find questions that are NOT overlapping too much with the already answered
|
|
sub-questions or those that already were suggested and failed.
|
|
In other words - what can we try in addition to what has been tried so far?
|
|
|
|
Generate the list of questions separated by one new line like this:
|
|
<sub-question 1>
|
|
<sub-question 2>
|
|
<sub-question 3>
|
|
...
|
|
"""
|
|
|
|
DEEP_DECOMPOSE_PROMPT_WITH_ENTITIES = """ \n
|
|
An initial user question needs to be answered. An initial answer has been provided but it wasn't quite
|
|
good enough. Also, some sub-questions had been answered and this information has been used to provide
|
|
the initial answer. Some other subquestions may have been suggested based on little knowledge, but they
|
|
were not directly answerable. Also, some entities, relationships and terms are given to you so that
|
|
you have an idea of how the available data looks like.
|
|
|
|
Your role is to generate 2-4 new sub-questions that would help to answer the initial question,
|
|
considering:
|
|
|
|
1) The initial question
|
|
2) The initial answer that was found to be unsatisfactory
|
|
3) The sub-questions that were answered
|
|
4) The sub-questions that were suggested but not answered
|
|
5) The entities, relationships and terms that were extracted from the context
|
|
|
|
The individual questions should be answerable by a good RAG system.
|
|
So a good idea would be to use the sub-questions to resolve ambiguities and/or to separate the
|
|
question for different entities that may be involved in the original question, but in a way that does
|
|
not duplicate questions that were already tried.
|
|
|
|
Additional Guidelines:
|
|
- The sub-questions should be specific to the question and provide richer context for the question,
|
|
resolve ambiguities, or address shortcoming of the initial answer
|
|
- Each sub-question - when answered - should be relevant for the answer to the original question
|
|
- The sub-questions should be free from comparisons, ambiguities,judgements, aggregations, or any
|
|
other complications that may require extra context.
|
|
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
|
a RAG system independently without the original question available
|
|
(Example:
|
|
- initial question: "What is the capital of France?"
|
|
- bad sub-question: "What is the name of the river there?"
|
|
- good sub-question: "What is the name of the river that flows through Paris?"
|
|
- For each sub-question, please also provide a search term that can be used to retrieve relevant
|
|
documents from a document store.
|
|
- Consider specifically the sub-questions that were suggested but not answered. This is a sign that they are not
|
|
answerable with the available context, and you should not ask similar questions.
|
|
\n\n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
{history}
|
|
|
|
Here is the initial sub-optimal answer:
|
|
\n ------- \n
|
|
{base_answer}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were answered:
|
|
\n ------- \n
|
|
{answered_sub_questions}
|
|
\n ------- \n
|
|
|
|
Here are the sub-questions that were suggested but not answered:
|
|
\n ------- \n
|
|
{failed_sub_questions}
|
|
\n ------- \n
|
|
|
|
And here are the entities, relationships and terms extracted from the context:
|
|
\n ------- \n
|
|
{entity_term_extraction_str}
|
|
\n ------- \n
|
|
|
|
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
|
main question.
|
|
|
|
Specifically pay attention also to the entities, relationships and terms extracted, as these indicate what type of
|
|
objects/relationships/terms you can ask about! Do not ask about entities, terms or relationships that are not
|
|
mentioned in the 'entities, relationships and terms' section.
|
|
|
|
Again, please find questions that are NOT overlapping too much with the already answered
|
|
sub-questions or those that already were suggested and failed.
|
|
In other words - what can we try in addition to what has been tried so far?
|
|
|
|
Generate the list of questions separated by one new line like this:
|
|
<sub-question 1>
|
|
<sub-question 2>
|
|
<sub-question 3>
|
|
...
|
|
"""
|
|
|
|
DECOMPOSE_PROMPT = """ \n
|
|
For an initial user question, please generate at 5-10 individual sub-questions whose answers would help
|
|
\n to answer the initial question. The individual questions should be answerable by a good RAG system.
|
|
So a good idea would be to \n use the sub-questions to resolve ambiguities and/or to separate the
|
|
question for different entities that may be involved in the original question.
|
|
|
|
In order to arrive at meaningful sub-questions, please also consider the context retrieved from the
|
|
document store, expressed as entities, relationships and terms. You can also think about the types
|
|
mentioned in brackets
|
|
|
|
Guidelines:
|
|
- The sub-questions should be specific to the question and provide richer context for the question,
|
|
and or resolve ambiguities
|
|
- Each sub-question - when answered - should be relevant for the answer to the original question
|
|
- The sub-questions should be free from comparisons, ambiguities,judgements, aggregations, or any
|
|
other complications that may require extra context.
|
|
- The sub-questions MUST have the full context of the original question so that it can be executed by
|
|
a RAG system independently without the original question available
|
|
(Example:
|
|
- initial question: "What is the capital of France?"
|
|
- bad sub-question: "What is the name of the river there?"
|
|
- good sub-question: "What is the name of the river that flows through Paris?"
|
|
- For each sub-question, please provide a short explanation for why it is a good sub-question. So
|
|
generate a list of dictionaries with the following format:
|
|
[{{"sub_question": <sub-question>, "explanation": <explanation>}}, ...]
|
|
|
|
\n\n
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
|
|
And here are the entities, relationships and terms extracted from the context:
|
|
\n ------- \n
|
|
{entity_term_extraction_str}
|
|
\n ------- \n
|
|
|
|
Please generate the list of good, fully contextualized sub-questions that would help to address the
|
|
main question. Don't be too specific unless the original question is specific.
|
|
Please think through it step by step and then generate the list of json dictionaries with the following
|
|
format:
|
|
{{"sub_questions": [{{"sub_question": <sub-question>,
|
|
"explanation": <explanation>,
|
|
"search_term": <rewrite the sub-question using as a search phrase for the document store>}},
|
|
...]}} """
|
|
|
|
#### Consolidations
|
|
COMBINED_CONTEXT = """-------
|
|
Below you will find useful information to answer the original question. First, you see a number of
|
|
sub-questions with their answers. This information should be considered to be more focussed and
|
|
somewhat more specific to the original question as it tries to contextualized facts.
|
|
After that will see the documents that were considered to be relevant to answer the original question.
|
|
|
|
Here are the sub-questions and their answers:
|
|
\n\n {deep_answer_context} \n\n
|
|
\n\n Here are the documents that were considered to be relevant to answer the original question:
|
|
\n\n {formated_docs} \n\n
|
|
----------------
|
|
"""
|
|
|
|
SUB_QUESTION_EXPLANATION_RANKER_PROMPT = """-------
|
|
Below you will find a question that we ultimately want to answer (the original question) and a list of
|
|
motivations in arbitrary order for generated sub-questions that are supposed to help us answering the
|
|
original question. The motivations are formatted as <motivation number>: <motivation explanation>.
|
|
(Again, the numbering is arbitrary and does not necessarily mean that 1 is the most relevant
|
|
motivation and 2 is less relevant.)
|
|
|
|
Please rank the motivations in order of relevance for answering the original question. Also, try to
|
|
ensure that the top questions do not duplicate too much, i.e. that they are not too similar.
|
|
Ultimately, create a list with the motivation numbers where the number of the most relevant
|
|
motivation comes first.
|
|
|
|
Here is the original question:
|
|
\n\n {original_question} \n\n
|
|
\n\n Here is the list of sub-question motivations:
|
|
\n\n {sub_question_explanations} \n\n
|
|
----------------
|
|
|
|
Please think step by step and then generate the ranked list of motivations.
|
|
|
|
Please format your answer as a json object in the following format:
|
|
{{"reasonning": <explain your reasoning for the ranking>,
|
|
"ranked_motivations": <ranked list of motivation numbers>}}
|
|
"""
|
|
|
|
|
|
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS = """
|
|
If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
|
|
answer the original question. The purpose for this decomposition may be to
|
|
1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
|
|
'what are sales for company B')]
|
|
2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
|
|
'what is our market share with company A', 'is company A a reference customer for us', etc.])
|
|
3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
|
|
familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
|
|
(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
|
|
'what do we do to improve stability of product X', ...])
|
|
4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
|
|
|
|
If you think that a decomposition is not needed or helpful, please just return an empty string. That is ok too.
|
|
|
|
Here is the initial question:
|
|
-------
|
|
{question}
|
|
-------
|
|
{history}
|
|
|
|
Please formulate your answer as a newline-separated list of questions like so:
|
|
<sub-question>
|
|
<sub-question>
|
|
<sub-question>
|
|
|
|
Answer:"""
|
|
|
|
INITIAL_DECOMPOSITION_PROMPT_QUESTIONS_AFTER_SEARCH = """
|
|
If you think it is helpful, please decompose an initial user question into no more than 3 appropriate sub-questions that help to
|
|
answer the original question. The purpose for this decomposition may be to
|
|
1) isolate individual entities (i.e., 'compare sales of company A and company B' -> ['what are sales for company A',
|
|
'what are sales for company B')]
|
|
2) clarify or disambiguate ambiguous terms (i.e., 'what is our success with company A' -> ['what are our sales with company A',
|
|
'what is our market share with company A', 'is company A a reference customer for us', etc.])
|
|
3) if a term or a metric is essentially clear, but it could relate to various components of an entity and you are generally
|
|
familiar with the entity, then you can decompose the question into sub-questions that are more specific to components
|
|
(i.e., 'what do we do to improve scalability of product X', 'what do we to to improve scalability of product X',
|
|
'what do we do to improve stability of product X', ...])
|
|
4) research an area that could really help to answer the question. (But clarifications or disambiguations are more important.)
|
|
|
|
Here are some other rules:
|
|
|
|
1) To give you some context, you will see below also some documents that relate to the question. Please only
|
|
use this information to learn what the question is approximately asking about, but do not focus on the details
|
|
to construct the sub-questions! Also, some of the entities, relationships and terms that are in the dataset may
|
|
not be in these few documents, so DO NOT focussed too much on the documents when constructing the sub-questions! Decomposition and
|
|
disambiguations are most important!
|
|
2) If you think that a decomposition is not needed or helpful, please just return an empty string. That is very much ok too.
|
|
|
|
Here are the sample docs to give you some context:
|
|
-------
|
|
{sample_doc_str}
|
|
-------
|
|
|
|
And here is the initial question that you should think about decomposing:
|
|
-------
|
|
{question}
|
|
-------
|
|
|
|
{history}
|
|
|
|
Please formulate your answer as a newline-separated list of questions like so:
|
|
<sub-question>
|
|
<sub-question>
|
|
<sub-question>
|
|
...
|
|
|
|
Answer:"""
|
|
|
|
INITIAL_DECOMPOSITION_PROMPT = """ \n
|
|
Please decompose an initial user question into 2 or 3 appropriate sub-questions that help to
|
|
answer the original question. The purpose for this decomposition is to isolate individual entities
|
|
(i.e., 'compare sales of company A and company B' -> 'what are sales for company A' + 'what are sales
|
|
for company B'), split ambiguous terms (i.e., 'what is our success with company A' -> 'what are our
|
|
sales with company A' + 'what is our market share with company A' + 'is company A a reference customer
|
|
for us'), etc. Each sub-question should realistically be answerable by a good RAG system. \n
|
|
|
|
For each sub-question, please also create one search term that can be used to retrieve relevant
|
|
documents from a document store.
|
|
|
|
Here is the initial question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
|
|
Please formulate your answer as a list of json objects with the following format:
|
|
|
|
[{{"sub_question": <sub-question>, "search_term": <search term>}}, ...]
|
|
|
|
Answer:
|
|
"""
|
|
|
|
INITIAL_RAG_BASE_PROMPT = (
|
|
""" \n
|
|
You are an assistant for question-answering tasks. Use the information provided below - and only the
|
|
provided information - to answer the provided question.
|
|
|
|
The information provided below consists ofa number of documents that were deemed relevant for the question.
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
|
|
|
|
Try to keep your answer concise.
|
|
|
|
Here is the contextual information from the document store:
|
|
\n ------- \n
|
|
{context} \n\n\n
|
|
\n ------- \n
|
|
And here is the question I want you to answer based on the context above (with the motivation in mind):
|
|
\n--\n {question} \n--\n
|
|
Answer:"""
|
|
)
|
|
|
|
|
|
AGENT_DECISION_PROMPT = """
|
|
You are a large language model assistant helping users address their information needs. You are tasked with deciding
|
|
whether to use a thorough agent search ('research') of a document store to answer a question or request, or whether you want to
|
|
address the question or request yourself as an LLM.
|
|
|
|
Here are some rules:
|
|
- If you think that a thorough search through a document store will help answer the question
|
|
or address the request, you should choose the 'research' option.
|
|
- If the question asks you do something ('please create...', 'write for me...', etc.), you should choose the 'LLM' option.
|
|
- If you think the question is very general and does not refer to a contents of a document store, you should choose
|
|
the 'LLM' option.
|
|
- Otherwise, you should choose the 'research' option.
|
|
{history}
|
|
|
|
Here is the initial question:
|
|
-------
|
|
{question}
|
|
-------
|
|
|
|
Please decide whether to use the agent search or the LLM to answer the question. Choose from two choices,
|
|
'research' or 'LLM'.
|
|
|
|
Answer:"""
|
|
|
|
AGENT_DECISION_PROMPT_AFTER_SEARCH = """
|
|
You are a large language model assistant helping users address their information needs. You are given an initial question
|
|
or request and very few sample of documents that a preliminary and fast search from a document store returned.
|
|
You are tasked with deciding whether to use a thorough agent search ('research') of the document store to answer a question
|
|
or request, or whether you want to address the question or request yourself as an LLM.
|
|
|
|
Here are some rules:
|
|
- If based on the retrieved documents you think there may be useful information in the document
|
|
store to answer or materially help with the request, you should choose the 'research' option.
|
|
- If you think that the retrieved document do not help to answer the question or do not help with the request, AND
|
|
you know the answer/can handle the request, you should choose the 'LLM' option.
|
|
- If the question asks you do something ('please create...', 'write for me...', etc.), you should choose the 'LLM' option.
|
|
- If in doubt, choose the 'research' option.
|
|
{history}
|
|
|
|
Here is the initial question:
|
|
-------
|
|
{question}
|
|
-------
|
|
|
|
Here is the sample of documents that were retrieved from a document store:
|
|
-------
|
|
{sample_doc_str}
|
|
-------
|
|
|
|
Please decide whether to use the agent search ('research') or the LLM to answer the question. Choose from two choices,
|
|
'research' or 'LLM'.
|
|
|
|
Answer:"""
|
|
|
|
### ANSWER GENERATION PROMPTS
|
|
|
|
# Persona specification
|
|
ASSISTANT_SYSTEM_PROMPT_DEFAULT = """
|
|
You are an assistant for question-answering tasks."""
|
|
|
|
ASSISTANT_SYSTEM_PROMPT_PERSONA = """
|
|
You are an assistant for question-answering tasks. Here is more information about you:
|
|
\n ------- \n
|
|
{persona_prompt}
|
|
\n ------- \n
|
|
"""
|
|
|
|
SUB_QUESTION_ANSWER_TEMPLATE = """
|
|
Sub-Question: Q{sub_question_nr}\n Sub-Question:\n - \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
|
|
"""
|
|
|
|
SUB_QUESTION_ANSWER_TEMPLATE_REVISED = """
|
|
Sub-Question: Q{sub_question_nr}\n Type: {level_type}\n Sub-Question:\n
|
|
- \n{sub_question}\n --\nAnswer:\n -\n {sub_answer}\n\n
|
|
"""
|
|
|
|
SUB_QUESTION_SEARCH_RESULTS_TEMPLATE = """
|
|
Sub-Question: Q{sub_question_nr}\n Sub-Question:\n - \n{sub_question}\n --\nRelevant Documents:\n
|
|
-\n {formatted_sub_question_docs}\n\n
|
|
"""
|
|
|
|
INITIAL_RAG_PROMPT_SUB_QUESTION_SEARCH = (
|
|
""" \n
|
|
{persona_specification}
|
|
{date_prompt}
|
|
Use the information provided below - and only the provided information - to answer the main question that will be provided.
|
|
|
|
The information provided below consists of:
|
|
1) a number of sub-questions and supporting document information that would help answer them.
|
|
2) a broader collection of documents that were deemed relevant for the question. These documents contain information
|
|
that was also provided in the sub-questions and often more.
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
|
|
- The answers to the subquestions should help you to structure your thoughts in order to answer the question.
|
|
|
|
It is critical that you provide proper inline citations of documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
|
|
It is important that the citation is close to the information it supports. If you have multiple citations,
|
|
please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Feel free to cite documents in addition
|
|
to the sub-questions! Proper citations are important for the final answer to be verifiable! \n\n\n
|
|
|
|
Again, you should be sure that the answer is supported by the information provided!
|
|
|
|
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
|
|
or assumptions you made.
|
|
|
|
Here is the contextual information:
|
|
\n-------\n
|
|
*Answered Sub-questions (these should really help to organize your thoughts):
|
|
{answered_sub_questions}
|
|
|
|
And here are relevant document information that supports the sub-question answers, or that are relevant for the actual question:\n
|
|
|
|
{relevant_docs}
|
|
|
|
\n-------\n
|
|
\n
|
|
And here is the main question I want you to answer based on the information above:
|
|
\n--\n
|
|
{question}
|
|
\n--\n\n
|
|
Answer:"""
|
|
)
|
|
|
|
|
|
DIRECT_LLM_PROMPT = """ \n
|
|
{persona_specification}
|
|
|
|
Please answer the following question/address the request:
|
|
\n--\n
|
|
{question}
|
|
\n--\n\n
|
|
Answer:"""
|
|
|
|
INITIAL_RAG_PROMPT = (
|
|
""" \n
|
|
{persona_specification}
|
|
{date_prompt}
|
|
Use the information provided below - and only the provided information - to answer the provided main question.
|
|
|
|
The information provided below consists of:
|
|
1) a number of answered sub-questions - these are very important to help you organize your thoughts and your
|
|
answer
|
|
2) a number of documents that deemed relevant for the question.
|
|
|
|
{history}
|
|
|
|
It is critical that you provide prover inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc.!
|
|
It is important that the citation is close to the information it supports. If you have multiple citations that support
|
|
a fact, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
|
|
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
|
|
citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc.
|
|
Again, please NEVER cite sub-questions without a document citation!
|
|
Proper citations are very important for the user!
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is empty or irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
|
|
|
|
Again, you should be sure that the answer is supported by the information provided!
|
|
|
|
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
|
|
or assumptions you made.
|
|
|
|
Here is the contextual information:
|
|
\n-------\n
|
|
*Answered Sub-questions (these should really matter!):
|
|
{answered_sub_questions}
|
|
|
|
And here are relevant document information that support the sub-question answers, or that are relevant for the actual question:\n
|
|
|
|
{relevant_docs}
|
|
|
|
\n-------\n
|
|
\n
|
|
And here is the question I want you to answer based on the information above:
|
|
\n--\n
|
|
{question}
|
|
\n--\n\n
|
|
|
|
Please keep your answer brief and concise, and focus on facts and data.
|
|
|
|
Answer:"""
|
|
)
|
|
|
|
# sub_question_answer_str is empty
|
|
INITIAL_RAG_PROMPT_NO_SUB_QUESTIONS = (
|
|
"""{answered_sub_questions}
|
|
{persona_specification}
|
|
{date_prompt}
|
|
|
|
Use the information provided below - and only the provided information - to answer the provided question.
|
|
The information provided below consists of a number of documents that were deemed relevant for the question.
|
|
{history}
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, specify that the information is not conclusive and say why.
|
|
|
|
Again, you should be sure that the answer is supported by the information provided!
|
|
|
|
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
|
|
It is important that the citation is close to the information it supports. If you have multiple
|
|
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the
|
|
user!
|
|
|
|
Try to keep your answer concise.
|
|
|
|
Here are is the relevant context information:
|
|
\n-------\n
|
|
{relevant_docs}
|
|
\n-------\n
|
|
|
|
And here is the question I want you to answer based on the context above
|
|
\n--\n
|
|
{question}
|
|
\n--\n
|
|
|
|
Please keep your answer brief and concise, and focus on facts and data.
|
|
|
|
Answer:"""
|
|
)
|
|
|
|
REVISED_RAG_PROMPT = (
|
|
"""\n
|
|
{persona_specification}
|
|
{date_prompt}
|
|
Your task is to improve on a given answer to a question, as the initial answer was found to be lacking in some way.
|
|
|
|
Use the information provided below - and only the provided information - to write your new and improved answer.
|
|
|
|
The information provided below consists of:
|
|
1) an initial answer that was given but found to be lacking in some way.
|
|
|
|
2) a number of answered sub-questions - these are very important(!) and definitely should help you to answer
|
|
the main question. Note that the sub-questions have a type, 'initial' and 'revised'. The 'initial'
|
|
ones were available for the initial answer, and the 'revised' were not, they are new. So please use
|
|
the 'revised' sub-questions in particular to update/extend/correct the initial answer!
|
|
|
|
3) a number of documents that were deemed relevant for the question. This the is the context that you use largely for
|
|
citations (see below). So consider the answers to the sub-questions as guidelines to construct your new answer, but
|
|
make sure you cite the relevant document for a fact!
|
|
|
|
It is critical that you provide proper inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
|
|
It is important that the citation is close to the information it supports. If you have multiple
|
|
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc.
|
|
Feel free to also cite sub-questions in addition to documents, but make sure that you have documents cited with the sub-question
|
|
citation. If you want to cite both a document and a sub-question, please use [[D1]]()[[Q3]](), or [[D2]]()[[D7]]()[[Q4]](), etc.
|
|
Again, please NEVER cite sub-questions without a document citation!
|
|
Proper citations are very important for the user!\n\n
|
|
|
|
{history}
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is empty or irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, provide an answer to the extent you can but also
|
|
specify that the information is not conclusive and why.
|
|
- Ignore any existing citations within the answered sub-questions, like [[D1]]()... and [[Q2]]()!
|
|
The citations you will need to use will need to refer to the documents (and sub-questions) that you are explicitly
|
|
presented with below!
|
|
|
|
Again, you should be sure that the answer is supported by the information provided!
|
|
|
|
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
|
|
or assumptions you made.
|
|
|
|
Here is the contextual information:
|
|
\n-------\n
|
|
|
|
*Initial Answer that was found to be lacking:
|
|
{initial_answer}
|
|
|
|
*Answered Sub-questions (these should really help you to research your answer! They also contain questions/answers
|
|
that were not available when the original answer was constructed):
|
|
{answered_sub_questions}
|
|
|
|
And here are the relevant documents that support the sub-question answers, and that are relevant for the actual question:\n
|
|
|
|
{relevant_docs}
|
|
|
|
\n-------\n
|
|
\n
|
|
Lastly, here is the main question I want you to answer based on the information above:
|
|
\n--\n
|
|
{question}
|
|
\n--\n\n
|
|
|
|
Please keep your answer brief and concise, and focus on facts and data.
|
|
|
|
Answer:"""
|
|
)
|
|
|
|
# sub_question_answer_str is empty
|
|
REVISED_RAG_PROMPT_NO_SUB_QUESTIONS = (
|
|
"""{answered_sub_questions}\n
|
|
{persona_specification}
|
|
{date_prompt}
|
|
Use the information provided below - and only the
|
|
provided information - to answer the provided question.
|
|
|
|
The information provided below consists of:
|
|
1) an initial answer that was given but found to be lacking in some way.
|
|
2) a number of documents that were also deemed relevant for the question.
|
|
|
|
It is critical that you provide proper] inline citations to documents in the format [[D1]](), [[D2]](), [[D3]](), etc!
|
|
It is important that the citation is close to the information it supports. If you have multiple
|
|
citations, please cite for example as [[D1]]()[[D3]](), or [[D2]]()[[D4]](), etc. Citations are very important for the user!\n\n
|
|
|
|
{history}
|
|
|
|
IMPORTANT RULES:
|
|
- If you cannot reliably answer the question solely using the provided information, say that you cannot reliably answer.
|
|
You may give some additional facts you learned, but do not try to invent an answer.
|
|
- If the information is empty or irrelevant, just say """
|
|
+ f'"{UNKNOWN_ANSWER}"'
|
|
+ """.
|
|
- If the information is relevant but not fully conclusive, provide and answer to the extent you can but also
|
|
specify that the information is not conclusive and why.
|
|
|
|
Again, you should be sure that the answer is supported by the information provided!
|
|
|
|
Try to keep your answer concise. But also highlight uncertainties you may have should there be substantial ones,
|
|
or assumptions you made.
|
|
|
|
Here is the contextual information:
|
|
\n-------\n
|
|
|
|
*Initial Answer that was found to be lacking:
|
|
{initial_answer}
|
|
|
|
And here are relevant document information that support the sub-question answers, or that are relevant for the actual question:\n
|
|
|
|
{relevant_docs}
|
|
|
|
\n-------\n
|
|
\n
|
|
Lastly, here is the question I want you to answer based on the information above:
|
|
\n--\n
|
|
{question}
|
|
\n--\n\n
|
|
Please keep your answer brief and concise, and focus on facts and data.
|
|
|
|
Answer:"""
|
|
)
|
|
|
|
|
|
ENTITY_TERM_PROMPT = """ \n
|
|
Based on the original question and some context retrieved from a dataset, please generate a list of
|
|
entities (e.g. companies, organizations, industries, products, locations, etc.), terms and concepts
|
|
(e.g. sales, revenue, etc.) that are relevant for the question, plus their relations to each other.
|
|
|
|
\n\n
|
|
Here is the original question:
|
|
\n ------- \n
|
|
{question}
|
|
\n ------- \n
|
|
And here is the context retrieved:
|
|
\n ------- \n
|
|
{context}
|
|
\n ------- \n
|
|
|
|
Please format your answer as a json object in the following format:
|
|
|
|
{{"retrieved_entities_relationships": {{
|
|
"entities": [{{
|
|
"entity_name": <assign a name for the entity>,
|
|
"entity_type": <specify a short type name for the entity, such as 'company', 'location',...>
|
|
}}],
|
|
"relationships": [{{
|
|
"relationship_name": <assign a name for the relationship>,
|
|
"relationship_type": <specify a short type name for the relationship, such as 'sales_to', 'is_location_of',...>,
|
|
"relationship_entities": [<related entity name 1>, <related entity name 2>, ...]
|
|
}}],
|
|
"terms": [{{
|
|
"term_name": <assign a name for the term>,
|
|
"term_type": <specify a short type name for the term, such as 'revenue', 'market_share',...>,
|
|
"term_similar_to": <list terms that are similar to this term>
|
|
}}]
|
|
}}
|
|
}}
|
|
|
|
"""
|
|
ANSWER_COMPARISON_PROMPT = """
|
|
For the given question, please compare the initial answer and the refined answer and determine if
|
|
the refined answer is substantially better than the initial answer. Better could mean:
|
|
- additional information
|
|
- more comprehensive information
|
|
- more concise information
|
|
- more structured information
|
|
- new bullet points
|
|
- substantially more document citations ([[D1]](), [[D2]](), [[D3]](), etc.)
|
|
|
|
Put yourself in the shoes of the user and think about whether the refined answer is really substantially
|
|
better than the initial answer.
|
|
|
|
Here is the question:
|
|
--
|
|
{question}
|
|
--
|
|
|
|
Here is the initial answer:
|
|
--
|
|
{initial_answer}
|
|
--
|
|
|
|
Here is the refined answer:
|
|
--
|
|
{refined_answer}
|
|
--
|
|
|
|
With these criteria in mind, is the refined answer substantially better than the initial answer?
|
|
|
|
Please answer with a simple 'yes' or 'no'.
|
|
"""
|
|
HISTORY_CONTEXT_SUMMARY_PROMPT = """\n
|
|
{persona_specification}
|
|
You need to summarize the key parts of the history of a conversation between a user and an agent
|
|
strictly for the purposed of providing the suitable context for a question.
|
|
|
|
Here is the question:
|
|
\n--\n
|
|
{question}
|
|
\n--\n
|
|
|
|
And here is the history:
|
|
\n--\n
|
|
{history}
|
|
\n--\n
|
|
|
|
Please provide a summarized context from the history so that the question makes sense and can - with
|
|
suitable extra information - be answered.
|
|
|
|
Please do not use more than three or four sentences.
|
|
|
|
Context summary:
|
|
"""
|