From bc403d97f2c4c0d99dc005bcc48ae27775e72924 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Sun, 3 Dec 2023 13:27:11 -0800 Subject: [PATCH] Organize Prompts for Chat implementation (#807) --- backend/danswer/prompts/answer_validation.py | 35 ++++ backend/danswer/prompts/direct_qa_prompts.py | 2 + backend/danswer/prompts/filter_extration.py | 66 +++++++ backend/danswer/prompts/llm_chunk_filter.py | 30 +++ .../danswer/prompts/miscellaneous_prompts.py | 17 ++ backend/danswer/prompts/query_validation.py | 58 ++++++ .../danswer/prompts/secondary_llm_flows.py | 172 ------------------ .../secondary_llm_flows/answer_validation.py | 2 +- .../secondary_llm_flows/chunk_usefulness.py | 4 +- .../secondary_llm_flows/query_expansion.py | 2 +- .../secondary_llm_flows/query_validation.py | 2 +- .../secondary_llm_flows/source_filter.py | 6 +- .../secondary_llm_flows/time_filter.py | 2 +- 13 files changed, 217 insertions(+), 181 deletions(-) create mode 100644 backend/danswer/prompts/answer_validation.py create mode 100644 backend/danswer/prompts/filter_extration.py create mode 100644 backend/danswer/prompts/llm_chunk_filter.py create mode 100644 backend/danswer/prompts/miscellaneous_prompts.py create mode 100644 backend/danswer/prompts/query_validation.py delete mode 100644 backend/danswer/prompts/secondary_llm_flows.py diff --git a/backend/danswer/prompts/answer_validation.py b/backend/danswer/prompts/answer_validation.py new file mode 100644 index 0000000000..1a29a48ce5 --- /dev/null +++ b/backend/danswer/prompts/answer_validation.py @@ -0,0 +1,35 @@ +# The following prompts are used for verifying the LLM answer after it is already produced. +# Reflexion flow essentially. This feature can be toggled on/off +from danswer.prompts.constants import ANSWER_PAT +from danswer.prompts.constants import QUESTION_PAT + + +ANSWER_VALIDITY_PROMPT = f""" +You are an assistant to identify invalid query/answer pairs coming from a large language model. 
+The query/answer pair is invalid if any of the following are True: +1. Query is asking for information that varies by person or is subjective. If there is not a \ +globally true answer, the language model should not respond, therefore any answer is invalid. +2. Answer addresses a related but different query. To be helpful, the model may provide \ +related information about a query but it won't match what the user is asking, this is invalid. +3. Answer is just some form of "I don\'t know" or "not enough information" without significant \ +additional useful information. Explaining why it does not know or cannot answer is invalid. + +{QUESTION_PAT} {{user_query}} +{ANSWER_PAT} {{llm_answer}} + +------------------------ +You MUST answer in EXACTLY the following format: +``` +1. True or False +2. True or False +3. True or False +Final Answer: Valid or Invalid +``` + +Hint: Remember, if ANY of the conditions are True, it is Invalid. +""".strip() + + +# Use the following for easy viewing of prompts +if __name__ == "__main__": + print(ANSWER_VALIDITY_PROMPT) diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index f632af3a8a..474cdfbf75 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -1,3 +1,5 @@ +# The following prompts are used for the initial response before a chat history exists +# It is used also for the one shot direct QA flow import json from danswer.prompts.constants import ANSWER_PAT diff --git a/backend/danswer/prompts/filter_extration.py b/backend/danswer/prompts/filter_extration.py new file mode 100644 index 0000000000..ce3596a07f --- /dev/null +++ b/backend/danswer/prompts/filter_extration.py @@ -0,0 +1,66 @@ +# The following prompts are used for extracting filters to apply along with the query in the +# document index.
For example, a filter for dates or a filter by source type such as GitHub +# or Slack +from danswer.prompts.constants import SOURCES_KEY + + +# Smaller followup prompts in time_filter.py +TIME_FILTER_PROMPT = """ +You are a tool to identify time filters to apply to a user query for a downstream search \ +application. The downstream application is able to use a recency bias or apply a hard cutoff to \ +remove all documents before the cutoff. Identify the correct filters to apply for the user query. + +The current day and time is {current_day_time_str}. + +Always answer with ONLY a json which contains the keys "filter_type", "filter_value", \ +"value_multiple" and "date". + +The valid values for "filter_type" are "hard cutoff", "favors recent", or "not time sensitive". +The valid values for "filter_value" are "day", "week", "month", "quarter", "half", or "year". +The valid values for "value_multiple" is any number. +The valid values for "date" is a date in format MM/DD/YYYY, ALWAYS follow this format. +""".strip() + + +# Smaller followup prompts in source_filter.py +# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contains "web" but not +# "confluence" and the user asks for confluence related things, the LLM will select "web" since +# confluence is accessed as a website. This cannot be fixed without also reducing the capability +# to match things like repository->github, website->web, etc. +# This is generally not a big issue though as if the company has confluence, hopefully they add +# a connector for it or the user is aware that confluence has not been added. +SOURCE_FILTER_PROMPT = f""" +Given a user query, extract relevant source filters for use in a downstream search tool. +Respond with a json containing the source filters or null if no specific sources are referenced. +ONLY extract sources when the user is explicitly limiting the scope of where information is \ +coming from. +The user may provide invalid source filters, ignore those. 
+ +The valid sources are: +{{valid_sources}} +{{web_source_warning}} +{{file_source_warning}} + + +ALWAYS answer with ONLY a json with the key "{SOURCES_KEY}". \ +The value for "{SOURCES_KEY}" must be null or a list of valid sources. + +Sample Response: +{{sample_response}} +""".strip() + +WEB_SOURCE_WARNING = """ +Note: The "web" source only applies to when the user specifies "website" in the query. \ +It does not apply to tools such as Confluence, GitHub, etc. which have a website. +""".strip() + +FILE_SOURCE_WARNING = """ +Note: The "file" source only applies to when the user refers to uploaded files in the query. +""".strip() + + +# Use the following for easy viewing of prompts +if __name__ == "__main__": + print(TIME_FILTER_PROMPT) + print("------------------") + print(SOURCE_FILTER_PROMPT) diff --git a/backend/danswer/prompts/llm_chunk_filter.py b/backend/danswer/prompts/llm_chunk_filter.py new file mode 100644 index 0000000000..623ae58770 --- /dev/null +++ b/backend/danswer/prompts/llm_chunk_filter.py @@ -0,0 +1,30 @@ +# The following prompts are used to pass each chunk to the LLM (the cheap/fast one) +# to determine if the chunk is useful towards the user query. This is used as part +# of the reranking flow + +USEFUL_PAT = "Yes useful" +NONUSEFUL_PAT = "Not useful" +CHUNK_FILTER_PROMPT = f""" +Determine if the reference section is USEFUL for answering the user query. +It is NOT enough for the section to be related to the query, \ +it must contain information that is USEFUL for answering the query. +If the section contains ANY useful information, that is good enough, \ +it does not need to fully answer every part of the user query.
+ +Reference Section: +``` +{{chunk_text}} +``` + +User Query: +``` +{{user_query}} +``` + +Respond with EXACTLY AND ONLY: "{USEFUL_PAT}" or "{NONUSEFUL_PAT}" +""".strip() + + +# Use the following for easy viewing of prompts +if __name__ == "__main__": + print(CHUNK_FILTER_PROMPT) diff --git a/backend/danswer/prompts/miscellaneous_prompts.py b/backend/danswer/prompts/miscellaneous_prompts.py new file mode 100644 index 0000000000..c58cbb48c2 --- /dev/null +++ b/backend/danswer/prompts/miscellaneous_prompts.py @@ -0,0 +1,17 @@ +# Prompts that aren't part of a particular configurable feature + +LANGUAGE_REPHRASE_PROMPT = """ +Rephrase the query in {target_language}. +If the query is already in the correct language, \ +simply repeat the ORIGINAL query back to me, EXACTLY as is with no rephrasing. +NEVER change proper nouns, technical terms, acronyms, or terms you are not familiar with. +IMPORTANT, if the query is already in the target language, DO NOT REPHRASE OR EDIT the query! + +Query: +{query} +""".strip() + + +# Use the following for easy viewing of prompts +if __name__ == "__main__": + print(LANGUAGE_REPHRASE_PROMPT) diff --git a/backend/danswer/prompts/query_validation.py b/backend/danswer/prompts/query_validation.py new file mode 100644 index 0000000000..6816322992 --- /dev/null +++ b/backend/danswer/prompts/query_validation.py @@ -0,0 +1,58 @@ +# The following prompts are used for verifying if the user's query can be answered by the current +# system. 
Many new users do not understand the design/capabilities of the system and will ask +# questions that are unanswerable such as aggregations or user specific questions that the system +# cannot handle, this is used to identify those cases +from danswer.prompts.constants import ANSWERABLE_PAT +from danswer.prompts.constants import GENERAL_SEP_PAT +from danswer.prompts.constants import QUESTION_PAT +from danswer.prompts.constants import THOUGHT_PAT + + +ANSWERABLE_PROMPT = f""" +You are a helper tool to determine if a query is answerable using retrieval augmented generation. +The main system will try to answer the user query based on ONLY the top 5 most relevant \ +documents found from search. +Sources contain both up to date and proprietary information for the specific team. +For named or unknown entities, assume the search will find relevant and consistent knowledge \ +about the entity. +The system is not tuned for writing code. +The system is not tuned for interfacing with structured data via query languages like SQL. +If the question might not require code or query language, then assume it can be answered without \ +code or query language. +Determine if that system should attempt to answer. +"ANSWERABLE" must be exactly "True" or "False" + +{GENERAL_SEP_PAT} + +{QUESTION_PAT.upper()} What is this Slack channel about? +``` +{THOUGHT_PAT.upper()} First the system must determine which Slack channel is being referred to. \ +By fetching 5 documents related to Slack channel contents, it is not possible to determine which \ +Slack channel the user is referring to. +{ANSWERABLE_PAT.upper()} False +``` + +{QUESTION_PAT.upper()} Danswer is unreachable. +``` +{THOUGHT_PAT.upper()} The system searches documents related to Danswer being unreachable. \ +Assuming the documents from search contains situations where Danswer is not reachable and \ +contains a fix, the query may be answerable. 
+{ANSWERABLE_PAT.upper()} True +``` + +{QUESTION_PAT.upper()} How many customers do we have +``` +{THOUGHT_PAT.upper()} Assuming the retrieved documents contain up to date customer acquisition \ +information including a list of customers, the query can be answered. It is important to note \ +that if the information only exists in a SQL database, the system is unable to execute SQL and \ +won't find an answer. +{ANSWERABLE_PAT.upper()} True +``` + +{QUESTION_PAT.upper()} {{user_query}} +""".strip() + + +# Use the following for easy viewing of prompts +if __name__ == "__main__": + print(ANSWERABLE_PROMPT) diff --git a/backend/danswer/prompts/secondary_llm_flows.py b/backend/danswer/prompts/secondary_llm_flows.py deleted file mode 100644 index fecd985978..0000000000 --- a/backend/danswer/prompts/secondary_llm_flows.py +++ /dev/null @@ -1,172 +0,0 @@ -from danswer.prompts.constants import ANSWER_PAT -from danswer.prompts.constants import ANSWERABLE_PAT -from danswer.prompts.constants import GENERAL_SEP_PAT -from danswer.prompts.constants import QUESTION_PAT -from danswer.prompts.constants import SOURCES_KEY -from danswer.prompts.constants import THOUGHT_PAT - - -ANSWER_VALIDITY_PROMPT = f""" -You are an assistant to identify invalid query/answer pairs coming from a large language model. -The query/answer pair is invalid if any of the following are True: -1. Query is asking for information that varies by person or is subjective. If there is not a \ -globally true answer, the language model should not respond, therefore any answer is invalid. -2. Answer addresses a related but different query. To be helpful, the model may provide provide \ -related information about a query but it won't match what the user is asking, this is invalid. -3. Answer is just some form of "I don\'t know" or "not enough information" without significant \ -additional useful information. Explaining why it does not know or cannot answer is invalid. 
- -{QUESTION_PAT} {{user_query}} -{ANSWER_PAT} {{llm_answer}} - ------------------------- -You MUST answer in EXACTLY the following format: -``` -1. True or False -2. True or False -3. True or False -Final Answer: Valid or Invalid -``` - -Hint: Remember, if ANY of the conditions are True, it is Invalid. -""".strip() - - -ANSWERABLE_PROMPT = f""" -You are a helper tool to determine if a query is answerable using retrieval augmented generation. -The main system will try to answer the user query based on ONLY the top 5 most relevant \ -documents found from search. -Sources contain both up to date and proprietary information for the specific team. -For named or unknown entities, assume the search will find relevant and consistent knowledge \ -about the entity. -The system is not tuned for writing code. -The system is not tuned for interfacing with structured data via query languages like SQL. -If the question might not require code or query language, then assume it can be answered without \ -code or query language. -Determine if that system should attempt to answer. -"ANSWERABLE" must be exactly "True" or "False" - -{GENERAL_SEP_PAT} - -{QUESTION_PAT.upper()} What is this Slack channel about? -``` -{THOUGHT_PAT.upper()} First the system must determine which Slack channel is being referred to. \ -By fetching 5 documents related to Slack channel contents, it is not possible to determine which \ -Slack channel the user is referring to. -{ANSWERABLE_PAT.upper()} False -``` - -{QUESTION_PAT.upper()} Danswer is unreachable. -``` -{THOUGHT_PAT.upper()} The system searches documents related to Danswer being unreachable. \ -Assuming the documents from search contains situations where Danswer is not reachable and \ -contains a fix, the query may be answerable. 
-{ANSWERABLE_PAT.upper()} True -``` - -{QUESTION_PAT.upper()} How many customers do we have -``` -{THOUGHT_PAT.upper()} Assuming the retrieved documents contain up to date customer acquisition \ -information including a list of customers, the query can be answered. It is important to note \ -that if the information only exists in a SQL database, the system is unable to execute SQL and \ -won't find an answer. -{ANSWERABLE_PAT.upper()} True -``` - -{QUESTION_PAT.upper()} {{user_query}} -""".strip() - - -# Smaller followup prompts in time_filter.py -TIME_FILTER_PROMPT = """ -You are a tool to identify time filters to apply to a user query for a downstream search \ -application. The downstream application is able to use a recency bias or apply a hard cutoff to \ -remove all documents before the cutoff. Identify the correct filters to apply for the user query. - -The current day and time is {current_day_time_str}. - -Always answer with ONLY a json which contains the keys "filter_type", "filter_value", \ -"value_multiple" and "date". - -The valid values for "filter_type" are "hard cutoff", "favors recent", or "not time sensitive". -The valid values for "filter_value" are "day", "week", "month", "quarter", "half", or "year". -The valid values for "value_multiple" is any number. -The valid values for "date" is a date in format MM/DD/YYYY, ALWAYS follow this format. -""".strip() - - -# Smaller followup prompts in source_filter.py -# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contains "web" but not -# "confluence" and the user asks for confluence related things, the LLM will select "web" since -# confluence is accessed as a website. This cannot be fixed without also reducing the capability -# to match things like repository->github, website->web, etc. -# This is generally not a big issue though as if the company has confluence, hopefully they add -# a connector for it or the user is aware that confluence has not been added. 
-SOURCE_FILTER_PROMPT = f""" -Given a user query, extract relevant source filters for use in a downstream search tool. -Respond with a json containing the source filters or null if no specific sources are referenced. -ONLY extract sources when the user is explicitly limiting the scope of where information is \ -coming from. -The user may provide invalid source filters, ignore those. - -The valid sources are: -{{valid_sources}} -{{web_source_warning}} -{{file_source_warning}} - - -ALWAYS answer with ONLY a json with the key "{SOURCES_KEY}". \ -The value for "{SOURCES_KEY}" must be null or a list of valid sources. - -Sample Response: -{{sample_response}} -""".strip() - -WEB_SOURCE_WARNING = """ -Note: The "web" source only applies to when the user specifies "website" in the query. \ -It does not apply to tools such as Confluence, GitHub, etc. which have a website. -""".strip() - -FILE_SOURCE_WARNING = """ -Note: The "file" source only applies to when the user refers to uploaded files in the query. -""".strip() - - -USEFUL_PAT = "Yes useful" -NONUSEFUL_PAT = "Not useful" -CHUNK_FILTER_PROMPT = f""" -Determine if the reference section is USEFUL for answering the user query. -It is NOT enough for the section to be related to the query, \ -it must contain information that is USEFUL for answering the query. -If the section contains ANY useful information, that is good enough, \ -it does not need to fully answer the every part of the user query. - -Reference Section: -``` -{{chunk_text}} -``` - -User Query: -``` -{{user_query}} -``` - -Respond with EXACTLY AND ONLY: "{USEFUL_PAT}" or "{NONUSEFUL_PAT}" -""".strip() - - -LANGUAGE_REPHRASE_PROMPT = """ -Rephrase the query in {target_language}. -If the query is already in the correct language, \ -simply repeat the ORIGINAL query back to me, EXACTLY as is with no rephrasing. -NEVER change proper nouns, technical terms, acronyms, or terms you are not familiar with. 
-IMPORTANT, if the query is already in the target language, DO NOT REPHRASE OR EDIT the query! - -Query: -{query} -""".strip() - - -# User the following for easy viewing of prompts -if __name__ == "__main__": - print(ANSWERABLE_PROMPT) diff --git a/backend/danswer/secondary_llm_flows/answer_validation.py b/backend/danswer/secondary_llm_flows/answer_validation.py index 4ef8e8bef3..26b0e096fd 100644 --- a/backend/danswer/secondary_llm_flows/answer_validation.py +++ b/backend/danswer/secondary_llm_flows/answer_validation.py @@ -1,6 +1,6 @@ from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt -from danswer.prompts.secondary_llm_flows import ANSWER_VALIDITY_PROMPT +from danswer.prompts.answer_validation import ANSWER_VALIDITY_PROMPT from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time diff --git a/backend/danswer/secondary_llm_flows/chunk_usefulness.py b/backend/danswer/secondary_llm_flows/chunk_usefulness.py index 636401912c..b977947bf4 100644 --- a/backend/danswer/secondary_llm_flows/chunk_usefulness.py +++ b/backend/danswer/secondary_llm_flows/chunk_usefulness.py @@ -2,8 +2,8 @@ from collections.abc import Callable from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt -from danswer.prompts.secondary_llm_flows import CHUNK_FILTER_PROMPT -from danswer.prompts.secondary_llm_flows import NONUSEFUL_PAT +from danswer.prompts.llm_chunk_filter import CHUNK_FILTER_PROMPT +from danswer.prompts.llm_chunk_filter import NONUSEFUL_PAT from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel diff --git a/backend/danswer/secondary_llm_flows/query_expansion.py b/backend/danswer/secondary_llm_flows/query_expansion.py index 874ca2131a..c80dbcbfc3 100644 --- a/backend/danswer/secondary_llm_flows/query_expansion.py +++ 
b/backend/danswer/secondary_llm_flows/query_expansion.py @@ -2,7 +2,7 @@ from collections.abc import Callable from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt -from danswer.prompts.secondary_llm_flows import LANGUAGE_REPHRASE_PROMPT +from danswer.prompts.miscellaneous_prompts import LANGUAGE_REPHRASE_PROMPT from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py index 7d2b752ad7..a345127247 100644 --- a/backend/danswer/secondary_llm_flows/query_validation.py +++ b/backend/danswer/secondary_llm_flows/query_validation.py @@ -7,7 +7,7 @@ from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.prompts.constants import ANSWERABLE_PAT from danswer.prompts.constants import THOUGHT_PAT -from danswer.prompts.secondary_llm_flows import ANSWERABLE_PROMPT +from danswer.prompts.query_validation import ANSWERABLE_PROMPT from danswer.server.chat.models import QueryValidationResponse from danswer.server.utils import get_json_line from danswer.utils.logger import setup_logger diff --git a/backend/danswer/secondary_llm_flows/source_filter.py b/backend/danswer/secondary_llm_flows/source_filter.py index 7327eac04a..3560e072af 100644 --- a/backend/danswer/secondary_llm_flows/source_filter.py +++ b/backend/danswer/secondary_llm_flows/source_filter.py @@ -9,9 +9,9 @@ from danswer.db.engine import get_sqlalchemy_engine from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.prompts.constants import SOURCES_KEY -from danswer.prompts.secondary_llm_flows import FILE_SOURCE_WARNING -from danswer.prompts.secondary_llm_flows import SOURCE_FILTER_PROMPT -from 
danswer.prompts.secondary_llm_flows import WEB_SOURCE_WARNING +from danswer.prompts.filter_extration import FILE_SOURCE_WARNING +from danswer.prompts.filter_extration import SOURCE_FILTER_PROMPT +from danswer.prompts.filter_extration import WEB_SOURCE_WARNING from danswer.utils.logger import setup_logger from danswer.utils.text_processing import extract_embedded_json from danswer.utils.timing import log_function_time diff --git a/backend/danswer/secondary_llm_flows/time_filter.py b/backend/danswer/secondary_llm_flows/time_filter.py index 96fa079bd6..ebd1d80fd3 100644 --- a/backend/danswer/secondary_llm_flows/time_filter.py +++ b/backend/danswer/secondary_llm_flows/time_filter.py @@ -7,8 +7,8 @@ from dateutil.parser import parse from danswer.llm.factory import get_default_llm from danswer.llm.utils import dict_based_prompt_to_langchain_prompt +from danswer.prompts.filter_extration import TIME_FILTER_PROMPT from danswer.prompts.prompt_utils import get_current_llm_day_time -from danswer.prompts.secondary_llm_flows import TIME_FILTER_PROMPT from danswer.utils.logger import setup_logger from danswer.utils.timing import log_function_time