From bc403d97f2c4c0d99dc005bcc48ae27775e72924 Mon Sep 17 00:00:00 2001
From: Yuhong Sun <yuhongsun96@gmail.com>
Date: Sun, 3 Dec 2023 13:27:11 -0800
Subject: [PATCH] Organize Prompts for Chat implementation (#807)

---
 backend/danswer/prompts/answer_validation.py  |  35 ++++
 backend/danswer/prompts/direct_qa_prompts.py  |   2 +
 backend/danswer/prompts/filter_extration.py   |  66 +++++++
 backend/danswer/prompts/llm_chunk_filter.py   |  30 +++
 .../danswer/prompts/miscellaneous_prompts.py  |  17 ++
 backend/danswer/prompts/query_validation.py   |  58 ++++++
 .../danswer/prompts/secondary_llm_flows.py    | 172 ------------------
 .../secondary_llm_flows/answer_validation.py  |   2 +-
 .../secondary_llm_flows/chunk_usefulness.py   |   4 +-
 .../secondary_llm_flows/query_expansion.py    |   2 +-
 .../secondary_llm_flows/query_validation.py   |   2 +-
 .../secondary_llm_flows/source_filter.py      |   6 +-
 .../secondary_llm_flows/time_filter.py        |   2 +-
 13 files changed, 217 insertions(+), 181 deletions(-)
 create mode 100644 backend/danswer/prompts/answer_validation.py
 create mode 100644 backend/danswer/prompts/filter_extration.py
 create mode 100644 backend/danswer/prompts/llm_chunk_filter.py
 create mode 100644 backend/danswer/prompts/miscellaneous_prompts.py
 create mode 100644 backend/danswer/prompts/query_validation.py
 delete mode 100644 backend/danswer/prompts/secondary_llm_flows.py

diff --git a/backend/danswer/prompts/answer_validation.py b/backend/danswer/prompts/answer_validation.py
new file mode 100644
index 0000000000..1a29a48ce5
--- /dev/null
+++ b/backend/danswer/prompts/answer_validation.py
@@ -0,0 +1,35 @@
+# The following prompts are used for verifying the LLM answer after it has been produced.
+# This is essentially a Reflexion flow. This feature can be toggled on/off.
+from danswer.prompts.constants import ANSWER_PAT
+from danswer.prompts.constants import QUESTION_PAT
+
+
+ANSWER_VALIDITY_PROMPT = f"""
+You are an assistant to identify invalid query/answer pairs coming from a large language model.
+The query/answer pair is invalid if any of the following are True:
+1. Query is asking for information that varies by person or is subjective. If there is not a \
+globally true answer, the language model should not respond, therefore any answer is invalid.
+2. Answer addresses a related but different query. To be helpful, the model may provide provide \
+related information about a query but it won't match what the user is asking, this is invalid.
+3. Answer is just some form of "I don\'t know" or "not enough information" without significant \
+additional useful information. Explaining why it does not know or cannot answer is invalid.
+
+{QUESTION_PAT} {{user_query}}
+{ANSWER_PAT} {{llm_answer}}
+
+------------------------
+You MUST answer in EXACTLY the following format:
+```
+1. True or False
+2. True or False
+3. True or False
+Final Answer: Valid or Invalid
+```
+
+Hint: Remember, if ANY of the conditions are True, it is Invalid.
+""".strip()
+
+
+# Use the following for easy viewing of prompts
+if __name__ == "__main__":
+    print(ANSWER_VALIDITY_PROMPT)
diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py
index f632af3a8a..474cdfbf75 100644
--- a/backend/danswer/prompts/direct_qa_prompts.py
+++ b/backend/danswer/prompts/direct_qa_prompts.py
@@ -1,3 +1,5 @@
+# The following prompts are used for the initial response, before a chat history exists.
+# They are also used for the one-shot direct QA flow.
 import json
 
 from danswer.prompts.constants import ANSWER_PAT
diff --git a/backend/danswer/prompts/filter_extration.py b/backend/danswer/prompts/filter_extration.py
new file mode 100644
index 0000000000..ce3596a07f
--- /dev/null
+++ b/backend/danswer/prompts/filter_extration.py
@@ -0,0 +1,66 @@
+# The following prompts are used for extracting filters to apply along with the query in the
+# document index. For example, a filter for dates or a filter by source type such as GitHub
+# or Slack
+from danswer.prompts.constants import SOURCES_KEY
+
+
+# Smaller followup prompts in time_filter.py
+TIME_FILTER_PROMPT = """
+You are a tool to identify time filters to apply to a user query for a downstream search \
+application. The downstream application is able to use a recency bias or apply a hard cutoff to \
+remove all documents before the cutoff. Identify the correct filters to apply for the user query.
+
+The current day and time is {current_day_time_str}.
+
+Always answer with ONLY a json which contains the keys "filter_type", "filter_value", \
+"value_multiple" and "date".
+
+The valid values for "filter_type" are "hard cutoff", "favors recent", or "not time sensitive".
+The valid values for "filter_value" are "day", "week", "month", "quarter", "half", or "year".
+The valid values for "value_multiple" is any number.
+The valid values for "date" is a date in format MM/DD/YYYY, ALWAYS follow this format.
+""".strip()
+
+
+# Smaller followup prompts in source_filter.py
+# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contain "web" but not
+# "confluence" and the user asks for confluence related things, the LLM will select "web" since
+# confluence is accessed as a website. This cannot be fixed without also reducing the capability
+# to match things like repository->github, website->web, etc.
+# This is generally not a big issue though: if the company has confluence, hopefully they add
+# a connector for it or the user is aware that confluence has not been added.
+SOURCE_FILTER_PROMPT = f"""
+Given a user query, extract relevant source filters for use in a downstream search tool.
+Respond with a json containing the source filters or null if no specific sources are referenced.
+ONLY extract sources when the user is explicitly limiting the scope of where information is \
+coming from.
+The user may provide invalid source filters, ignore those.
+
+The valid sources are:
+{{valid_sources}}
+{{web_source_warning}}
+{{file_source_warning}}
+
+
+ALWAYS answer with ONLY a json with the key "{SOURCES_KEY}". \
+The value for "{SOURCES_KEY}" must be null or a list of valid sources.
+
+Sample Response:
+{{sample_response}}
+""".strip()
+
+WEB_SOURCE_WARNING = """
+Note: The "web" source only applies to when the user specifies "website" in the query. \
+It does not apply to tools such as Confluence, GitHub, etc. which have a website.
+""".strip()
+
+FILE_SOURCE_WARNING = """
+Note: The "file" source only applies to when the user refers to uploaded files in the query.
+""".strip()
+
+
+# Use the following for easy viewing of prompts
+if __name__ == "__main__":
+    print(TIME_FILTER_PROMPT)
+    print("------------------")
+    print(SOURCE_FILTER_PROMPT)
diff --git a/backend/danswer/prompts/llm_chunk_filter.py b/backend/danswer/prompts/llm_chunk_filter.py
new file mode 100644
index 0000000000..623ae58770
--- /dev/null
+++ b/backend/danswer/prompts/llm_chunk_filter.py
@@ -0,0 +1,30 @@
+# The following prompts are used to pass each chunk to the LLM (the cheap/fast one)
+# to determine if the chunk is useful towards the user query. This is used as part
+# of the reranking flow
+
+USEFUL_PAT = "Yes useful"
+NONUSEFUL_PAT = "Not useful"
+CHUNK_FILTER_PROMPT = f"""
+Determine if the reference section is USEFUL for answering the user query.
+It is NOT enough for the section to be related to the query, \
+it must contain information that is USEFUL for answering the query.
+If the section contains ANY useful information, that is good enough, \
+it does not need to fully answer the every part of the user query.
+
+Reference Section:
+```
+{{chunk_text}}
+```
+
+User Query:
+```
+{{user_query}}
+```
+
+Respond with EXACTLY AND ONLY: "{USEFUL_PAT}" or "{NONUSEFUL_PAT}"
+""".strip()
+
+
+# Use the following for easy viewing of prompts
+if __name__ == "__main__":
+    print(CHUNK_FILTER_PROMPT)
diff --git a/backend/danswer/prompts/miscellaneous_prompts.py b/backend/danswer/prompts/miscellaneous_prompts.py
new file mode 100644
index 0000000000..c58cbb48c2
--- /dev/null
+++ b/backend/danswer/prompts/miscellaneous_prompts.py
@@ -0,0 +1,17 @@
+# Prompts that aren't part of a particular configurable feature
+
+LANGUAGE_REPHRASE_PROMPT = """
+Rephrase the query in {target_language}.
+If the query is already in the correct language, \
+simply repeat the ORIGINAL query back to me, EXACTLY as is with no rephrasing.
+NEVER change proper nouns, technical terms, acronyms, or terms you are not familiar with.
+IMPORTANT, if the query is already in the target language, DO NOT REPHRASE OR EDIT the query!
+
+Query:
+{query}
+""".strip()
+
+
+# Use the following for easy viewing of prompts
+if __name__ == "__main__":
+    print(LANGUAGE_REPHRASE_PROMPT)
diff --git a/backend/danswer/prompts/query_validation.py b/backend/danswer/prompts/query_validation.py
new file mode 100644
index 0000000000..6816322992
--- /dev/null
+++ b/backend/danswer/prompts/query_validation.py
@@ -0,0 +1,58 @@
+# The following prompts are used for verifying if the user's query can be answered by the current
+# system. Many new users do not understand the design/capabilities of the system and will ask
+# questions that are unanswerable such as aggregations or user-specific questions that the system
+# cannot handle. This is used to identify those cases.
+from danswer.prompts.constants import ANSWERABLE_PAT
+from danswer.prompts.constants import GENERAL_SEP_PAT
+from danswer.prompts.constants import QUESTION_PAT
+from danswer.prompts.constants import THOUGHT_PAT
+
+
+ANSWERABLE_PROMPT = f"""
+You are a helper tool to determine if a query is answerable using retrieval augmented generation.
+The main system will try to answer the user query based on ONLY the top 5 most relevant \
+documents found from search.
+Sources contain both up to date and proprietary information for the specific team.
+For named or unknown entities, assume the search will find relevant and consistent knowledge \
+about the entity.
+The system is not tuned for writing code.
+The system is not tuned for interfacing with structured data via query languages like SQL.
+If the question might not require code or query language, then assume it can be answered without \
+code or query language.
+Determine if that system should attempt to answer.
+"ANSWERABLE" must be exactly "True" or "False"
+
+{GENERAL_SEP_PAT}
+
+{QUESTION_PAT.upper()} What is this Slack channel about?
+```
+{THOUGHT_PAT.upper()} First the system must determine which Slack channel is being referred to. \
+By fetching 5 documents related to Slack channel contents, it is not possible to determine which \
+Slack channel the user is referring to.
+{ANSWERABLE_PAT.upper()} False
+```
+
+{QUESTION_PAT.upper()} Danswer is unreachable.
+```
+{THOUGHT_PAT.upper()} The system searches documents related to Danswer being unreachable. \
+Assuming the documents from search contains situations where Danswer is not reachable and \
+contains a fix, the query may be answerable.
+{ANSWERABLE_PAT.upper()} True
+```
+
+{QUESTION_PAT.upper()} How many customers do we have
+```
+{THOUGHT_PAT.upper()} Assuming the retrieved documents contain up to date customer acquisition \
+information including a list of customers, the query can be answered. It is important to note \
+that if the information only exists in a SQL database, the system is unable to execute SQL and \
+won't find an answer.
+{ANSWERABLE_PAT.upper()} True
+```
+
+{QUESTION_PAT.upper()} {{user_query}}
+""".strip()
+
+
+# Use the following for easy viewing of prompts
+if __name__ == "__main__":
+    print(ANSWERABLE_PROMPT)
diff --git a/backend/danswer/prompts/secondary_llm_flows.py b/backend/danswer/prompts/secondary_llm_flows.py
deleted file mode 100644
index fecd985978..0000000000
--- a/backend/danswer/prompts/secondary_llm_flows.py
+++ /dev/null
@@ -1,172 +0,0 @@
-from danswer.prompts.constants import ANSWER_PAT
-from danswer.prompts.constants import ANSWERABLE_PAT
-from danswer.prompts.constants import GENERAL_SEP_PAT
-from danswer.prompts.constants import QUESTION_PAT
-from danswer.prompts.constants import SOURCES_KEY
-from danswer.prompts.constants import THOUGHT_PAT
-
-
-ANSWER_VALIDITY_PROMPT = f"""
-You are an assistant to identify invalid query/answer pairs coming from a large language model.
-The query/answer pair is invalid if any of the following are True:
-1. Query is asking for information that varies by person or is subjective. If there is not a \
-globally true answer, the language model should not respond, therefore any answer is invalid.
-2. Answer addresses a related but different query. To be helpful, the model may provide provide \
-related information about a query but it won't match what the user is asking, this is invalid.
-3. Answer is just some form of "I don\'t know" or "not enough information" without significant \
-additional useful information. Explaining why it does not know or cannot answer is invalid.
-
-{QUESTION_PAT} {{user_query}}
-{ANSWER_PAT} {{llm_answer}}
-
-------------------------
-You MUST answer in EXACTLY the following format:
-```
-1. True or False
-2. True or False
-3. True or False
-Final Answer: Valid or Invalid
-```
-
-Hint: Remember, if ANY of the conditions are True, it is Invalid.
-""".strip()
-
-
-ANSWERABLE_PROMPT = f"""
-You are a helper tool to determine if a query is answerable using retrieval augmented generation.
-The main system will try to answer the user query based on ONLY the top 5 most relevant \
-documents found from search.
-Sources contain both up to date and proprietary information for the specific team.
-For named or unknown entities, assume the search will find relevant and consistent knowledge \
-about the entity.
-The system is not tuned for writing code.
-The system is not tuned for interfacing with structured data via query languages like SQL.
-If the question might not require code or query language, then assume it can be answered without \
-code or query language.
-Determine if that system should attempt to answer.
-"ANSWERABLE" must be exactly "True" or "False"
-
-{GENERAL_SEP_PAT}
-
-{QUESTION_PAT.upper()} What is this Slack channel about?
-```
-{THOUGHT_PAT.upper()} First the system must determine which Slack channel is being referred to. \
-By fetching 5 documents related to Slack channel contents, it is not possible to determine which \
-Slack channel the user is referring to.
-{ANSWERABLE_PAT.upper()} False
-```
-
-{QUESTION_PAT.upper()} Danswer is unreachable.
-```
-{THOUGHT_PAT.upper()} The system searches documents related to Danswer being unreachable. \
-Assuming the documents from search contains situations where Danswer is not reachable and \
-contains a fix, the query may be answerable.
-{ANSWERABLE_PAT.upper()} True
-```
-
-{QUESTION_PAT.upper()} How many customers do we have
-```
-{THOUGHT_PAT.upper()} Assuming the retrieved documents contain up to date customer acquisition \
-information including a list of customers, the query can be answered. It is important to note \
-that if the information only exists in a SQL database, the system is unable to execute SQL and \
-won't find an answer.
-{ANSWERABLE_PAT.upper()} True
-```
-
-{QUESTION_PAT.upper()} {{user_query}}
-""".strip()
-
-
-# Smaller followup prompts in time_filter.py
-TIME_FILTER_PROMPT = """
-You are a tool to identify time filters to apply to a user query for a downstream search \
-application. The downstream application is able to use a recency bias or apply a hard cutoff to \
-remove all documents before the cutoff. Identify the correct filters to apply for the user query.
-
-The current day and time is {current_day_time_str}.
-
-Always answer with ONLY a json which contains the keys "filter_type", "filter_value", \
-"value_multiple" and "date".
-
-The valid values for "filter_type" are "hard cutoff", "favors recent", or "not time sensitive".
-The valid values for "filter_value" are "day", "week", "month", "quarter", "half", or "year".
-The valid values for "value_multiple" is any number.
-The valid values for "date" is a date in format MM/DD/YYYY, ALWAYS follow this format.
-""".strip()
-
-
-# Smaller followup prompts in source_filter.py
-# Known issue: LLMs like GPT-3.5 try to generalize. If the valid sources contains "web" but not
-# "confluence" and the user asks for confluence related things, the LLM will select "web" since
-# confluence is accessed as a website. This cannot be fixed without also reducing the capability
-# to match things like repository->github, website->web, etc.
-# This is generally not a big issue though as if the company has confluence, hopefully they add
-# a connector for it or the user is aware that confluence has not been added.
-SOURCE_FILTER_PROMPT = f"""
-Given a user query, extract relevant source filters for use in a downstream search tool.
-Respond with a json containing the source filters or null if no specific sources are referenced.
-ONLY extract sources when the user is explicitly limiting the scope of where information is \
-coming from.
-The user may provide invalid source filters, ignore those.
-
-The valid sources are:
-{{valid_sources}}
-{{web_source_warning}}
-{{file_source_warning}}
-
-
-ALWAYS answer with ONLY a json with the key "{SOURCES_KEY}". \
-The value for "{SOURCES_KEY}" must be null or a list of valid sources.
-
-Sample Response:
-{{sample_response}}
-""".strip()
-
-WEB_SOURCE_WARNING = """
-Note: The "web" source only applies to when the user specifies "website" in the query. \
-It does not apply to tools such as Confluence, GitHub, etc. which have a website.
-""".strip()
-
-FILE_SOURCE_WARNING = """
-Note: The "file" source only applies to when the user refers to uploaded files in the query.
-""".strip()
-
-
-USEFUL_PAT = "Yes useful"
-NONUSEFUL_PAT = "Not useful"
-CHUNK_FILTER_PROMPT = f"""
-Determine if the reference section is USEFUL for answering the user query.
-It is NOT enough for the section to be related to the query, \
-it must contain information that is USEFUL for answering the query.
-If the section contains ANY useful information, that is good enough, \
-it does not need to fully answer the every part of the user query.
-
-Reference Section:
-```
-{{chunk_text}}
-```
-
-User Query:
-```
-{{user_query}}
-```
-
-Respond with EXACTLY AND ONLY: "{USEFUL_PAT}" or "{NONUSEFUL_PAT}"
-""".strip()
-
-
-LANGUAGE_REPHRASE_PROMPT = """
-Rephrase the query in {target_language}.
-If the query is already in the correct language, \
-simply repeat the ORIGINAL query back to me, EXACTLY as is with no rephrasing.
-NEVER change proper nouns, technical terms, acronyms, or terms you are not familiar with.
-IMPORTANT, if the query is already in the target language, DO NOT REPHRASE OR EDIT the query!
-
-Query:
-{query}
-""".strip()
-
-
-# User the following for easy viewing of prompts
-if __name__ == "__main__":
-    print(ANSWERABLE_PROMPT)
diff --git a/backend/danswer/secondary_llm_flows/answer_validation.py b/backend/danswer/secondary_llm_flows/answer_validation.py
index 4ef8e8bef3..26b0e096fd 100644
--- a/backend/danswer/secondary_llm_flows/answer_validation.py
+++ b/backend/danswer/secondary_llm_flows/answer_validation.py
@@ -1,6 +1,6 @@
 from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
-from danswer.prompts.secondary_llm_flows import ANSWER_VALIDITY_PROMPT
+from danswer.prompts.answer_validation import ANSWER_VALIDITY_PROMPT
 from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time
 
diff --git a/backend/danswer/secondary_llm_flows/chunk_usefulness.py b/backend/danswer/secondary_llm_flows/chunk_usefulness.py
index 636401912c..b977947bf4 100644
--- a/backend/danswer/secondary_llm_flows/chunk_usefulness.py
+++ b/backend/danswer/secondary_llm_flows/chunk_usefulness.py
@@ -2,8 +2,8 @@ from collections.abc import Callable
 
 from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
-from danswer.prompts.secondary_llm_flows import CHUNK_FILTER_PROMPT
-from danswer.prompts.secondary_llm_flows import NONUSEFUL_PAT
+from danswer.prompts.llm_chunk_filter import CHUNK_FILTER_PROMPT
+from danswer.prompts.llm_chunk_filter import NONUSEFUL_PAT
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
 
diff --git a/backend/danswer/secondary_llm_flows/query_expansion.py b/backend/danswer/secondary_llm_flows/query_expansion.py
index 874ca2131a..c80dbcbfc3 100644
--- a/backend/danswer/secondary_llm_flows/query_expansion.py
+++ b/backend/danswer/secondary_llm_flows/query_expansion.py
@@ -2,7 +2,7 @@ from collections.abc import Callable
 
 from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
-from danswer.prompts.secondary_llm_flows import LANGUAGE_REPHRASE_PROMPT
+from danswer.prompts.miscellaneous_prompts import LANGUAGE_REPHRASE_PROMPT
 from danswer.utils.logger import setup_logger
 from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel
 
diff --git a/backend/danswer/secondary_llm_flows/query_validation.py b/backend/danswer/secondary_llm_flows/query_validation.py
index 7d2b752ad7..a345127247 100644
--- a/backend/danswer/secondary_llm_flows/query_validation.py
+++ b/backend/danswer/secondary_llm_flows/query_validation.py
@@ -7,7 +7,7 @@ from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.prompts.constants import ANSWERABLE_PAT
 from danswer.prompts.constants import THOUGHT_PAT
-from danswer.prompts.secondary_llm_flows import ANSWERABLE_PROMPT
+from danswer.prompts.query_validation import ANSWERABLE_PROMPT
 from danswer.server.chat.models import QueryValidationResponse
 from danswer.server.utils import get_json_line
 from danswer.utils.logger import setup_logger
diff --git a/backend/danswer/secondary_llm_flows/source_filter.py b/backend/danswer/secondary_llm_flows/source_filter.py
index 7327eac04a..3560e072af 100644
--- a/backend/danswer/secondary_llm_flows/source_filter.py
+++ b/backend/danswer/secondary_llm_flows/source_filter.py
@@ -9,9 +9,9 @@ from danswer.db.engine import get_sqlalchemy_engine
 from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.prompts.constants import SOURCES_KEY
-from danswer.prompts.secondary_llm_flows import FILE_SOURCE_WARNING
-from danswer.prompts.secondary_llm_flows import SOURCE_FILTER_PROMPT
-from danswer.prompts.secondary_llm_flows import WEB_SOURCE_WARNING
+from danswer.prompts.filter_extration import FILE_SOURCE_WARNING
+from danswer.prompts.filter_extration import SOURCE_FILTER_PROMPT
+from danswer.prompts.filter_extration import WEB_SOURCE_WARNING
 from danswer.utils.logger import setup_logger
 from danswer.utils.text_processing import extract_embedded_json
 from danswer.utils.timing import log_function_time
diff --git a/backend/danswer/secondary_llm_flows/time_filter.py b/backend/danswer/secondary_llm_flows/time_filter.py
index 96fa079bd6..ebd1d80fd3 100644
--- a/backend/danswer/secondary_llm_flows/time_filter.py
+++ b/backend/danswer/secondary_llm_flows/time_filter.py
@@ -7,8 +7,8 @@ from dateutil.parser import parse
 
 from danswer.llm.factory import get_default_llm
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
+from danswer.prompts.filter_extration import TIME_FILTER_PROMPT
 from danswer.prompts.prompt_utils import get_current_llm_day_time
-from danswer.prompts.secondary_llm_flows import TIME_FILTER_PROMPT
 from danswer.utils.logger import setup_logger
 from danswer.utils.timing import log_function_time