diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 6778147b0..e6eb2a97d 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -54,7 +54,7 @@ CHUNK_OVERLAP = 5
 #####
 # Other API Keys
 #####
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 
 
 #####
diff --git a/backend/danswer/direct_qa/qa_prompts.py b/backend/danswer/direct_qa/qa_prompts.py
index f6a4c13bd..8a9dd1ceb 100644
--- a/backend/danswer/direct_qa/qa_prompts.py
+++ b/backend/danswer/direct_qa/qa_prompts.py
@@ -1,11 +1,92 @@
+import json
+
 DOC_SEP_PAT = "---NEW DOCUMENT---"
 QUESTION_PAT = "Query:"
 ANSWER_PAT = "Answer:"
 UNCERTAINTY_PAT = "?"
 QUOTE_PAT = "Quote:"
+SYSTEM_ROLE = "You are a Question Answering system that answers queries based on provided documents. "
 
 
-def generic_prompt_processor(question: str, documents: list[str]) -> str:
+BASE_PROMPT = (
+    f"Answer the query based on provided documents and quote relevant sections. "
+    f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+)
+
+UNABLE_TO_FIND_JSON_MSG = (
+    "If the query cannot be answered based on the documents, respond with {}. "
+)
+
+SAMPLE_QUESTION = "Where is the Eiffel Tower?"
+
+SAMPLE_JSON_RESPONSE = {
+    "answer": "The Eiffel Tower is located in Paris, France.",
+    "quotes": [
+        "The Eiffel Tower is an iconic symbol of Paris",
+        "located on the Champ de Mars in France.",
+    ],
+}
+
+
+def json_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        BASE_PROMPT + f"Sample response:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# The Chain of Thought approach works, but it has a higher token cost (more expensive) and is slower.
+# Use this one if users ask questions that require logical reasoning.
+def json_cot_variant_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        f"Answer the query based on provided documents and quote relevant sections. "
+        f'Respond with a freeform reasoning section followed by "Final Answer:" with a '
+        f"json containing a concise answer to the query and up to three most relevant quotes from the documents.\n"
+        f"Sample answer json:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    prompt += "Reasoning:\n"
+    return prompt
+
+
+# This one seems largely useless with a single example.
+# The model seems to latch onto the one example of answering Yes and just does that too.
+def json_reflexion_processor(question: str, documents: list[str]) -> str:
+    reflexion_str = "Does this fully answer the user query?"
+    prompt = (
+        BASE_PROMPT
+        + f'After each generated json, ask "{reflexion_str}" and respond Yes or No. '
+        f"If No, generate a better json response to the query.\n"
+        f"Sample question and response:\n"
+        f"{QUESTION_PAT}\n{SAMPLE_QUESTION}\n"
+        f"{json.dumps(SAMPLE_JSON_RESPONSE)}\n"
+        f"{reflexion_str} Yes\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# Initial design; works pretty well but is not optimal.
+def freeform_processor(question: str, documents: list[str]) -> str:
     prompt = (
         f"Answer the query based on the documents below and quote the documents segments containing the answer. "
         f'Respond with one "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as is relevant. '
@@ -23,7 +104,40 @@ def generic_prompt_processor(question: str, documents: list[str]) -> str:
     return prompt
 
 
-def openai_chat_completion_processor(
+def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, str]]:
+    role_msg = (
+        SYSTEM_ROLE
+        + 'Start by reading the following documents and responding with "Acknowledged"'
+    )
+
+    messages = [{"role": "system", "content": role_msg}]
+
+    for document in documents:
+        messages.extend(
+            [
+                {
+                    "role": "user",
+                    "content": document,
+                },
+                {"role": "assistant", "content": "Acknowledged"},
+            ]
+        )
+    sample_msg = (
+        f"Now answer the user query based on documents above and quote relevant sections. "
+        f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+        f"Sample response: {json.dumps(SAMPLE_JSON_RESPONSE)}"
+    )
+    messages.append({"role": "system", "content": sample_msg})
+
+    messages.append({"role": "user", "content": f"{QUESTION_PAT}\n{question}\n"})
+
+    # Note that the message below will be dropped if reflexion is used
+    messages.append({"role": "assistant", "content": "Answer Json:\n"})
+
+    return messages
+
+
+def freeform_chat_processor(
     question: str, documents: list[str]
 ) -> list[dict[str, str]]:
     sample_quote = "Quote:\nThe hotdogs are freshly cooked.\n\nQuote:\nThey are very cheap at only a dollar each."
@@ -60,3 +174,16 @@ def openai_chat_completion_processor(
     )
 
     return messages
+
+
+# Not very useful; have not seen it improve an answer based on this.
+# Sometimes gpt-3.5-turbo will just answer something worse like:
+# 'The response is a valid json that fully answers the user query with quotes exactly matching sections of the source
+# document. No revision is needed.'
+def get_chat_reflexion_msg() -> dict[str, str]:
+    reflexion_content = (
+        "Is the assistant response a valid json that fully answers the user query? "
+        "If the response needs to be fixed or if an improvement is possible, provide a revised json. "
+        "Otherwise, respond with the same json."
+    )
+    return {"role": "system", "content": reflexion_content}
diff --git a/backend/danswer/direct_qa/question_answer.py b/backend/danswer/direct_qa/question_answer.py
index 704ae4c13..e29f03d27 100644
--- a/backend/danswer/direct_qa/question_answer.py
+++ b/backend/danswer/direct_qa/question_answer.py
@@ -1,3 +1,4 @@
+import json
 import math
 import re
 from collections.abc import Callable
@@ -18,8 +19,9 @@ from danswer.configs.model_configs import OPENAI_MAX_OUTPUT_TOKENS
 from danswer.configs.model_configs import OPENAPI_MODEL_VERSION
 from danswer.direct_qa.interfaces import QAModel
 from danswer.direct_qa.qa_prompts import ANSWER_PAT
-from danswer.direct_qa.qa_prompts import generic_prompt_processor
-from danswer.direct_qa.qa_prompts import openai_chat_completion_processor
+from danswer.direct_qa.qa_prompts import get_chat_reflexion_msg
+from danswer.direct_qa.qa_prompts import json_chat_processor
+from danswer.direct_qa.qa_prompts import json_processor
 from danswer.direct_qa.qa_prompts import QUOTE_PAT
 from danswer.direct_qa.qa_prompts import UNCERTAINTY_PAT
 from danswer.utils.logging import setup_logger
@@ -33,10 +35,9 @@ logger = setup_logger()
 openai.api_key = OPENAI_API_KEY
 
 
-def separate_answer_quotes(
+def extract_answer_quotes_freeform(
     answer_raw: str,
 ) -> Tuple[Optional[str], Optional[list[str]]]:
-    """Gives back the answer and quote sections"""
     null_answer_check = (
         answer_raw.replace(ANSWER_PAT, "").replace(QUOTE_PAT, "").strip()
     )
@@ -71,6 +72,27 @@
     return answer, sections_clean[1:]
 
 
+def extract_answer_quotes_json(
+    answer_dict: dict[str, str | list[str]]
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    answer_dict = {k.lower(): v for k, v in answer_dict.items()}
+    answer = str(answer_dict.get("answer"))
+    quotes = answer_dict.get("quotes") or answer_dict.get("quote")
+    if isinstance(quotes, str):
+        quotes = [quotes]
+    return answer, quotes
+
+
+def separate_answer_quotes(
+    answer_raw: str,
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    try:
+        model_raw_json = json.loads(answer_raw)
+        return extract_answer_quotes_json(model_raw_json)
+    except ValueError:
+        return extract_answer_quotes_freeform(answer_raw)
+
+
 def match_quotes_to_docs(
     quotes: list[str],
     chunks: list[InferenceChunk],
@@ -140,7 +162,7 @@ def process_answer(
 class OpenAICompletionQA(QAModel):
     def __init__(
         self,
-        prompt_processor: Callable[[str, list[str]], str] = generic_prompt_processor,
+        prompt_processor: Callable[[str, list[str]], str] = json_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
     ) -> None:
@@ -185,13 +207,15 @@ class OpenAIChatCompletionQA(QAModel):
         self,
         prompt_processor: Callable[
             [str, list[str]], list[dict[str, str]]
-        ] = openai_chat_completion_processor,
+        ] = json_chat_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
+        reflexion_try_count: int = 0,
     ) -> None:
         self.prompt_processor = prompt_processor
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
+        self.reflexion_try_count = reflexion_try_count
 
     @log_function_time()
     def answer_question(
@@ -200,24 +224,28 @@
         top_contents = [ranked_chunk.content for ranked_chunk in context_docs]
         messages = self.prompt_processor(query, top_contents)
         logger.debug(messages)
-
-        try:
-            response = openai.ChatCompletion.create(
-                messages=messages,
-                temperature=0,
-                top_p=1,
-                frequency_penalty=0,
-                presence_penalty=0,
-                model=self.model_version,
-                max_tokens=self.max_output_tokens,
-            )
-            model_output = response["choices"][0]["message"]["content"].strip()
-            logger.info(
-                "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
-            )
-        except Exception as e:
-            logger.exception(e)
-            model_output = "Model Failure"
+        model_output = ""
+        for _ in range(self.reflexion_try_count + 1):
+            try:
+                response = openai.ChatCompletion.create(
+                    messages=messages,
+                    temperature=0,
+                    top_p=1,
+                    frequency_penalty=0,
+                    presence_penalty=0,
+                    model=self.model_version,
+                    max_tokens=self.max_output_tokens,
+                )
+                model_output = response["choices"][0]["message"]["content"].strip()
+                assistant_msg = {"content": model_output, "role": "assistant"}
+                messages.extend([assistant_msg, get_chat_reflexion_msg()])
+                logger.info(
+                    "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
+                )
+            except Exception as e:
+                logger.exception(e)
+                logger.warning(f"Model failure for query: {query}")
+                return None, None
 
         logger.debug(model_output)
 
diff --git a/backend/danswer/utils/text_processing.py b/backend/danswer/utils/text_processing.py
index 685d073ae..07359e845 100644
--- a/backend/danswer/utils/text_processing.py
+++ b/backend/danswer/utils/text_processing.py
@@ -19,6 +19,12 @@ def shared_precompare_cleanup(text: str) -> str:
     text = text.replace("*", "")
 
     # GPT models sometimes like to edit the quoting, ie "Title: Contents" becomes Title: "Contents"
+    text = text.replace('\\"', "")
     text = text.replace('"', "")
 
    # GPT models often change punctuation to make the text flow better.
+    text = text.replace(".", "")
+    text = text.replace(":", "")
+    text = text.replace(",", "")
+
     return text
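
The snippet below is not part of the patch; it is a minimal, untested sketch of how the pieces added above might be exercised locally, assuming the danswer backend is importable. The sample query, document, and model outputs are invented for illustration, and with the app_configs.py change above the modules can be imported even when OPENAI_API_KEY is unset.

# Illustrative sketch only (not part of the diff); sample data is made up.
from danswer.direct_qa.qa_prompts import json_processor
from danswer.direct_qa.question_answer import separate_answer_quotes

docs = ["The Eiffel Tower is an iconic symbol of Paris, located on the Champ de Mars."]

# Build the single-string JSON prompt added in qa_prompts.py.
print(json_processor("Where is the Eiffel Tower?", docs))

# separate_answer_quotes now tries JSON first and falls back to the freeform parser.
json_output = '{"answer": "Paris, France.", "quotes": ["located on the Champ de Mars"]}'
freeform_output = "Answer:\nParis, France.\nQuote:\nlocated on the Champ de Mars"
print(separate_answer_quotes(json_output))      # parsed via extract_answer_quotes_json
print(separate_answer_quotes(freeform_output))  # falls back to extract_answer_quotes_freeform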