DAN 17 QOL Model Output (#20)

parent e8bf6b0364
commit e896d0786e

Changes under backend/danswer:
File: danswer config module (exact path not shown)

@@ -54,7 +54,7 @@ CHUNK_OVERLAP = 5
 #####
 # Other API Keys
 #####
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 
 
 #####
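The API-key change is the quality-of-life fix here; a minimal sketch of the difference, assuming nothing beyond what the hunk shows:

import os

# Old form: os.environ["OPENAI_API_KEY"] raises KeyError at import time if the
# variable is unset, so the backend cannot even start without a key.
# New form: falls back to "", so startup succeeds and a missing key only
# surfaces when the first OpenAI call is made.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    print("OPENAI_API_KEY is unset; OpenAI requests will fail until it is provided")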
File: backend/danswer/direct_qa/qa_prompts.py (path inferred from the import statements later in this diff)

@@ -1,11 +1,92 @@
+import json
+
 DOC_SEP_PAT = "---NEW DOCUMENT---"
 QUESTION_PAT = "Query:"
 ANSWER_PAT = "Answer:"
 UNCERTAINTY_PAT = "?"
 QUOTE_PAT = "Quote:"
 
+SYSTEM_ROLE = "You are a Question Answering system that answers queries based on provided documents. "
+
+
-def generic_prompt_processor(question: str, documents: list[str]) -> str:
+BASE_PROMPT = (
+    f"Answer the query based on provided documents and quote relevant sections. "
+    f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+)
+
+UNABLE_TO_FIND_JSON_MSG = (
+    "If the query cannot be answered based on the documents, respond with {}. "
+)
+
+SAMPLE_QUESTION = "Where is the Eiffel Tower?"
+
+SAMPLE_JSON_RESPONSE = {
+    "answer": "The Eiffel Tower is located in Paris, France.",
+    "quotes": [
+        "The Eiffel Tower is an iconic symbol of Paris",
+        "located on the Champ de Mars in France.",
+    ],
+}
+
+
+def json_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        BASE_PROMPT + f"Sample response:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
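As a hypothetical illustration (not part of the commit), json_processor assembles its prompt like this for two toy documents:

docs = [
    "The Eiffel Tower is an iconic symbol of Paris.",
    "It sits on the Champ de Mars in France.",
]
prompt = json_processor("Where is the Eiffel Tower?", docs)
# Layout: instructions + sample JSON, then each document prefixed with
# "---NEW DOCUMENT---", a "---" separator, and finally "Query:" + the question.
print(prompt)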
+
+
+# Chain of Thought approach works however has higher token cost (more expensive) and is slower.
+# Should use this one if users ask questions that require logical reasoning.
+def json_cot_variant_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        f"Answer the query based on provided documents and quote relevant sections. "
+        f'Respond with a freeform reasoning section followed by "Final Answer:" with a '
+        f"json containing a concise answer to the query and up to three most relevant quotes from the documents.\n"
+        f"Sample answer json:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    prompt += "Reasoning:\n"
+    return prompt
+
+
+# This one seems largely useless with a single example
+# Model seems to take the one example of answering Yes and just does that too.
+def json_reflexion_processor(question: str, documents: list[str]) -> str:
+    reflexion_str = "Does this fully answer the user query?"
+    prompt = (
+        BASE_PROMPT
+        + f'After each generated json, ask "{reflexion_str}" and respond Yes or No. '
+        f"If No, generate a better json response to the query.\n"
+        f"Sample question and response:\n"
+        f"{QUESTION_PAT}\n{SAMPLE_QUESTION}\n"
+        f"{json.dumps(SAMPLE_JSON_RESPONSE)}\n"
+        f"{reflexion_str} Yes\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n---NEW CONTEXT DOCUMENT---\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# Initial design, works pretty well but not optimal
+def freeform_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        f"Answer the query based on the documents below and quote the documents segments containing the answer. "
+        f'Respond with one "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as is relevant. '
@@ -23,7 +104,40 @@ def generic_prompt_processor(question: str, documents: list[str]) -> str:
     return prompt
 
 
-def openai_chat_completion_processor(
+def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, str]]:
+    role_msg = (
+        SYSTEM_ROLE
+        + 'Start by reading the following documents and responding with "Acknowledged"'
+    )
+
+    messages = [{"role": "system", "content": role_msg}]
+
+    for document in documents:
+        messages.extend(
+            [
+                {
+                    "role": "user",
+                    "content": document,
+                },
+                {"role": "assistant", "content": "Acknowledged"},
+            ]
+        )
+    sample_msg = (
+        f"Now answer the user query based on documents above and quote relevant sections. "
+        f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+        f"Sample response: {json.dumps(SAMPLE_JSON_RESPONSE)}"
+    )
+    messages.append({"role": "system", "content": sample_msg})
+
+    messages.append({"role": "user", "content": f"{QUESTION_PAT}\n{question}\n"})
+
+    # Note that the below will be dropped in reflexion if used
+    messages.append({"role": "assistant", "content": "Answer Json:\n"})
+
+    return messages
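For reference, a sketch (with invented documents) of the transcript json_chat_processor builds; the fake user/assistant "Acknowledged" exchanges prime the model before the real query:

msgs = json_chat_processor("Where is the Eiffel Tower?", ["doc one", "doc two"])
for msg in msgs:
    print(msg["role"], "->", msg["content"][:60])
# Roles in order: system, user, assistant, user, assistant, system, user, assistant
# (the trailing assistant stub "Answer Json:\n" nudges the model to emit json)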
+
+
+def freeform_chat_processor(
     question: str, documents: list[str]
 ) -> list[dict[str, str]]:
     sample_quote = "Quote:\nThe hotdogs are freshly cooked.\n\nQuote:\nThey are very cheap at only a dollar each."
@@ -60,3 +174,16 @@ def openai_chat_completion_processor(
     )
 
     return messages
+
+
+# Not very useful, have not seen it improve an answer based on this
+# Sometimes gpt-3.5-turbo will just answer something worse like:
+# 'The response is a valid json that fully answers the user query with quotes exactly matching sections of the source
+# document. No revision is needed.'
+def get_chat_reflexion_msg() -> dict[str, str]:
+    reflexion_content = (
+        "Is the assistant response a valid json that fully answer the user query? "
+        "If the response needs to be fixed or if an improvement is possible, provide a revised json. "
+        "Otherwise, respond with the same json."
+    )
+    return {"role": "system", "content": reflexion_content}
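A hedged sketch of how this message is meant to be used (the reflexion loop later in this diff does exactly this): append the model's own answer, then the reflexion prompt, and re-send the transcript.

messages = json_chat_processor("Where is the Eiffel Tower?", ["some document"])
# ...after a completion call returns an assistant json...
messages.append({"role": "assistant", "content": '{"answer": "Paris", "quotes": []}'})
messages.append(get_chat_reflexion_msg())  # asks the model to validate or revise
# The extended transcript is then sent back for another completion.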
File: direct QA module under backend/danswer/direct_qa (exact filename not shown)

@@ -1,3 +1,4 @@
+import json
 import math
 import re
 from collections.abc import Callable
@@ -18,8 +19,9 @@ from danswer.configs.model_configs import OPENAI_MAX_OUTPUT_TOKENS
 from danswer.configs.model_configs import OPENAPI_MODEL_VERSION
 from danswer.direct_qa.interfaces import QAModel
 from danswer.direct_qa.qa_prompts import ANSWER_PAT
-from danswer.direct_qa.qa_prompts import generic_prompt_processor
-from danswer.direct_qa.qa_prompts import openai_chat_completion_processor
+from danswer.direct_qa.qa_prompts import get_chat_reflexion_msg
+from danswer.direct_qa.qa_prompts import json_chat_processor
+from danswer.direct_qa.qa_prompts import json_processor
 from danswer.direct_qa.qa_prompts import QUOTE_PAT
 from danswer.direct_qa.qa_prompts import UNCERTAINTY_PAT
 from danswer.utils.logging import setup_logger
@@ -33,10 +35,9 @@ logger = setup_logger()
 openai.api_key = OPENAI_API_KEY
 
 
-def separate_answer_quotes(
+def extract_answer_quotes_freeform(
     answer_raw: str,
 ) -> Tuple[Optional[str], Optional[list[str]]]:
     """Gives back the answer and quote sections"""
     null_answer_check = (
         answer_raw.replace(ANSWER_PAT, "").replace(QUOTE_PAT, "").strip()
     )
@@ -71,6 +72,27 @@
     return answer, sections_clean[1:]
 
 
+def extract_answer_quotes_json(
+    answer_dict: dict[str, str | list[str]]
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    answer_dict = {k.lower(): v for k, v in answer_dict.items()}
+    answer = str(answer_dict.get("answer"))
+    quotes = answer_dict.get("quotes") or answer_dict.get("quote")
+    if isinstance(quotes, str):
+        quotes = [quotes]
+    return answer, quotes
+
+
+def separate_answer_quotes(
+    answer_raw: str,
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    try:
+        model_raw_json = json.loads(answer_raw)
+        return extract_answer_quotes_json(model_raw_json)
+    except ValueError:
+        return extract_answer_quotes_freeform(answer_raw)
+
+
 def match_quotes_to_docs(
     quotes: list[str],
     chunks: list[InferenceChunk],
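A hypothetical round trip through the new parsing path (values invented): JSON output is parsed structurally, and anything else falls back to the freeform parser, because json.loads raises json.JSONDecodeError, a subclass of ValueError.

raw_json = '{"answer": "Paris, France.", "quotes": ["iconic symbol of Paris"]}'
print(separate_answer_quotes(raw_json))
# -> ('Paris, France.', ['iconic symbol of Paris'])

raw_text = "Answer:\nParis, France.\n\nQuote:\niconic symbol of Paris"
print(separate_answer_quotes(raw_text))  # handled by extract_answer_quotes_freeform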
@@ -140,7 +162,7 @@ def process_answer(
 class OpenAICompletionQA(QAModel):
     def __init__(
         self,
-        prompt_processor: Callable[[str, list[str]], str] = generic_prompt_processor,
+        prompt_processor: Callable[[str, list[str]], str] = json_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
     ) -> None:
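Since the processor is a constructor parameter, any other prompt style from qa_prompts.py can still be injected; only the default moved to the json prompt. A hypothetical instantiation:

qa_default = OpenAICompletionQA()  # json prompt by default now
qa_cot = OpenAICompletionQA(prompt_processor=json_cot_variant_processor)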
@@ -185,13 +207,15 @@ class OpenAIChatCompletionQA(QAModel):
         self,
         prompt_processor: Callable[
             [str, list[str]], list[dict[str, str]]
-        ] = openai_chat_completion_processor,
+        ] = json_chat_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
+        reflexion_try_count: int = 0,
     ) -> None:
         self.prompt_processor = prompt_processor
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
+        self.reflexion_try_count = reflexion_try_count
 
     @log_function_time()
     def answer_question(
@@ -200,24 +224,28 @@ class OpenAIChatCompletionQA(QAModel):
         top_contents = [ranked_chunk.content for ranked_chunk in context_docs]
         messages = self.prompt_processor(query, top_contents)
         logger.debug(messages)
 
-        try:
-            response = openai.ChatCompletion.create(
-                messages=messages,
-                temperature=0,
-                top_p=1,
-                frequency_penalty=0,
-                presence_penalty=0,
-                model=self.model_version,
-                max_tokens=self.max_output_tokens,
-            )
-            model_output = response["choices"][0]["message"]["content"].strip()
-            logger.info(
-                "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
-            )
-        except Exception as e:
-            logger.exception(e)
-            model_output = "Model Failure"
+        model_output = ""
+        for _ in range(self.reflexion_try_count + 1):
+            try:
+                response = openai.ChatCompletion.create(
+                    messages=messages,
+                    temperature=0,
+                    top_p=1,
+                    frequency_penalty=0,
+                    presence_penalty=0,
+                    model=self.model_version,
+                    max_tokens=self.max_output_tokens,
+                )
+                model_output = response["choices"][0]["message"]["content"].strip()
+                assistant_msg = {"content": model_output, "role": "assistant"}
+                messages.extend([assistant_msg, get_chat_reflexion_msg()])
+                logger.info(
+                    "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
+                )
+            except Exception as e:
+                logger.exception(e)
+                logger.warning(f"Model failure for query: {query}")
+                return None, None
 
         logger.debug(model_output)
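With the default reflexion_try_count=0 the loop body runs exactly once, reproducing the old single-shot behavior; each additional count appends the model's own answer plus the reflexion system message and calls the API again, giving the model a chance to revise its json. Note the behavior change on errors: instead of continuing with a "Model Failure" placeholder, the method now logs and returns (None, None). A hypothetical setup:

qa = OpenAIChatCompletionQA(reflexion_try_count=1)
# Call 1: the model answers the query.
# Call 2: the transcript now ends with its own answer + the reflexion prompt,
# so the model either revises the json or returns it unchanged.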
File: text cleanup utilities (exact path not shown)

@@ -19,6 +19,12 @@ def shared_precompare_cleanup(text: str) -> str:
     text = text.replace("*", "")
 
+    # GPT models sometimes like to edit the quoting, ie "Title: Contents" becomes Title: "Contents"
+    text = text.replace('\\"', "")
     text = text.replace('"', "")
 
+    # GPT models often change up punctuations to make the text flow better.
+    text = text.replace(".", "")
+    text = text.replace(":", "")
+    text = text.replace(",", "")
 
     return text
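A sketch of the intended effect, assuming only the replacements visible in this hunk: a model-edited quote and the original source text normalize to the same string, so exact matching can succeed despite GPT's punctuation and quoting edits.

model_quote = 'Title: "The Eiffel Tower, in Paris".'
source_text = "Title: The Eiffel Tower, in Paris."
assert shared_precompare_cleanup(model_quote) == shared_precompare_cleanup(source_text)
# both reduce to: Title The Eiffel Tower in Paris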