Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-09-29 13:25:50 +02:00)
DAN 17 QOL Model Output (#20)
@@ -54,7 +54,7 @@ CHUNK_OVERLAP = 5
 #####
 # Other API Keys
 #####
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
 
 
 #####
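Note (not part of the diff): switching from os.environ["OPENAI_API_KEY"] to os.environ.get(...) changes import-time behavior — the config no longer raises KeyError when the variable is unset and instead falls back to an empty string. A minimal standalone sketch of the difference:

import os

# Old behavior: importing the config without the variable set raised KeyError.
# OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

# New behavior: the config imports cleanly and the key can be supplied later.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    print("OPENAI_API_KEY is empty; OpenAI requests will fail until it is set.")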
@@ -1,11 +1,92 @@
+import json
+
 DOC_SEP_PAT = "---NEW DOCUMENT---"
 QUESTION_PAT = "Query:"
 ANSWER_PAT = "Answer:"
 UNCERTAINTY_PAT = "?"
 QUOTE_PAT = "Quote:"
 
 
-def generic_prompt_processor(question: str, documents: list[str]) -> str:
+SYSTEM_ROLE = "You are a Question Answering system that answers queries based on provided documents. "
+
+
+BASE_PROMPT = (
+    f"Answer the query based on provided documents and quote relevant sections. "
+    f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+)
+
+UNABLE_TO_FIND_JSON_MSG = (
+    "If the query cannot be answered based on the documents, respond with {}. "
+)
+
+SAMPLE_QUESTION = "Where is the Eiffel Tower?"
+
+SAMPLE_JSON_RESPONSE = {
+    "answer": "The Eiffel Tower is located in Paris, France.",
+    "quotes": [
+        "The Eiffel Tower is an iconic symbol of Paris",
+        "located on the Champ de Mars in France.",
+    ],
+}
+
+
+def json_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        BASE_PROMPT + f"Sample response:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# Chain of Thought approach works however has higher token cost (more expensive) and is slower.
+# Should use this one if users ask questions that require logical reasoning.
+def json_cot_variant_processor(question: str, documents: list[str]) -> str:
+    prompt = (
+        f"Answer the query based on provided documents and quote relevant sections. "
+        f'Respond with a freeform reasoning section followed by "Final Answer:" with a '
+        f"json containing a concise answer to the query and up to three most relevant quotes from the documents.\n"
+        f"Sample answer json:\n{json.dumps(SAMPLE_JSON_RESPONSE)}\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n{DOC_SEP_PAT}\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    prompt += "Reasoning:\n"
+    return prompt
+
+
+# This one seems largely useless with a single example
+# Model seems to take the one example of answering Yes and just does that too.
+def json_reflexion_processor(question: str, documents: list[str]) -> str:
+    reflexion_str = "Does this fully answer the user query?"
+    prompt = (
+        BASE_PROMPT
+        + f'After each generated json, ask "{reflexion_str}" and respond Yes or No. '
+        f"If No, generate a better json response to the query.\n"
+        f"Sample question and response:\n"
+        f"{QUESTION_PAT}\n{SAMPLE_QUESTION}\n"
+        f"{json.dumps(SAMPLE_JSON_RESPONSE)}\n"
+        f"{reflexion_str} Yes\n\n"
+        f'Each context document below is prefixed with "{DOC_SEP_PAT}".\n\n'
+    )
+
+    for document in documents:
+        prompt += f"\n---NEW CONTEXT DOCUMENT---\n{document}"
+
+    prompt += "\n\n---\n\n"
+    prompt += f"{QUESTION_PAT}\n{question}\n"
+    return prompt
+
+
+# Initial design, works pretty well but not optimal
+def freeform_processor(question: str, documents: list[str]) -> str:
     prompt = (
         f"Answer the query based on the documents below and quote the documents segments containing the answer. "
         f'Respond with one "{ANSWER_PAT}" section and as many "{QUOTE_PAT}" sections as is relevant. '
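Note (not part of the diff): a standalone sketch of how the new json_processor assembles a prompt. The query and documents below are made up for illustration; the import path matches the danswer.direct_qa.qa_prompts module referenced later in this commit.

from danswer.direct_qa.qa_prompts import DOC_SEP_PAT, QUESTION_PAT, json_processor

docs = [
    "The Eiffel Tower is an iconic symbol of Paris, located on the Champ de Mars in France.",
    "It was completed in 1889 as the entrance arch to the World's Fair.",
]
prompt = json_processor("Where is the Eiffel Tower?", docs)

# The prompt starts with BASE_PROMPT plus the sample JSON response, then each
# document preceded by the DOC_SEP_PAT separator, and ends with the query.
assert DOC_SEP_PAT in prompt
assert prompt.rstrip().endswith(f"{QUESTION_PAT}\nWhere is the Eiffel Tower?")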
@@ -23,7 +104,40 @@ def generic_prompt_processor(question: str, documents: list[str]) -> str:
     return prompt
 
 
-def openai_chat_completion_processor(
+def json_chat_processor(question: str, documents: list[str]) -> list[dict[str, str]]:
+    role_msg = (
+        SYSTEM_ROLE
+        + 'Start by reading the following documents and responding with "Acknowledged"'
+    )
+
+    messages = [{"role": "system", "content": role_msg}]
+
+    for document in documents:
+        messages.extend(
+            [
+                {
+                    "role": "user",
+                    "content": document,
+                },
+                {"role": "assistant", "content": "Acknowledged"},
+            ]
+        )
+    sample_msg = (
+        f"Now answer the user query based on documents above and quote relevant sections. "
+        f"Respond with a json containing a concise answer and up to three most relevant quotes from the documents.\n"
+        f"Sample response: {json.dumps(SAMPLE_JSON_RESPONSE)}"
+    )
+    messages.append({"role": "system", "content": sample_msg})
+
+    messages.append({"role": "user", "content": f"{QUESTION_PAT}\n{question}\n"})
+
+    # Note that the below will be dropped in reflexion if used
+    messages.append({"role": "assistant", "content": "Answer Json:\n"})
+
+    return messages
+
+
+def freeform_chat_processor(
     question: str, documents: list[str]
 ) -> list[dict[str, str]]:
     sample_quote = "Quote:\nThe hotdogs are freshly cooked.\n\nQuote:\nThey are very cheap at only a dollar each."
@@ -60,3 +174,16 @@ def openai_chat_completion_processor(
     )
 
     return messages
+
+
+# Not very useful, have not seen it improve an answer based on this
+# Sometimes gpt-3.5-turbo will just answer something worse like:
+# 'The response is a valid json that fully answers the user query with quotes exactly matching sections of the source
+# document. No revision is needed.'
+def get_chat_reflexion_msg() -> dict[str, str]:
+    reflexion_content = (
+        "Is the assistant response a valid json that fully answer the user query? "
+        "If the response needs to be fixed or if an improvement is possible, provide a revised json. "
+        "Otherwise, respond with the same json."
+    )
+    return {"role": "system", "content": reflexion_content}
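Note (not part of the diff): a standalone sketch of how the json_chat_processor message list might be fed to the OpenAI chat API, roughly what OpenAIChatCompletionQA does further down in this commit. The API key, model name, and max_tokens value here are illustrative placeholders; the real values come from the model configs.

import openai

from danswer.direct_qa.qa_prompts import json_chat_processor

openai.api_key = "sk-..."  # illustrative placeholder

messages = json_chat_processor(
    "Where is the Eiffel Tower?",
    ["The Eiffel Tower is an iconic symbol of Paris, located on the Champ de Mars in France."],
)
# messages: system role prompt, then each document as a user turn answered with
# "Acknowledged", then the sample-json instructions, the query, and the
# "Answer Json:" assistant primer.

response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",  # illustrative; the QA class uses its configured model_version
    messages=messages,
    max_tokens=512,  # illustrative; the QA class uses OPENAI_MAX_OUTPUT_TOKENS
)
print(response["choices"][0]["message"]["content"])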
@@ -1,3 +1,4 @@
+import json
 import math
 import re
 from collections.abc import Callable
@@ -18,8 +19,9 @@ from danswer.configs.model_configs import OPENAI_MAX_OUTPUT_TOKENS
 from danswer.configs.model_configs import OPENAPI_MODEL_VERSION
 from danswer.direct_qa.interfaces import QAModel
 from danswer.direct_qa.qa_prompts import ANSWER_PAT
-from danswer.direct_qa.qa_prompts import generic_prompt_processor
-from danswer.direct_qa.qa_prompts import openai_chat_completion_processor
+from danswer.direct_qa.qa_prompts import get_chat_reflexion_msg
+from danswer.direct_qa.qa_prompts import json_chat_processor
+from danswer.direct_qa.qa_prompts import json_processor
 from danswer.direct_qa.qa_prompts import QUOTE_PAT
 from danswer.direct_qa.qa_prompts import UNCERTAINTY_PAT
 from danswer.utils.logging import setup_logger
@@ -33,10 +35,9 @@ logger = setup_logger()
 openai.api_key = OPENAI_API_KEY
 
 
-def separate_answer_quotes(
+def extract_answer_quotes_freeform(
     answer_raw: str,
 ) -> Tuple[Optional[str], Optional[list[str]]]:
-    """Gives back the answer and quote sections"""
     null_answer_check = (
         answer_raw.replace(ANSWER_PAT, "").replace(QUOTE_PAT, "").strip()
     )
@@ -71,6 +72,27 @@
     return answer, sections_clean[1:]
 
 
+def extract_answer_quotes_json(
+    answer_dict: dict[str, str | list[str]]
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    answer_dict = {k.lower(): v for k, v in answer_dict.items()}
+    answer = str(answer_dict.get("answer"))
+    quotes = answer_dict.get("quotes") or answer_dict.get("quote")
+    if isinstance(quotes, str):
+        quotes = [quotes]
+    return answer, quotes
+
+
+def separate_answer_quotes(
+    answer_raw: str,
+) -> Tuple[Optional[str], Optional[list[str]]]:
+    try:
+        model_raw_json = json.loads(answer_raw)
+        return extract_answer_quotes_json(model_raw_json)
+    except ValueError:
+        return extract_answer_quotes_freeform(answer_raw)
+
+
 def match_quotes_to_docs(
     quotes: list[str],
     chunks: list[InferenceChunk],
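Note (not part of the diff): with this change, separate_answer_quotes first tries json.loads on the raw model output and only falls back to the freeform Answer:/Quote: parser when the output is not valid JSON (json.JSONDecodeError is a subclass of ValueError). A standalone sketch with made-up model outputs, assuming the functions from this hunk are in scope:

import json

# JSON-style output produced by the new json prompts:
json_output = json.dumps(
    {
        "answer": "The Eiffel Tower is located in Paris, France.",
        "quotes": ["The Eiffel Tower is an iconic symbol of Paris"],
    }
)
answer, quotes = separate_answer_quotes(json_output)
# -> answer == "The Eiffel Tower is located in Paris, France."
#    quotes == ["The Eiffel Tower is an iconic symbol of Paris"]

# Non-JSON output from the older prompt style falls back to the freeform parser:
answer, quotes = separate_answer_quotes("Answer:\nParis, France\n\nQuote:\nThe Eiffel Tower is an iconic symbol of Paris")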
@@ -140,7 +162,7 @@ def process_answer(
 class OpenAICompletionQA(QAModel):
     def __init__(
         self,
-        prompt_processor: Callable[[str, list[str]], str] = generic_prompt_processor,
+        prompt_processor: Callable[[str, list[str]], str] = json_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
     ) -> None:
@@ -185,13 +207,15 @@ class OpenAIChatCompletionQA(QAModel):
         self,
         prompt_processor: Callable[
             [str, list[str]], list[dict[str, str]]
-        ] = openai_chat_completion_processor,
+        ] = json_chat_processor,
         model_version: str = OPENAPI_MODEL_VERSION,
         max_output_tokens: int = OPENAI_MAX_OUTPUT_TOKENS,
+        reflexion_try_count: int = 0,
     ) -> None:
         self.prompt_processor = prompt_processor
         self.model_version = model_version
         self.max_output_tokens = max_output_tokens
+        self.reflexion_try_count = reflexion_try_count
 
     @log_function_time()
     def answer_question(
@@ -200,7 +224,8 @@ class OpenAIChatCompletionQA(QAModel):
         top_contents = [ranked_chunk.content for ranked_chunk in context_docs]
         messages = self.prompt_processor(query, top_contents)
         logger.debug(messages)
-
+        model_output = ""
+        for _ in range(self.reflexion_try_count + 1):
         try:
             response = openai.ChatCompletion.create(
                 messages=messages,
@@ -212,12 +237,15 @@ class OpenAIChatCompletionQA(QAModel):
                 max_tokens=self.max_output_tokens,
             )
             model_output = response["choices"][0]["message"]["content"].strip()
+            assistant_msg = {"content": model_output, "role": "assistant"}
+            messages.extend([assistant_msg, get_chat_reflexion_msg()])
             logger.info(
                 "OpenAI Token Usage: " + str(response["usage"]).replace("\n", "")
             )
         except Exception as e:
             logger.exception(e)
-            model_output = "Model Failure"
+            logger.warning(f"Model failure for query: {query}")
+            return None, None
 
         logger.debug(model_output)
 
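Note (not part of the diff): the new reflexion_try_count constructor argument controls how many extra self-review passes answer_question makes — each pass appends the previous answer plus get_chat_reflexion_msg() and asks the model to validate or revise its own json. A standalone sketch of the knob; the module path and the query/chunk variables are illustrative assumptions:

from danswer.direct_qa.question_answer import OpenAIChatCompletionQA  # path assumed

# Default: a single completion, no reflexion round-trip.
qa_model = OpenAIChatCompletionQA()

# One extra pass: the first answer is fed back with the reflexion system message
# and the model may return a revised json before the result is parsed.
reflexive_qa_model = OpenAIChatCompletionQA(reflexion_try_count=1)

answer, quotes = reflexive_qa_model.answer_question(query, ranked_chunks)  # placeholders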
@@ -19,6 +19,12 @@ def shared_precompare_cleanup(text: str) -> str:
     text = text.replace("*", "")
 
     # GPT models sometimes like to edit the quoting, ie "Title: Contents" becomes Title: "Contents"
+    text = text.replace('\\"', "")
     text = text.replace('"', "")
 
+    # GPT models often change up punctuations to make the text flow better.
+    text = text.replace(".", "")
+    text = text.replace(":", "")
+    text = text.replace(",", "")
+
     return text
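Note (not part of the diff): the extra replace calls make quote matching tolerant of punctuation and escaped-quote edits that GPT models introduce, since the same cleanup is applied to both the model quote and the source text before comparison. A standalone sketch of just the steps touched in this hunk (the real shared_precompare_cleanup does more):

def cleanup_demo(text: str) -> str:
    # Subset of shared_precompare_cleanup relevant to this hunk.
    text = text.replace("*", "")
    text = text.replace('\\"', "")
    text = text.replace('"', "")
    text = text.replace(".", "")
    text = text.replace(":", "")
    text = text.replace(",", "")
    return text

model_quote = 'Title: "The Eiffel Tower is in Paris, France."'
source_text = '"Title: The Eiffel Tower is in Paris France"'
assert cleanup_demo(model_quote) == cleanup_demo(source_text)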