From eb1b604b8c472e31c17953745d4520c1f6b17fb3 Mon Sep 17 00:00:00 2001
From: mattboret
Date: Sat, 11 May 2024 19:43:05 +0200
Subject: [PATCH] Allow to define custom conditions for the answer prompt answer validation (#1347)

Co-authored-by: Matthieu Boret
---
 backend/danswer/configs/app_configs.py       |  7 ++++
 backend/danswer/prompts/answer_validation.py | 40 ++++++++++++++++----
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index e98c6ead6..88575300c 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -1,3 +1,4 @@
+import json
 import os
 import urllib.parse
 
@@ -261,3 +262,9 @@ DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
 TOKEN_BUDGET_GLOBALLY_ENABLED = (
     os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true"
 )
+
+# Defined custom query/answer conditions to validate the query and the LLM answer.
+# Format: list of strings
+CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
+    os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
+)
diff --git a/backend/danswer/prompts/answer_validation.py b/backend/danswer/prompts/answer_validation.py
index 8f159fc51..28d184aca 100644
--- a/backend/danswer/prompts/answer_validation.py
+++ b/backend/danswer/prompts/answer_validation.py
@@ -1,18 +1,46 @@
 # The following prompts are used for verifying the LLM answer after it is already produced.
 # Reflexion flow essentially. This feature can be toggled on/off
+from danswer.configs.app_configs import CUSTOM_ANSWER_VALIDITY_CONDITIONS
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUESTION_PAT
 
-
-ANSWER_VALIDITY_PROMPT = f"""
-You are an assistant to identify invalid query/answer pairs coming from a large language model.
-The query/answer pair is invalid if any of the following are True:
+ANSWER_VALIDITY_CONDITIONS = (
+    """
 1. Query is asking for information that varies by person or is subjective. If there is not a \
 globally true answer, the language model should not respond, therefore any answer is invalid.
 2. Answer addresses a related but different query. To be helpful, the model may provide \
 related information about a query but it won't match what the user is asking, this is invalid.
 3. Answer is just some form of "I don\'t know" or "not enough information" without significant \
 additional useful information. Explaining why it does not know or cannot answer is invalid.
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. {condition}"
+            for indice, condition in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_FORMAT = (
+    """
+1. True or False
+2. True or False
+3. True or False
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. True or False"
+            for indice, _ in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_VALIDITY_PROMPT = f"""
+You are an assistant to identify invalid query/answer pairs coming from a large language model.
+The query/answer pair is invalid if any of the following are True:
+{ANSWER_VALIDITY_CONDITIONS}
 
 {QUESTION_PAT} {{user_query}}
 {ANSWER_PAT} {{llm_answer}}
@@ -20,9 +48,7 @@ additional useful information. Explaining why it does not know or cannot answer
 ------------------------
 You MUST answer in EXACTLY the following format:
 ```
-1. True or False
-2. True or False
-3. True or False
+{ANSWER_FORMAT}
 Final Answer: Valid or Invalid
 ```
 
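
Usage sketch (not part of the committed patch; the condition strings below are illustrative assumptions): the new CUSTOM_ANSWER_VALIDITY_CONDITIONS setting is read from an environment variable as a JSON-encoded list of strings, so a deployment overriding the default three conditions might configure it roughly as follows.

    # Python sketch: overriding the answer-validity conditions via the environment.
    # Only the variable name and the json.loads parsing come from the patch; the
    # condition texts here are hypothetical examples.
    import json
    import os

    os.environ["CUSTOM_ANSWER_VALIDITY_CONDITIONS"] = json.dumps(
        [
            "Answer is not supported by the retrieved documents.",
            "Answer discloses internal configuration or credentials.",
        ]
    )

    # With two custom conditions, the pieces built in answer_validation.py render as:
    #   ANSWER_VALIDITY_CONDITIONS:
    #     1. Answer is not supported by the retrieved documents.
    #     2. Answer discloses internal configuration or credentials.
    #   ANSWER_FORMAT:
    #     1. True or False
    #     2. True or False

Because app_configs.py parses the variable once at module import time, the value has to be present in the process environment before the Danswer backend starts; an unset variable falls back to "[]" (an empty list), which keeps the original hard-coded conditions and answer format.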