From eb1b604b8c472e31c17953745d4520c1f6b17fb3 Mon Sep 17 00:00:00 2001
From: mattboret
Date: Sat, 11 May 2024 19:43:05 +0200
Subject: [PATCH] Allow to define custom conditions for the answer prompt answer validation (#1347)

Co-authored-by: Matthieu Boret
---
 backend/danswer/configs/app_configs.py       |  7 ++++
 backend/danswer/prompts/answer_validation.py | 40 ++++++++++++++++----
 2 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index e98c6ead6..88575300c 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -1,3 +1,4 @@
+import json
 import os
 import urllib.parse
 
@@ -261,3 +262,9 @@ DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
 TOKEN_BUDGET_GLOBALLY_ENABLED = (
     os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true"
 )
+
+# Defined custom query/answer conditions to validate the query and the LLM answer.
+# Format: list of strings
+CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
+    os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
+)
diff --git a/backend/danswer/prompts/answer_validation.py b/backend/danswer/prompts/answer_validation.py
index 8f159fc51..28d184aca 100644
--- a/backend/danswer/prompts/answer_validation.py
+++ b/backend/danswer/prompts/answer_validation.py
@@ -1,18 +1,46 @@
 # The following prompts are used for verifying the LLM answer after it is already produced.
 # Reflexion flow essentially. This feature can be toggled on/off
+from danswer.configs.app_configs import CUSTOM_ANSWER_VALIDITY_CONDITIONS
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUESTION_PAT
 
-
-ANSWER_VALIDITY_PROMPT = f"""
-You are an assistant to identify invalid query/answer pairs coming from a large language model.
-The query/answer pair is invalid if any of the following are True:
+ANSWER_VALIDITY_CONDITIONS = (
+    """
 1. Query is asking for information that varies by person or is subjective. If there is not a \
 globally true answer, the language model should not respond, therefore any answer is invalid.
 2. Answer addresses a related but different query. To be helpful, the model may provide \
 related information about a query but it won't match what the user is asking, this is invalid.
 3. Answer is just some form of "I don\'t know" or "not enough information" without significant \
 additional useful information. Explaining why it does not know or cannot answer is invalid.
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. {condition}"
+            for indice, condition in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_FORMAT = (
+    """
+1. True or False
+2. True or False
+3. True or False
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. True or False"
+            for indice, _ in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_VALIDITY_PROMPT = f"""
+You are an assistant to identify invalid query/answer pairs coming from a large language model.
+The query/answer pair is invalid if any of the following are True:
+{ANSWER_VALIDITY_CONDITIONS}
 
 {QUESTION_PAT} {{user_query}}
 {ANSWER_PAT} {{llm_answer}}
@@ -20,9 +48,7 @@ additional useful information. Explaining why it does not know or cannot answer
 ------------------------
 You MUST answer in EXACTLY the following format:
 ```
-1. True or False
-2. True or False
-3. True or False
+{ANSWER_FORMAT}
 Final Answer: Valid or Invalid
 ```
 
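
Usage sketch (not part of the committed patch; the condition strings below are illustrative assumptions): the new CUSTOM_ANSWER_VALIDITY_CONDITIONS setting is read from an environment variable as a JSON-encoded list of strings, so a deployment overriding the default three conditions might configure it roughly as follows.

    # Python sketch: overriding the answer-validity conditions via the environment.
    # Only the variable name and the json.loads parsing come from the patch; the
    # condition texts here are hypothetical examples.
    import json
    import os

    os.environ["CUSTOM_ANSWER_VALIDITY_CONDITIONS"] = json.dumps(
        [
            "Answer is not supported by the retrieved documents.",
            "Answer discloses internal configuration or credentials.",
        ]
    )

    # With two custom conditions, the pieces built in answer_validation.py render as:
    #   ANSWER_VALIDITY_CONDITIONS:
    #     1. Answer is not supported by the retrieved documents.
    #     2. Answer discloses internal configuration or credentials.
    #   ANSWER_FORMAT:
    #     1. True or False
    #     2. True or False

Because app_configs.py parses the variable once at module import time, the value has to be present in the process environment before the Danswer backend starts; an unset variable falls back to "[]" (an empty list), which keeps the original hard-coded conditions and answer format.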