Allow defining custom conditions for the answer validation prompt (#1347)

Co-authored-by: Matthieu Boret <matthieu.boret@fr.clara.net>
2025-09-18 11:34:12 +02:00 · 2024-05-11 19:43:05 +02:00
parent b8af38bb95
commit eb1b604b8c
2 changed files with 40 additions and 7 deletions
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -1,3 +1,4 @@
+import json
 import os
 import urllib.parse

@@ -261,3 +262,9 @@ DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
 TOKEN_BUDGET_GLOBALLY_ENABLED = (
    os.environ.get("TOKEN_BUDGET_GLOBALLY_ENABLED", "").lower() == "true"
 )
+
+# Custom query/answer conditions used to validate the query and the LLM answer.
+# Format: JSON-encoded list of strings, e.g. '["condition 1", "condition 2"]'
+CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
+    os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
+)
--- a/backend/danswer/prompts/answer_validation.py
+++ b/backend/danswer/prompts/answer_validation.py
@@ -1,18 +1,46 @@
 # The following prompts are used for verifying the LLM answer after it is already produced.
 # Reflexion flow essentially. This feature can be toggled on/off
+from danswer.configs.app_configs import CUSTOM_ANSWER_VALIDITY_CONDITIONS
 from danswer.prompts.constants import ANSWER_PAT
 from danswer.prompts.constants import QUESTION_PAT

-
-ANSWER_VALIDITY_PROMPT = f"""
-You are an assistant to identify invalid query/answer pairs coming from a large language model.
-The query/answer pair is invalid if any of the following are True:
+ANSWER_VALIDITY_CONDITIONS = (
+    """
 1. Query is asking for information that varies by person or is subjective. If there is not a \
 globally true answer, the language model should not respond, therefore any answer is invalid.
 2. Answer addresses a related but different query. To be helpful, the model may provide \
 related information about a query but it won't match what the user is asking, this is invalid.
 3. Answer is just some form of "I don\'t know" or "not enough information" without significant \
 additional useful information. Explaining why it does not know or cannot answer is invalid.
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. {condition}"
+            for indice, condition in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_FORMAT = (
+    """
+1. True or False
+2. True or False
+3. True or False
+"""
+    if not CUSTOM_ANSWER_VALIDITY_CONDITIONS
+    else "\n".join(
+        [
+            f"{indice+1}. True or False"
+            for indice, _ in enumerate(CUSTOM_ANSWER_VALIDITY_CONDITIONS)
+        ]
+    )
+)
+
+ANSWER_VALIDITY_PROMPT = f"""
+You are an assistant to identify invalid query/answer pairs coming from a large language model.
+The query/answer pair is invalid if any of the following are True:
+{ANSWER_VALIDITY_CONDITIONS}

 {QUESTION_PAT} {{user_query}}
 {ANSWER_PAT} {{llm_answer}}
@@ -20,9 +48,7 @@ additional useful information. Explaining why it does not know or cannot answer
 ------------------------
 You MUST answer in EXACTLY the following format:
 ```
-1. True or False
-2. True or False
-3. True or False
+{ANSWER_FORMAT}
 Final Answer: Valid or Invalid
 ```