Commit: welcome to onyx

backend/onyx/secondary_llm_flows/answer_validation.py (new file, 61 lines)
@@ -0,0 +1,61 @@
from onyx.llm.exceptions import GenAIDisabledException
from onyx.llm.factory import get_default_llms
from onyx.llm.utils import dict_based_prompt_to_langchain_prompt
from onyx.llm.utils import message_to_string
from onyx.prompts.answer_validation import ANSWER_VALIDITY_PROMPT
from onyx.utils.logger import setup_logger
from onyx.utils.timing import log_function_time

logger = setup_logger()


@log_function_time()
def get_answer_validity(
    query: str,
    answer: str,
) -> bool:
    def _get_answer_validation_messages(
        query: str, answer: str
    ) -> list[dict[str, str]]:
        # Below COT block is unused, keeping it for reference. Chain of Thought here
        # significantly increases the time to answer; we can get most of the way there by
        # just having the model evaluate each individual condition with a single True/False.
        # cot_block = (
        #     f"{THOUGHT_PAT} Use this as a scratchpad to write out in a step by step manner your reasoning "
        #     f"about EACH criterion to ensure that your conclusion is correct. "
        #     f"Be brief when evaluating each condition.\n"
        #     f"{FINAL_ANSWER_PAT} Valid or Invalid"
        # )

        messages = [
            {
                "role": "user",
                "content": ANSWER_VALIDITY_PROMPT.format(
                    user_query=query, llm_answer=answer
                ),
            },
        ]

        return messages

    def _extract_validity(model_output: str) -> bool:
        if model_output.strip().strip("```").strip().split()[-1].lower() == "invalid":
            return False
        return True  # If something is wrong, let's not toss away the answer

    try:
        llm, _ = get_default_llms()
    except GenAIDisabledException:
        return True

    if not answer:
        return False

    messages = _get_answer_validation_messages(query, answer)
    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
    model_output = message_to_string(llm.invoke(filled_llm_prompt))
    logger.debug(model_output)

    validity = _extract_validity(model_output)

    return validity
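
A usage sketch (hypothetical caller, not part of this commit; it assumes a
default LLM provider is configured so get_default_llms() does not raise):

    from onyx.secondary_llm_flows.answer_validation import get_answer_validity

    # Illustrative query/answer pair.
    query = "What is the refund policy?"
    answer = "Refunds are issued within 30 days of purchase."

    if get_answer_validity(query, answer):
        print("Answer passed validation; safe to surface.")
    else:
        print("Answer flagged invalid; consider suppressing it.")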
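
One edge case worth noting: _extract_validity indexes split()[-1], which raises
IndexError when the model returns an empty or whitespace-only string. A more
defensive variant (a sketch, not part of the commit; the helper name is made up)
that keeps the same "when in doubt, keep the answer" behavior:

    def _extract_validity_safe(model_output: str) -> bool:
        # Strip whitespace and stray code fences, then inspect the final token.
        tokens = model_output.strip().strip("```").strip().split()
        if tokens and tokens[-1].lower() == "invalid":
            return False
        return True  # Empty or non-"invalid" output keeps the answer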