diff --git a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
index 2049bebfc..cec51bb64 100644
--- a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
+++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
@@ -12,8 +12,8 @@ import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "05c07bf07c00"
down_revision = "b896bbd0d5a7"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
def upgrade() -> None:
diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py
index 1fb8586f0..ffcf0e5f5 100644
--- a/backend/danswer/chat/models.py
+++ b/backend/danswer/chat/models.py
@@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel):
class RelevanceChunk(BaseModel):
+ # TODO make this document level. Also slight misnomer here as this is actually
+ # done at the section level currently rather than the chunk
relevant: bool | None = None
content: str | None = None
diff --git a/backend/danswer/prompts/agentic_evaluation.py b/backend/danswer/prompts/agentic_evaluation.py
new file mode 100644
index 000000000..bf8852cf8
--- /dev/null
+++ b/backend/danswer/prompts/agentic_evaluation.py
@@ -0,0 +1,43 @@
+AGENTIC_SEARCH_SYSTEM_PROMPT = """
+You are an expert at evaluating the relevance of a document to a search query.
+Given a document and a search query, you determine if the document is relevant to the user query.
+You ALWAYS output the 3 sections described below and every section always begins with the same header line.
+The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
+The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
+The "Final Relevance Determination" is always a single True or False.
+
+You always output your response following these 3 sections:
+
+1. Chain of Thought:
+Provide a chain of thought analysis considering:
+- The main purpose and content of the document
+- What the user is searching for
+- How the document relates to the query
+- Potential uses of the document for the given query
+Be thorough, but avoid unnecessary repetition. Think step by step.
+
+2. Useful Analysis:
+Summarize the contents of the document as it relates to the user query.
+BE ABSOLUTELY AS CONCISE AS POSSIBLE.
+If the document is not useful, briefly mention what the document is about.
+Do NOT say whether this document is useful or not useful, ONLY provide the summary.
+If referring to the document, prefer using "this" document over "the" document.
+
+3. Final Relevance Determination:
+True or False
+"""
+
+AGENTIC_SEARCH_USER_PROMPT = """
+Document:
+```
+{content}
+```
+
+Query:
+{query}
+
+Be sure to run through the 3 steps of evaluation:
+1. Chain of Thought
+2. Useful Analysis
+3. Final Relevance Determination
+""".strip()
diff --git a/backend/danswer/prompts/miscellaneous_prompts.py b/backend/danswer/prompts/miscellaneous_prompts.py
index c57fe73ac..81ae51643 100644
--- a/backend/danswer/prompts/miscellaneous_prompts.py
+++ b/backend/danswer/prompts/miscellaneous_prompts.py
@@ -24,25 +24,6 @@ Query:
""".strip()
-AGENTIC_SEARCH_EVALUATION_PROMPT = """
-1. Chain of Thought Analysis:
-Provide a chain of thought analysis considering:
-- The main purpose and content of the document
-- What the user is searching for
-- How the document's topic relates to the query
-- Potential uses of the document for the given query
-Be thorough, but avoid unnecessary repetition. Think step by step.
-
-2. Useful Analysis:
-[ANALYSIS_START]
-State the most important point from the chain of thought.
-DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description.
-[ANALYSIS_END]
-
-3. Relevance Determination:
-RESULT: True (if potentially relevant)
-RESULT: False (if not relevant)
-""".strip()
# Use the following for easy viewing of prompts
if __name__ == "__main__":
print(LANGUAGE_REPHRASE_PROMPT)
diff --git a/backend/danswer/secondary_llm_flows/agentic_evaluation.py b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
index 3f884e8f2..c35bf0542 100644
--- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py
+++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
@@ -1,71 +1,70 @@
+import re
+
from danswer.chat.models import RelevanceChunk
from danswer.llm.interfaces import LLM
+from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
from danswer.llm.utils import message_to_string
-from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT
+from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT
+from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT
from danswer.search.models import InferenceSection
from danswer.utils.logger import setup_logger
logger = setup_logger()
+def _get_agent_eval_messages(
+ title: str, content: str, query: str
+) -> list[dict[str, str]]:
+ messages = [
+ {
+ "role": "system",
+ "content": AGENTIC_SEARCH_SYSTEM_PROMPT,
+ },
+ {
+ "role": "user",
+ "content": AGENTIC_SEARCH_USER_PROMPT.format(
+ title=title, content=content, query=query
+ ),
+ },
+ ]
+ return messages
+
+
def evaluate_inference_section(
document: InferenceSection, query: str, llm: LLM
) -> dict[str, RelevanceChunk]:
- relevance: RelevanceChunk = RelevanceChunk()
results = {}
- # At least for now, is the same doucment ID across chunks
document_id = document.center_chunk.document_id
+ semantic_id = document.center_chunk.semantic_identifier
+ contents = document.combined_content
chunk_id = document.center_chunk.chunk_id
- prompt = f"""
- Analyze the relevance of this document to the search query:
- Title: {document_id.split("/")[-1]}
- Blurb: {document.combined_content}
- Query: {query}
+ messages = _get_agent_eval_messages(
+ title=semantic_id, content=contents, query=query
+ )
+ filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
+ model_output = message_to_string(llm.invoke(filled_llm_prompt))
- {AGENTIC_SEARCH_EVALUATION_PROMPT}
- """
+ # Search for the "Useful Analysis" section in the model output
+ # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
+ # then any text up to "3. Final Relevance"
+ # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
+ # If no match is found, the entire model output is used as the analysis
+ analysis_match = re.search(
+ r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
+ model_output,
+ re.DOTALL,
+ )
+ analysis = analysis_match.group(1).strip() if analysis_match else model_output
- content = message_to_string(llm.invoke(prompt=prompt))
- analysis = ""
- relevant = False
- chain_of_thought = ""
+ # Get the last non-empty line
+ last_line = next(
+ (line for line in reversed(model_output.split("\n")) if line.strip()), ""
+ )
+ relevant = last_line.strip().lower().startswith("true")
- parts = content.split("[ANALYSIS_START]", 1)
- if len(parts) == 2:
- chain_of_thought, rest = parts
- else:
- logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}")
- rest = content
-
- parts = rest.split("[ANALYSIS_END]", 1)
- if len(parts) == 2:
- analysis, result = parts
- else:
- logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}")
- result = rest
-
- chain_of_thought = chain_of_thought.strip()
- analysis = analysis.strip()
- result = result.strip().lower()
-
- # Determine relevance
- if "result: true" in result:
- relevant = True
- elif "result: false" in result:
- relevant = False
- else:
- logger.warning(f"Invalid result format for document {document_id}")
-
- if not analysis:
- logger.warning(
- f"Couldn't extract proper analysis for document {document_id}. Using full content."
- )
- analysis = content
-
- relevance.content = analysis
- relevance.relevant = relevant
-
- results[f"{document_id}-{chunk_id}"] = relevance
+ results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
+ relevant=relevant, content=analysis
+ )
return results
diff --git a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
index 537347e79..e3ffa1c40 100644
--- a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
+++ b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
@@ -49,8 +49,10 @@ const ToggleSwitch = () => {
onClick={() => handleTabChange("search")}
>
+ Search + {commandSymbol}S +