Touchups (#1855)

2025-04-08 11:58:34 +02:00 · 2024-07-17 23:47:10 -07:00 · 2024-07-17 23:47:10 -07:00 · e4984153c0
commit e4984153c0
parent 87fadb07ea
7 changed files with 112 additions and 83 deletions
--- a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
+++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "05c07bf07c00"
 down_revision = "b896bbd0d5a7"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None


 def upgrade() -> None:
--- a/backend/danswer/chat/models.py
+++ b/backend/danswer/chat/models.py
@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel):


 class RelevanceChunk(BaseModel):
+    # TODO: make this document-level. The name is also a slight misnomer, since relevance
+    # is currently evaluated at the section level rather than the chunk level.
    relevant: bool | None = None
    content: str | None = None

--- a/backend/danswer/prompts/agentic_evaluation.py
+++ b/backend/danswer/prompts/agentic_evaluation.py
@ -0,0 +1,43 @@
+AGENTIC_SEARCH_SYSTEM_PROMPT = """
+You are an expert at evaluating the relevance of a document to a search query.
+Provided a document and a search query, you determine if the document is relevant to the user query.
+You ALWAYS output the 3 sections described below and every section always begins with the same header line.
+The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
+The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
+The "Final Relevance Determination" is always a single True or False.
+
+You always output your response following these 3 sections:
+
+1. Chain of Thought:
+Provide a chain of thought analysis considering:
+- The main purpose and content of the document
+- What the user is searching for
+- How the document relates to the query
+- Potential uses of the document for the given query
+Be thorough, but avoid unnecessary repetition. Think step by step.
+
+2. Useful Analysis:
+Summarize the contents of the document as it relates to the user query.
+BE ABSOLUTELY AS CONCISE AS POSSIBLE.
+If the document is not useful, briefly mention what the document is about.
+Do NOT say whether this document is useful or not useful, ONLY provide the summary.
+If referring to the document, prefer using "this" document over "the" document.
+
+3. Final Relevance Determination:
+True or False
+"""
+
+AGENTIC_SEARCH_USER_PROMPT = """
+Document:
+```
+{content}
+```
+
+Query:
+{query}
+
+Be sure to run through the 3 steps of evaluation:
+1. Chain of Thought
+2. Useful Analysis
+3. Final Relevance Determination
+""".strip()
--- a/backend/danswer/prompts/miscellaneous_prompts.py
+++ b/backend/danswer/prompts/miscellaneous_prompts.py
@ -24,25 +24,6 @@ Query:
 """.strip()


-AGENTIC_SEARCH_EVALUATION_PROMPT = """
-1. Chain of Thought Analysis:
-Provide a chain of thought analysis considering:
- The main purpose and content of the document
- What the user is searching for
- How the document's topic relates to the query
- Potential uses of the document for the given query
-Be thorough, but avoid unnecessary repetition. Think step by step.
-
-2. Useful Analysis:
-[ANALYSIS_START]
-State the most important point from the chain of thought.
-DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description.
-[ANALYSIS_END]
-
-3. Relevance Determination:
-RESULT: True (if potentially relevant)
-RESULT: False (if not relevant)
-""".strip()
 # Use the following for easy viewing of prompts
 if __name__ == "__main__":
    print(LANGUAGE_REPHRASE_PROMPT)
--- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py
+++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
@ -1,71 +1,70 @@
+import re
+
 from danswer.chat.models import RelevanceChunk
 from danswer.llm.interfaces import LLM
+from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.llm.utils import message_to_string
-from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT
+from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT
+from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT
 from danswer.search.models import InferenceSection
 from danswer.utils.logger import setup_logger

 logger = setup_logger()


+def _get_agent_eval_messages(
+    title: str, content: str, query: str
+) -> list[dict[str, str]]:
+    messages = [
+        {
+            "role": "system",
+            "content": AGENTIC_SEARCH_SYSTEM_PROMPT,
+        },
+        {
+            "role": "user",
+            "content": AGENTIC_SEARCH_USER_PROMPT.format(
+                title=title, content=content, query=query
+            ),
+        },
+    ]
+    return messages
+
+
 def evaluate_inference_section(
    document: InferenceSection, query: str, llm: LLM
 ) -> dict[str, RelevanceChunk]:
-    relevance: RelevanceChunk = RelevanceChunk()
    results = {}

-    # At least for now, is the same doucment ID across chunks
    document_id = document.center_chunk.document_id
+    semantic_id = document.center_chunk.semantic_identifier
+    contents = document.combined_content
    chunk_id = document.center_chunk.chunk_id

-    prompt = f"""
-    Analyze the relevance of this document to the search query:
-    Title: {document_id.split("/")[-1]}
-    Blurb: {document.combined_content}
-    Query: {query}
+    messages = _get_agent_eval_messages(
+        title=semantic_id, content=contents, query=query
+    )
+    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
+    model_output = message_to_string(llm.invoke(filled_llm_prompt))

-    {AGENTIC_SEARCH_EVALUATION_PROMPT}
-    """
+    # Search for the "Useful Analysis" section in the model output.
+    # The regex matches "2. Useful Analysis" followed by an optional colon, then lazily
+    # captures any text up to a blank line followed by "3. Final Relevance".
+    # The inline (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines.
+    # If no match is found, the entire model output is used as the analysis.
+    analysis_match = re.search(
+        r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
+        model_output,
+        re.DOTALL,
+    )
+    analysis = analysis_match.group(1).strip() if analysis_match else model_output

-    content = message_to_string(llm.invoke(prompt=prompt))
-    analysis = ""
-    relevant = False
-    chain_of_thought = ""
+    # Get the last non-empty line; the prompt asks for the True/False verdict as the final section
+    last_line = next(
+        (line for line in reversed(model_output.split("\n")) if line.strip()), ""
+    )
+    relevant = last_line.strip().lower().startswith("true")

-    parts = content.split("[ANALYSIS_START]", 1)
-    if len(parts) == 2:
-        chain_of_thought, rest = parts
-    else:
-        logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}")
-        rest = content
-
-    parts = rest.split("[ANALYSIS_END]", 1)
-    if len(parts) == 2:
-        analysis, result = parts
-    else:
-        logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}")
-        result = rest
-
-    chain_of_thought = chain_of_thought.strip()
-    analysis = analysis.strip()
-    result = result.strip().lower()
-
-    # Determine relevance
-    if "result: true" in result:
-        relevant = True
-    elif "result: false" in result:
-        relevant = False
-    else:
-        logger.warning(f"Invalid result format for document {document_id}")
-
-    if not analysis:
-        logger.warning(
-            f"Couldn't extract proper analysis for document {document_id}. Using full content."
-        )
-        analysis = content
-
-    relevance.content = analysis
-    relevance.relevant = relevant
-
-    results[f"{document_id}-{chunk_id}"] = relevance
+    results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
+        relevant=relevant, content=analysis
+    )
    return results
--- a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
+++ b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
@ -49,8 +49,10 @@ const ToggleSwitch = () => {
        onClick={() => handleTabChange("search")}
      >
        <SearchIcon size={16} className="mr-2" />
-        Search
-        <span className="text-xs ml-2">{commandSymbol}S</span>
+        <p className="items-baseline flex">
+          Search
+          <span className="text-xs ml-2">{commandSymbol}S</span>
+        </p>
      </button>
      <button
        className={`px-4 py-2 rounded-full text-sm font-medium transition-colors duration-300 ease-in-out flex  items-center relative z-10 ${
--- a/web/src/components/search/SearchResultsDisplay.tsx
+++ b/web/src/components/search/SearchResultsDisplay.tsx
@ -202,7 +202,7 @@ export const SearchResultsDisplay = ({
                      setShowAll((showAll) => !showAll);
                    }
                  }}
-                  className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-16 h-8 ${
+                  className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-20 h-8 ${
                    !sweep
                      ? "bg-green-500 text-text-800"
                      : "bg-rose-700 text-text-100"
@ -217,16 +217,18 @@ export const SearchResultsDisplay = ({
                    <span></span>
                    {!sweep
                      ? agenticResults
-                        ? "all"
-                        : "hide"
+                        ? "Show All"
+                        : "Focus"
                      : agenticResults
-                        ? "hide"
-                        : "undo"}
-                    {!sweep ? (
-                      <BroomIcon className="h-4 w-4" />
-                    ) : (
-                      <UndoIcon className="h-4 w-4" />
-                    )}
+                        ? "Focus"
+                        : "Show All"}
+                    <span className="ml-1">
+                      {!sweep ? (
+                        <BroomIcon className="h-4 w-4" />
+                      ) : (
+                        <UndoIcon className="h-4 w-4" />
+                      )}
+                    </span>
                  </div>
                </button>
              </Tooltip>