Touchups (#1855)

2025-10-11 05:36:03 +02:00 · 2024-07-17 23:47:10 -07:00
parent 87fadb07ea
commit e4984153c0
7 changed files with 112 additions and 83 deletions
--- a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
+++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py
@@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "05c07bf07c00"
 down_revision = "b896bbd0d5a7"
-branch_labels = None
+branch_labels: None = None
-depends_on = None
+depends_on: None = None
 def upgrade() -> None:
--- a/backend/danswer/chat/models.py
+++ b/backend/danswer/chat/models.py
@@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel):
 class RelevanceChunk(BaseModel):
    # TODO make this document level. Also slight misnomer here as this is actually
    # done at the section level currently rather than the chunk
    relevant: bool | None = None
    content: str | None = None
--- a/backend/danswer/prompts/agentic_evaluation.py
+++ b/backend/danswer/prompts/agentic_evaluation.py
@@ -0,0 +1,43 @@
 AGENTIC_SEARCH_SYSTEM_PROMPT = """
 You are an expert at evaluating the relevance of a document to a search query.
 Provided a document and a search query, you determine if the document is relevant to the user query.
 You ALWAYS output the 3 sections described below and every section always begins with the same header line.
 The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
 The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
 The "Final Relevance Determination" is always a single True or False.
 You always output your response following these 3 sections:
 1. Chain of Thought:
 Provide a chain of thought analysis considering:
 - The main purpose and content of the document
 - What the user is searching for
 - How the document relates to the query
 - Potential uses of the document for the given query
 Be thorough, but avoid unnecessary repetition. Think step by step.
 2. Useful Analysis:
 Summarize the contents of the document as it relates to the user query.
 BE ABSOLUTELY AS CONCISE AS POSSIBLE.
 If the document is not useful, briefly mention the what the document is about.
 Do NOT say whether this document is useful or not useful, ONLY provide the summary.
 If referring to the document, prefer using "this" document over "the" document.
 3. Final Relevance Determination:
 True or False
 """
 AGENTIC_SEARCH_USER_PROMPT = """
 Document:
 ```
 {content}
 ```
 Query:
 {query}
 Be sure to run through the 3 steps of evaluation:
 1. Chain of Thought
 2. Useful Analysis
 3. Final Relevance Determination
 """.strip()
--- a/backend/danswer/prompts/miscellaneous_prompts.py
+++ b/backend/danswer/prompts/miscellaneous_prompts.py
@@ -24,25 +24,6 @@ Query:
 """.strip()
 AGENTIC_SEARCH_EVALUATION_PROMPT = """
 1. Chain of Thought Analysis:
 Provide a chain of thought analysis considering:
 - The main purpose and content of the document
 - What the user is searching for
 - How the document's topic relates to the query
 - Potential uses of the document for the given query
 Be thorough, but avoid unnecessary repetition. Think step by step.
 2. Useful Analysis:
 [ANALYSIS_START]
 State the most important point from the chain of thought.
 DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description.
 [ANALYSIS_END]
 3. Relevance Determination:
 RESULT: True (if potentially relevant)
 RESULT: False (if not relevant)
 """.strip()
 # Use the following for easy viewing of prompts
 if __name__ == "__main__":
    print(LANGUAGE_REPHRASE_PROMPT)
--- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py
+++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py
@@ -1,71 +1,70 @@
 import re
 from danswer.chat.models import RelevanceChunk
 from danswer.llm.interfaces import LLM
 from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
 from danswer.llm.utils import message_to_string
-from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT
+from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT
 from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT
 from danswer.search.models import InferenceSection
 from danswer.utils.logger import setup_logger
 logger = setup_logger()
 def _get_agent_eval_messages(
    title: str, content: str, query: str
 ) -> list[dict[str, str]]:
    """Build the chat messages used to ask the LLM for a relevance evaluation.

    Returns a two-element list of ``{"role": ..., "content": ...}`` dicts:
    a system message holding AGENTIC_SEARCH_SYSTEM_PROMPT, followed by a user
    message holding AGENTIC_SEARCH_USER_PROMPT filled in via ``str.format``.
    """
    # NOTE(review): `title` is passed to `.format()` below, but the
    # AGENTIC_SEARCH_USER_PROMPT template visible in this change only contains
    # `{content}` and `{query}` placeholders. `str.format` ignores unused
    # keyword arguments, so the title would not reach the model -- confirm
    # whether the template should include a `{title}` field.
    messages = [
        {
            "role": "system",
            "content": AGENTIC_SEARCH_SYSTEM_PROMPT,
        },
        {
            "role": "user",
            "content": AGENTIC_SEARCH_USER_PROMPT.format(
                title=title, content=content, query=query
            ),
        },
    ]
    return messages
 def evaluate_inference_section(
    document: InferenceSection, query: str, llm: LLM
 ) -> dict[str, RelevanceChunk]:
    relevance: RelevanceChunk = RelevanceChunk()
    results = {}
    # At least for now, this is the same document ID across chunks
    document_id = document.center_chunk.document_id
    semantic_id = document.center_chunk.semantic_identifier
    contents = document.combined_content
    chunk_id = document.center_chunk.chunk_id
-    prompt = f"""
+    messages = _get_agent_eval_messages(
-    Analyze the relevance of this document to the search query:
+        title=semantic_id, content=contents, query=query
    Title: {document_id.split("/")[-1]}
    Blurb: {document.combined_content}
    Query: {query}
    {AGENTIC_SEARCH_EVALUATION_PROMPT}
    """
    content = message_to_string(llm.invoke(prompt=prompt))
    analysis = ""
    relevant = False
    chain_of_thought = ""
    parts = content.split("[ANALYSIS_START]", 1)
    if len(parts) == 2:
        chain_of_thought, rest = parts
    else:
        logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}")
        rest = content
    parts = rest.split("[ANALYSIS_END]", 1)
    if len(parts) == 2:
        analysis, result = parts
    else:
        logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}")
        result = rest
    chain_of_thought = chain_of_thought.strip()
    analysis = analysis.strip()
    result = result.strip().lower()
    # Determine relevance
    if "result: true" in result:
        relevant = True
    elif "result: false" in result:
        relevant = False
    else:
        logger.warning(f"Invalid result format for document {document_id}")
    if not analysis:
        logger.warning(
            f"Couldn't extract proper analysis for document {document_id}. Using full content."
    )
-        analysis = content
+    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
    model_output = message_to_string(llm.invoke(filled_llm_prompt))
-    relevance.content = analysis
+    # Search for the "Useful Analysis" section in the model output
-    relevance.relevant = relevant
+    # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
    # then any text up to "3. Final Relevance"
    # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
    # If no match is found, the entire model output is used as the analysis
    analysis_match = re.search(
        r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
        model_output,
        re.DOTALL,
    )
    analysis = analysis_match.group(1).strip() if analysis_match else model_output
-    results[f"{document_id}-{chunk_id}"] = relevance
+    # Get the last non-empty line
    last_line = next(
        (line for line in reversed(model_output.split("\n")) if line.strip()), ""
    )
    relevant = last_line.strip().lower().startswith("true")
    results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
        relevant=relevant, content=analysis
    )
    return results
--- a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
+++ b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx
@@ -49,8 +49,10 @@ const ToggleSwitch = () => {
        onClick={() => handleTabChange("search")}
      >
        <SearchIcon size={16} className="mr-2" />
        <p className="items-baseline flex">
          Search
          <span className="text-xs ml-2">{commandSymbol}S</span>
        </p>
      </button>
      <button
        className={`px-4 py-2 rounded-full text-sm font-medium transition-colors duration-300 ease-in-out flex  items-center relative z-10 ${
--- a/web/src/components/search/SearchResultsDisplay.tsx
+++ b/web/src/components/search/SearchResultsDisplay.tsx
@@ -202,7 +202,7 @@ export const SearchResultsDisplay = ({
                      setShowAll((showAll) => !showAll);
                    }
                  }}
-                  className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-16 h-8 ${
+                  className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-20 h-8 ${
                    !sweep
                      ? "bg-green-500 text-text-800"
                      : "bg-rose-700 text-text-100"
@@ -217,16 +217,18 @@ export const SearchResultsDisplay = ({
                    <span></span>
                    {!sweep
                      ? agenticResults
-                        ? "all"
+                        ? "Show All"
-                        : "hide"
+                        : "Focus"
                      : agenticResults
-                        ? "hide"
+                        ? "Focus"
-                        : "undo"}
+                        : "Show All"}
                    <span className="ml-1">
                      {!sweep ? (
                        <BroomIcon className="h-4 w-4" />
                      ) : (
                        <UndoIcon className="h-4 w-4" />
                      )}
                    </span>
                  </div>
                </button>
              </Tooltip>