diff --git a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py index 2049bebfc..cec51bb64 100644 --- a/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py +++ b/backend/alembic/versions/05c07bf07c00_add_search_doc_relevance_details.py @@ -12,8 +12,8 @@ import sqlalchemy as sa # revision identifiers, used by Alembic. revision = "05c07bf07c00" down_revision = "b896bbd0d5a7" -branch_labels = None -depends_on = None +branch_labels: None = None +depends_on: None = None def upgrade() -> None: diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index 1fb8586f0..ffcf0e5f5 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel): class RelevanceChunk(BaseModel): + # TODO make this document level. Also slight misnomer here as this is actually + # done at the section level currently rather than the chunk relevant: bool | None = None content: str | None = None diff --git a/backend/danswer/prompts/agentic_evaluation.py b/backend/danswer/prompts/agentic_evaluation.py new file mode 100644 index 000000000..bf8852cf8 --- /dev/null +++ b/backend/danswer/prompts/agentic_evaluation.py @@ -0,0 +1,43 @@ +AGENTIC_SEARCH_SYSTEM_PROMPT = """ +You are an expert at evaluating the relevance of a document to a search query. +Provided a document and a search query, you determine if the document is relevant to the user query. +You ALWAYS output the 3 sections described below and every section always begins with the same header line. +The "Chain of Thought" is to help you understand the document and query and their relevance to one another. +The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them. +The "Final Relevance Determination" is always a single True or False. 
+ +You always output your response following these 3 sections: + +1. Chain of Thought: +Provide a chain of thought analysis considering: +- The main purpose and content of the document +- What the user is searching for +- How the document relates to the query +- Potential uses of the document for the given query +Be thorough, but avoid unnecessary repetition. Think step by step. + +2. Useful Analysis: +Summarize the contents of the document as it relates to the user query. +BE ABSOLUTELY AS CONCISE AS POSSIBLE. +If the document is not useful, briefly mention what the document is about. +Do NOT say whether this document is useful or not useful, ONLY provide the summary. +If referring to the document, prefer using "this" document over "the" document. + +3. Final Relevance Determination: +True or False +""" + +AGENTIC_SEARCH_USER_PROMPT = """ +Document: +``` +{content} +``` + +Query: +{query} + +Be sure to run through the 3 steps of evaluation: +1. Chain of Thought +2. Useful Analysis +3. Final Relevance Determination +""".strip() diff --git a/backend/danswer/prompts/miscellaneous_prompts.py b/backend/danswer/prompts/miscellaneous_prompts.py index c57fe73ac..81ae51643 100644 --- a/backend/danswer/prompts/miscellaneous_prompts.py +++ b/backend/danswer/prompts/miscellaneous_prompts.py @@ -24,25 +24,6 @@ Query: """.strip() -AGENTIC_SEARCH_EVALUATION_PROMPT = """ -1. Chain of Thought Analysis: -Provide a chain of thought analysis considering: -- The main purpose and content of the document -- What the user is searching for -- How the document's topic relates to the query -- Potential uses of the document for the given query -Be thorough, but avoid unnecessary repetition. Think step by step. - -2. Useful Analysis: -[ANALYSIS_START] -State the most important point from the chain of thought. -DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description. -[ANALYSIS_END] - -3. 
Relevance Determination: -RESULT: True (if potentially relevant) -RESULT: False (if not relevant) -""".strip() # Use the following for easy viewing of prompts if __name__ == "__main__": print(LANGUAGE_REPHRASE_PROMPT) diff --git a/backend/danswer/secondary_llm_flows/agentic_evaluation.py b/backend/danswer/secondary_llm_flows/agentic_evaluation.py index 3f884e8f2..c35bf0542 100644 --- a/backend/danswer/secondary_llm_flows/agentic_evaluation.py +++ b/backend/danswer/secondary_llm_flows/agentic_evaluation.py @@ -1,71 +1,70 @@ +import re + from danswer.chat.models import RelevanceChunk from danswer.llm.interfaces import LLM +from danswer.llm.utils import dict_based_prompt_to_langchain_prompt from danswer.llm.utils import message_to_string -from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT +from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT +from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT from danswer.search.models import InferenceSection from danswer.utils.logger import setup_logger logger = setup_logger() +def _get_agent_eval_messages( + title: str, content: str, query: str +) -> list[dict[str, str]]: + messages = [ + { + "role": "system", + "content": AGENTIC_SEARCH_SYSTEM_PROMPT, + }, + { + "role": "user", + "content": AGENTIC_SEARCH_USER_PROMPT.format( + title=title, content=content, query=query + ), + }, + ] + return messages + + def evaluate_inference_section( document: InferenceSection, query: str, llm: LLM ) -> dict[str, RelevanceChunk]: - relevance: RelevanceChunk = RelevanceChunk() results = {} - # At least for now, is the same doucment ID across chunks document_id = document.center_chunk.document_id + semantic_id = document.center_chunk.semantic_identifier + contents = document.combined_content chunk_id = document.center_chunk.chunk_id - prompt = f""" - Analyze the relevance of this document to the search query: - Title: {document_id.split("/")[-1]} - Blurb: 
{document.combined_content} - Query: {query} + messages = _get_agent_eval_messages( + title=semantic_id, content=contents, query=query + ) + filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) + model_output = message_to_string(llm.invoke(filled_llm_prompt)) - {AGENTIC_SEARCH_EVALUATION_PROMPT} - """ + # Search for the "Useful Analysis" section in the model output + # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon, + # then any text up to "3. Final Relevance" + # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines + # If no match is found, the entire model output is used as the analysis + analysis_match = re.search( + r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance", + model_output, + re.DOTALL, + ) + analysis = analysis_match.group(1).strip() if analysis_match else model_output - content = message_to_string(llm.invoke(prompt=prompt)) - analysis = "" - relevant = False - chain_of_thought = "" + # Get the last non-empty line + last_line = next( + (line for line in reversed(model_output.split("\n")) if line.strip()), "" + ) + relevant = last_line.strip().lower().startswith("true") - parts = content.split("[ANALYSIS_START]", 1) - if len(parts) == 2: - chain_of_thought, rest = parts - else: - logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}") - rest = content - - parts = rest.split("[ANALYSIS_END]", 1) - if len(parts) == 2: - analysis, result = parts - else: - logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}") - result = rest - - chain_of_thought = chain_of_thought.strip() - analysis = analysis.strip() - result = result.strip().lower() - - # Determine relevance - if "result: true" in result: - relevant = True - elif "result: false" in result: - relevant = False - else: - logger.warning(f"Invalid result format for document {document_id}") - - if not analysis: - logger.warning( - f"Couldn't extract proper analysis 
for document {document_id}. Using full content." - ) - analysis = content - - relevance.content = analysis - relevance.relevant = relevant - - results[f"{document_id}-{chunk_id}"] = relevance + results[f"{document_id}-{chunk_id}"] = RelevanceChunk( + relevant=relevant, content=analysis + ) return results diff --git a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx index 537347e79..e3ffa1c40 100644 --- a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx +++ b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx @@ -49,8 +49,10 @@ const ToggleSwitch = () => { onClick={() => handleTabChange("search")} > - Search - {commandSymbol}S +

+ Search + {commandSymbol}S +