mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-11 05:36:03 +02:00
Touchups (#1855)
This commit is contained in:
@@ -12,8 +12,8 @@ import sqlalchemy as sa
|
|||||||
# revision identifiers, used by Alembic.
|
# revision identifiers, used by Alembic.
|
||||||
revision = "05c07bf07c00"
|
revision = "05c07bf07c00"
|
||||||
down_revision = "b896bbd0d5a7"
|
down_revision = "b896bbd0d5a7"
|
||||||
branch_labels = None
|
branch_labels: None = None
|
||||||
depends_on = None
|
depends_on: None = None
|
||||||
|
|
||||||
|
|
||||||
def upgrade() -> None:
|
def upgrade() -> None:
|
||||||
|
@@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class RelevanceChunk(BaseModel):
|
class RelevanceChunk(BaseModel):
|
||||||
|
# TODO make this document level. Also slight misnomer here as this is actually
|
||||||
|
# done at the section level currently rather than the chunk
|
||||||
relevant: bool | None = None
|
relevant: bool | None = None
|
||||||
content: str | None = None
|
content: str | None = None
|
||||||
|
|
||||||
|
43
backend/danswer/prompts/agentic_evaluation.py
Normal file
43
backend/danswer/prompts/agentic_evaluation.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
AGENTIC_SEARCH_SYSTEM_PROMPT = """
|
||||||
|
You are an expert at evaluating the relevance of a document to a search query.
|
||||||
|
Provided a document and a search query, you determine if the document is relevant to the user query.
|
||||||
|
You ALWAYS output the 3 sections described below and every section always begins with the same header line.
|
||||||
|
The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
|
||||||
|
The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
|
||||||
|
The "Final Relevance Determination" is always a single True or False.
|
||||||
|
|
||||||
|
You always output your response following these 3 sections:
|
||||||
|
|
||||||
|
1. Chain of Thought:
|
||||||
|
Provide a chain of thought analysis considering:
|
||||||
|
- The main purpose and content of the document
|
||||||
|
- What the user is searching for
|
||||||
|
- How the document relates to the query
|
||||||
|
- Potential uses of the document for the given query
|
||||||
|
Be thorough, but avoid unnecessary repetition. Think step by step.
|
||||||
|
|
||||||
|
2. Useful Analysis:
|
||||||
|
Summarize the contents of the document as it relates to the user query.
|
||||||
|
BE ABSOLUTELY AS CONCISE AS POSSIBLE.
|
||||||
|
If the document is not useful, briefly mention what the document is about.
|
||||||
|
Do NOT say whether this document is useful or not useful, ONLY provide the summary.
|
||||||
|
If referring to the document, prefer using "this" document over "the" document.
|
||||||
|
|
||||||
|
3. Final Relevance Determination:
|
||||||
|
True or False
|
||||||
|
"""
|
||||||
|
|
||||||
|
AGENTIC_SEARCH_USER_PROMPT = """
|
||||||
|
Document:
|
||||||
|
```
|
||||||
|
{content}
|
||||||
|
```
|
||||||
|
|
||||||
|
Query:
|
||||||
|
{query}
|
||||||
|
|
||||||
|
Be sure to run through the 3 steps of evaluation:
|
||||||
|
1. Chain of Thought
|
||||||
|
2. Useful Analysis
|
||||||
|
3. Final Relevance Determination
|
||||||
|
""".strip()
|
@@ -24,25 +24,6 @@ Query:
|
|||||||
""".strip()
|
""".strip()
|
||||||
|
|
||||||
|
|
||||||
AGENTIC_SEARCH_EVALUATION_PROMPT = """
|
|
||||||
1. Chain of Thought Analysis:
|
|
||||||
Provide a chain of thought analysis considering:
|
|
||||||
- The main purpose and content of the document
|
|
||||||
- What the user is searching for
|
|
||||||
- How the document's topic relates to the query
|
|
||||||
- Potential uses of the document for the given query
|
|
||||||
Be thorough, but avoid unnecessary repetition. Think step by step.
|
|
||||||
|
|
||||||
2. Useful Analysis:
|
|
||||||
[ANALYSIS_START]
|
|
||||||
State the most important point from the chain of thought.
|
|
||||||
DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description.
|
|
||||||
[ANALYSIS_END]
|
|
||||||
|
|
||||||
3. Relevance Determination:
|
|
||||||
RESULT: True (if potentially relevant)
|
|
||||||
RESULT: False (if not relevant)
|
|
||||||
""".strip()
|
|
||||||
# Use the following for easy viewing of prompts
|
# Use the following for easy viewing of prompts
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
print(LANGUAGE_REPHRASE_PROMPT)
|
print(LANGUAGE_REPHRASE_PROMPT)
|
||||||
|
@@ -1,71 +1,70 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
from danswer.chat.models import RelevanceChunk
|
from danswer.chat.models import RelevanceChunk
|
||||||
from danswer.llm.interfaces import LLM
|
from danswer.llm.interfaces import LLM
|
||||||
|
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
|
||||||
from danswer.llm.utils import message_to_string
|
from danswer.llm.utils import message_to_string
|
||||||
from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT
|
from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT
|
||||||
|
from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT
|
||||||
from danswer.search.models import InferenceSection
|
from danswer.search.models import InferenceSection
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_agent_eval_messages(
|
||||||
|
title: str, content: str, query: str
|
||||||
|
) -> list[dict[str, str]]:
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": AGENTIC_SEARCH_SYSTEM_PROMPT,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": AGENTIC_SEARCH_USER_PROMPT.format(
|
||||||
|
title=title, content=content, query=query
|
||||||
|
),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
return messages
|
||||||
|
|
||||||
|
|
||||||
def evaluate_inference_section(
|
def evaluate_inference_section(
|
||||||
document: InferenceSection, query: str, llm: LLM
|
document: InferenceSection, query: str, llm: LLM
|
||||||
) -> dict[str, RelevanceChunk]:
|
) -> dict[str, RelevanceChunk]:
|
||||||
relevance: RelevanceChunk = RelevanceChunk()
|
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
# At least for now, this is the same document ID across chunks
|
|
||||||
document_id = document.center_chunk.document_id
|
document_id = document.center_chunk.document_id
|
||||||
|
semantic_id = document.center_chunk.semantic_identifier
|
||||||
|
contents = document.combined_content
|
||||||
chunk_id = document.center_chunk.chunk_id
|
chunk_id = document.center_chunk.chunk_id
|
||||||
|
|
||||||
prompt = f"""
|
messages = _get_agent_eval_messages(
|
||||||
Analyze the relevance of this document to the search query:
|
title=semantic_id, content=contents, query=query
|
||||||
Title: {document_id.split("/")[-1]}
|
|
||||||
Blurb: {document.combined_content}
|
|
||||||
Query: {query}
|
|
||||||
|
|
||||||
{AGENTIC_SEARCH_EVALUATION_PROMPT}
|
|
||||||
"""
|
|
||||||
|
|
||||||
content = message_to_string(llm.invoke(prompt=prompt))
|
|
||||||
analysis = ""
|
|
||||||
relevant = False
|
|
||||||
chain_of_thought = ""
|
|
||||||
|
|
||||||
parts = content.split("[ANALYSIS_START]", 1)
|
|
||||||
if len(parts) == 2:
|
|
||||||
chain_of_thought, rest = parts
|
|
||||||
else:
|
|
||||||
logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}")
|
|
||||||
rest = content
|
|
||||||
|
|
||||||
parts = rest.split("[ANALYSIS_END]", 1)
|
|
||||||
if len(parts) == 2:
|
|
||||||
analysis, result = parts
|
|
||||||
else:
|
|
||||||
logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}")
|
|
||||||
result = rest
|
|
||||||
|
|
||||||
chain_of_thought = chain_of_thought.strip()
|
|
||||||
analysis = analysis.strip()
|
|
||||||
result = result.strip().lower()
|
|
||||||
|
|
||||||
# Determine relevance
|
|
||||||
if "result: true" in result:
|
|
||||||
relevant = True
|
|
||||||
elif "result: false" in result:
|
|
||||||
relevant = False
|
|
||||||
else:
|
|
||||||
logger.warning(f"Invalid result format for document {document_id}")
|
|
||||||
|
|
||||||
if not analysis:
|
|
||||||
logger.warning(
|
|
||||||
f"Couldn't extract proper analysis for document {document_id}. Using full content."
|
|
||||||
)
|
)
|
||||||
analysis = content
|
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
|
||||||
|
model_output = message_to_string(llm.invoke(filled_llm_prompt))
|
||||||
|
|
||||||
relevance.content = analysis
|
# Search for the "Useful Analysis" section in the model output
|
||||||
relevance.relevant = relevant
|
# This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
|
||||||
|
# then any text up to "3. Final Relevance"
|
||||||
|
# The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
|
||||||
|
# If no match is found, the entire model output is used as the analysis
|
||||||
|
analysis_match = re.search(
|
||||||
|
r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
|
||||||
|
model_output,
|
||||||
|
re.DOTALL,
|
||||||
|
)
|
||||||
|
analysis = analysis_match.group(1).strip() if analysis_match else model_output
|
||||||
|
|
||||||
results[f"{document_id}-{chunk_id}"] = relevance
|
# Get the last non-empty line
|
||||||
|
last_line = next(
|
||||||
|
(line for line in reversed(model_output.split("\n")) if line.strip()), ""
|
||||||
|
)
|
||||||
|
relevant = last_line.strip().lower().startswith("true")
|
||||||
|
|
||||||
|
results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
|
||||||
|
relevant=relevant, content=analysis
|
||||||
|
)
|
||||||
return results
|
return results
|
||||||
|
@@ -49,8 +49,10 @@ const ToggleSwitch = () => {
|
|||||||
onClick={() => handleTabChange("search")}
|
onClick={() => handleTabChange("search")}
|
||||||
>
|
>
|
||||||
<SearchIcon size={16} className="mr-2" />
|
<SearchIcon size={16} className="mr-2" />
|
||||||
|
<p className="items-baseline flex">
|
||||||
Search
|
Search
|
||||||
<span className="text-xs ml-2">{commandSymbol}S</span>
|
<span className="text-xs ml-2">{commandSymbol}S</span>
|
||||||
|
</p>
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
className={`px-4 py-2 rounded-full text-sm font-medium transition-colors duration-300 ease-in-out flex items-center relative z-10 ${
|
className={`px-4 py-2 rounded-full text-sm font-medium transition-colors duration-300 ease-in-out flex items-center relative z-10 ${
|
||||||
|
@@ -202,7 +202,7 @@ export const SearchResultsDisplay = ({
|
|||||||
setShowAll((showAll) => !showAll);
|
setShowAll((showAll) => !showAll);
|
||||||
}
|
}
|
||||||
}}
|
}}
|
||||||
className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-16 h-8 ${
|
className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-20 h-8 ${
|
||||||
!sweep
|
!sweep
|
||||||
? "bg-green-500 text-text-800"
|
? "bg-green-500 text-text-800"
|
||||||
: "bg-rose-700 text-text-100"
|
: "bg-rose-700 text-text-100"
|
||||||
@@ -217,16 +217,18 @@ export const SearchResultsDisplay = ({
|
|||||||
<span></span>
|
<span></span>
|
||||||
{!sweep
|
{!sweep
|
||||||
? agenticResults
|
? agenticResults
|
||||||
? "all"
|
? "Show All"
|
||||||
: "hide"
|
: "Focus"
|
||||||
: agenticResults
|
: agenticResults
|
||||||
? "hide"
|
? "Focus"
|
||||||
: "undo"}
|
: "Show All"}
|
||||||
|
<span className="ml-1">
|
||||||
{!sweep ? (
|
{!sweep ? (
|
||||||
<BroomIcon className="h-4 w-4" />
|
<BroomIcon className="h-4 w-4" />
|
||||||
) : (
|
) : (
|
||||||
<UndoIcon className="h-4 w-4" />
|
<UndoIcon className="h-4 w-4" />
|
||||||
)}
|
)}
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
</button>
|
</button>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
Reference in New Issue
Block a user