mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-08 11:58:34 +02:00
Touchups (#1855)
This commit is contained in:
parent
87fadb07ea
commit
e4984153c0
@ -12,8 +12,8 @@ import sqlalchemy as sa
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "05c07bf07c00"
|
||||
down_revision = "b896bbd0d5a7"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
branch_labels: None = None
|
||||
depends_on: None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
|
@ -47,6 +47,8 @@ class LLMRelevanceFilterResponse(BaseModel):
|
||||
|
||||
|
||||
class RelevanceChunk(BaseModel):
|
||||
# TODO make this document level. Also slight misnomer here as this is actually
|
||||
# done at the section level currently rather than the chunk
|
||||
relevant: bool | None = None
|
||||
content: str | None = None
|
||||
|
||||
|
43
backend/danswer/prompts/agentic_evaluation.py
Normal file
43
backend/danswer/prompts/agentic_evaluation.py
Normal file
@ -0,0 +1,43 @@
|
||||
AGENTIC_SEARCH_SYSTEM_PROMPT = """
|
||||
You are an expert at evaluating the relevance of a document to a search query.
|
||||
Provided a document and a search query, you determine if the document is relevant to the user query.
|
||||
You ALWAYS output the 3 sections described below and every section always begins with the same header line.
|
||||
The "Chain of Thought" is to help you understand the document and query and their relevance to one another.
|
||||
The "Useful Analysis" is shown to the user to help them understand why the document is or is not useful for them.
|
||||
The "Final Relevance Determination" is always a single True or False.
|
||||
|
||||
You always output your response following these 3 sections:
|
||||
|
||||
1. Chain of Thought:
|
||||
Provide a chain of thought analysis considering:
|
||||
- The main purpose and content of the document
|
||||
- What the user is searching for
|
||||
- How the document relates to the query
|
||||
- Potential uses of the document for the given query
|
||||
Be thorough, but avoid unnecessary repetition. Think step by step.
|
||||
|
||||
2. Useful Analysis:
|
||||
Summarize the contents of the document as it relates to the user query.
|
||||
BE ABSOLUTELY AS CONCISE AS POSSIBLE.
|
||||
If the document is not useful, briefly mention what the document is about.
|
||||
Do NOT say whether this document is useful or not useful, ONLY provide the summary.
|
||||
If referring to the document, prefer using "this" document over "the" document.
|
||||
|
||||
3. Final Relevance Determination:
|
||||
True or False
|
||||
"""
|
||||
|
||||
AGENTIC_SEARCH_USER_PROMPT = """
|
||||
Document:
|
||||
```
|
||||
{content}
|
||||
```
|
||||
|
||||
Query:
|
||||
{query}
|
||||
|
||||
Be sure to run through the 3 steps of evaluation:
|
||||
1. Chain of Thought
|
||||
2. Useful Analysis
|
||||
3. Final Relevance Determination
|
||||
""".strip()
|
@ -24,25 +24,6 @@ Query:
|
||||
""".strip()
|
||||
|
||||
|
||||
AGENTIC_SEARCH_EVALUATION_PROMPT = """
|
||||
1. Chain of Thought Analysis:
|
||||
Provide a chain of thought analysis considering:
|
||||
- The main purpose and content of the document
|
||||
- What the user is searching for
|
||||
- How the document's topic relates to the query
|
||||
- Potential uses of the document for the given query
|
||||
Be thorough, but avoid unnecessary repetition. Think step by step.
|
||||
|
||||
2. Useful Analysis:
|
||||
[ANALYSIS_START]
|
||||
State the most important point from the chain of thought.
|
||||
DO NOT refer to "the document" (describe it as "this")- ONLY state the core point in a description.
|
||||
[ANALYSIS_END]
|
||||
|
||||
3. Relevance Determination:
|
||||
RESULT: True (if potentially relevant)
|
||||
RESULT: False (if not relevant)
|
||||
""".strip()
|
||||
# Use the following for easy viewing of prompts
|
||||
if __name__ == "__main__":
|
||||
print(LANGUAGE_REPHRASE_PROMPT)
|
||||
|
@ -1,71 +1,70 @@
|
||||
import re
|
||||
|
||||
from danswer.chat.models import RelevanceChunk
|
||||
from danswer.llm.interfaces import LLM
|
||||
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt
|
||||
from danswer.llm.utils import message_to_string
|
||||
from danswer.prompts.miscellaneous_prompts import AGENTIC_SEARCH_EVALUATION_PROMPT
|
||||
from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_SYSTEM_PROMPT
|
||||
from danswer.prompts.agentic_evaluation import AGENTIC_SEARCH_USER_PROMPT
|
||||
from danswer.search.models import InferenceSection
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def _get_agent_eval_messages(
|
||||
title: str, content: str, query: str
|
||||
) -> list[dict[str, str]]:
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": AGENTIC_SEARCH_SYSTEM_PROMPT,
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": AGENTIC_SEARCH_USER_PROMPT.format(
|
||||
title=title, content=content, query=query
|
||||
),
|
||||
},
|
||||
]
|
||||
return messages
|
||||
|
||||
|
||||
def evaluate_inference_section(
|
||||
document: InferenceSection, query: str, llm: LLM
|
||||
) -> dict[str, RelevanceChunk]:
|
||||
relevance: RelevanceChunk = RelevanceChunk()
|
||||
results = {}
|
||||
|
||||
# At least for now, this is the same document ID across chunks
|
||||
document_id = document.center_chunk.document_id
|
||||
semantic_id = document.center_chunk.semantic_identifier
|
||||
contents = document.combined_content
|
||||
chunk_id = document.center_chunk.chunk_id
|
||||
|
||||
prompt = f"""
|
||||
Analyze the relevance of this document to the search query:
|
||||
Title: {document_id.split("/")[-1]}
|
||||
Blurb: {document.combined_content}
|
||||
Query: {query}
|
||||
messages = _get_agent_eval_messages(
|
||||
title=semantic_id, content=contents, query=query
|
||||
)
|
||||
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
|
||||
model_output = message_to_string(llm.invoke(filled_llm_prompt))
|
||||
|
||||
{AGENTIC_SEARCH_EVALUATION_PROMPT}
|
||||
"""
|
||||
# Search for the "Useful Analysis" section in the model output
|
||||
# This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
|
||||
# then any text up to "3. Final Relevance"
|
||||
# The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
|
||||
# If no match is found, the entire model output is used as the analysis
|
||||
analysis_match = re.search(
|
||||
r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
|
||||
model_output,
|
||||
re.DOTALL,
|
||||
)
|
||||
analysis = analysis_match.group(1).strip() if analysis_match else model_output
|
||||
|
||||
content = message_to_string(llm.invoke(prompt=prompt))
|
||||
analysis = ""
|
||||
relevant = False
|
||||
chain_of_thought = ""
|
||||
# Get the last non-empty line
|
||||
last_line = next(
|
||||
(line for line in reversed(model_output.split("\n")) if line.strip()), ""
|
||||
)
|
||||
relevant = last_line.strip().lower().startswith("true")
|
||||
|
||||
parts = content.split("[ANALYSIS_START]", 1)
|
||||
if len(parts) == 2:
|
||||
chain_of_thought, rest = parts
|
||||
else:
|
||||
logger.warning(f"Missing [ANALYSIS_START] tag for document {document_id}")
|
||||
rest = content
|
||||
|
||||
parts = rest.split("[ANALYSIS_END]", 1)
|
||||
if len(parts) == 2:
|
||||
analysis, result = parts
|
||||
else:
|
||||
logger.warning(f"Missing [ANALYSIS_END] tag for document {document_id}")
|
||||
result = rest
|
||||
|
||||
chain_of_thought = chain_of_thought.strip()
|
||||
analysis = analysis.strip()
|
||||
result = result.strip().lower()
|
||||
|
||||
# Determine relevance
|
||||
if "result: true" in result:
|
||||
relevant = True
|
||||
elif "result: false" in result:
|
||||
relevant = False
|
||||
else:
|
||||
logger.warning(f"Invalid result format for document {document_id}")
|
||||
|
||||
if not analysis:
|
||||
logger.warning(
|
||||
f"Couldn't extract proper analysis for document {document_id}. Using full content."
|
||||
)
|
||||
analysis = content
|
||||
|
||||
relevance.content = analysis
|
||||
relevance.relevant = relevant
|
||||
|
||||
results[f"{document_id}-{chunk_id}"] = relevance
|
||||
results[f"{document_id}-{chunk_id}"] = RelevanceChunk(
|
||||
relevant=relevant, content=analysis
|
||||
)
|
||||
return results
|
||||
|
@ -49,8 +49,10 @@ const ToggleSwitch = () => {
|
||||
onClick={() => handleTabChange("search")}
|
||||
>
|
||||
<SearchIcon size={16} className="mr-2" />
|
||||
Search
|
||||
<span className="text-xs ml-2">{commandSymbol}S</span>
|
||||
<p className="items-baseline flex">
|
||||
Search
|
||||
<span className="text-xs ml-2">{commandSymbol}S</span>
|
||||
</p>
|
||||
</button>
|
||||
<button
|
||||
className={`px-4 py-2 rounded-full text-sm font-medium transition-colors duration-300 ease-in-out flex items-center relative z-10 ${
|
||||
|
@ -202,7 +202,7 @@ export const SearchResultsDisplay = ({
|
||||
setShowAll((showAll) => !showAll);
|
||||
}
|
||||
}}
|
||||
className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-16 h-8 ${
|
||||
className={`flex items-center justify-center animate-fade-in-up rounded-lg p-1 text-xs transition-all duration-300 w-20 h-8 ${
|
||||
!sweep
|
||||
? "bg-green-500 text-text-800"
|
||||
: "bg-rose-700 text-text-100"
|
||||
@ -217,16 +217,18 @@ export const SearchResultsDisplay = ({
|
||||
<span></span>
|
||||
{!sweep
|
||||
? agenticResults
|
||||
? "all"
|
||||
: "hide"
|
||||
? "Show All"
|
||||
: "Focus"
|
||||
: agenticResults
|
||||
? "hide"
|
||||
: "undo"}
|
||||
{!sweep ? (
|
||||
<BroomIcon className="h-4 w-4" />
|
||||
) : (
|
||||
<UndoIcon className="h-4 w-4" />
|
||||
)}
|
||||
? "Focus"
|
||||
: "Show All"}
|
||||
<span className="ml-1">
|
||||
{!sweep ? (
|
||||
<BroomIcon className="h-4 w-4" />
|
||||
) : (
|
||||
<UndoIcon className="h-4 w-4" />
|
||||
)}
|
||||
</span>
|
||||
</div>
|
||||
</button>
|
||||
</Tooltip>
|
||||
|
Loading…
x
Reference in New Issue
Block a user