Catch LLM Eval Failures (#2272)

This commit is contained in:
Yuhong Sun 2024-08-30 17:42:58 -07:00 committed by GitHub
parent 76db4b765a
commit f01027cfb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 72 additions and 49 deletions

View File

@ -360,10 +360,10 @@ class SearchPipeline:
try:
results = run_functions_in_parallel(function_calls=functions)
self._section_relevance = list(results.values())
except Exception:
except Exception as e:
raise ValueError(
"An issue occured during the agentic evaluation proecss."
)
"An issue occured during the agentic evaluation process."
) from e
elif self.search_query.evaluation_type == LLMEvaluationType.BASIC:
if DISABLE_LLM_DOC_RELEVANCE:

View File

@ -58,25 +58,30 @@ def evaluate_inference_section(
center_metadata=center_metadata_str,
)
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
model_output = message_to_string(llm.invoke(filled_llm_prompt))
try:
model_output = message_to_string(llm.invoke(filled_llm_prompt))
# Search for the "Useful Analysis" section in the model output
# This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
# then any text up to "3. Final Relevance"
# The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
# If no match is found, the entire model output is used as the analysis
analysis_match = re.search(
r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
model_output,
re.DOTALL,
)
analysis = analysis_match.group(1).strip() if analysis_match else model_output
# Search for the "Useful Analysis" section in the model output
# This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
# then any text up to "3. Final Relevance"
# The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
# If no match is found, the entire model output is used as the analysis
analysis_match = re.search(
r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
model_output,
re.DOTALL,
)
analysis = analysis_match.group(1).strip() if analysis_match else model_output
# Get the last non-empty line
last_line = next(
(line for line in reversed(model_output.split("\n")) if line.strip()), ""
)
relevant = last_line.strip().lower().startswith("true")
# Get the last non-empty line
last_line = next(
(line for line in reversed(model_output.split("\n")) if line.strip()), ""
)
relevant = last_line.strip().lower().startswith("true")
except Exception as e:
logger.exception(f"An issue occured during the agentic evaluation process. {e}")
relevant = False
analysis = ""
return SectionRelevancePiece(
document_id=document_id,

View File

@ -19,6 +19,7 @@ import { FiTag } from "react-icons/fi";
import { DISABLE_LLM_DOC_RELEVANCE } from "@/lib/constants";
import { SettingsContext } from "../settings/SettingsProvider";
import { CustomTooltip, TooltipGroup } from "../tooltip/CustomTooltip";
import { WarningCircle } from "@phosphor-icons/react";
export const buildDocumentSummaryDisplay = (
matchHighlights: string[],
@ -230,7 +231,7 @@ export const DocumentDisplay = ({
{document.semantic_identifier || document.document_id}
</p>
</a>
<div className="ml-auto flex gap-x-2">
<div className="ml-auto flex items-center">
<TooltipGroup>
{isHovered && messageId && (
<DocumentFeedbackBlock
@ -252,7 +253,7 @@ export const DocumentDisplay = ({
>
<CustomTooltip showTick line content="Toggle content">
<LightBulbIcon
className={`${settings?.isMobile && alternativeToggled ? "text-green-600" : "text-blue-600"} h-4 w-4 cursor-pointer`}
className={`${settings?.isMobile && alternativeToggled ? "text-green-600" : "text-blue-600"} my-auto ml-2 h-4 w-4 cursor-pointer`}
/>
</CustomTooltip>
</button>
@ -326,31 +327,32 @@ export const AgenticDocumentDisplay = ({
</p>
</a>
<div className="ml-auto flex gap-x-2">
{isHovered && messageId && (
<DocumentFeedbackBlock
documentId={document.document_id}
messageId={messageId}
documentRank={documentRank}
setPopup={setPopup}
/>
)}
{(contentEnriched || additional_relevance) &&
relevance_explanation &&
(isHovered || alternativeToggled) && (
<button
onClick={() =>
setAlternativeToggled(
(alternativeToggled) => !alternativeToggled
)
}
>
<CustomTooltip showTick line content="Toggle content">
<BookIcon className="text-blue-400" />
</CustomTooltip>
</button>
<div className="ml-auto items-center flex">
<TooltipGroup>
{isHovered && messageId && (
<DocumentFeedbackBlock
documentId={document.document_id}
messageId={messageId}
documentRank={documentRank}
setPopup={setPopup}
/>
)}
{(contentEnriched || additional_relevance) &&
(isHovered || alternativeToggled) && (
<button
onClick={() =>
setAlternativeToggled(
(alternativeToggled) => !alternativeToggled
)
}
>
<CustomTooltip showTick line content="Toggle content">
<BookIcon className="ml-2 my-auto text-blue-400" />
</CustomTooltip>
</button>
)}
</TooltipGroup>
</div>
</div>
<div className="mt-1">
@ -367,7 +369,13 @@ export const AgenticDocumentDisplay = ({
document.match_highlights,
document.blurb
)
: relevance_explanation}
: relevance_explanation || (
<span className="flex gap-x-1 items-center">
{" "}
<WarningCircle />
Model failed to produce an analysis of the document
</span>
)}
</p>
</div>
</div>

View File

@ -579,6 +579,14 @@ export const SearchSection = ({
const { popup, setPopup } = usePopup();
// Use the agentic display only when agentic results were requested AND at
// least one returned document has an entry in `additional_relevance`;
// otherwise the value is falsy.
const shouldUseAgenticDisplay =
  agenticResults &&
  (searchResponse.documents || []).some(
    (doc) => searchResponse.additional_relevance?.[doc.document_id] !== undefined
  );
return (
<>
<div className="flex relative pr-[8px] h-full text-default">
@ -756,7 +764,9 @@ export const SearchSection = ({
contentEnriched={contentEnriched}
comments={comments}
sweep={sweep}
agenticResults={agenticResults && !disabledAgentic}
agenticResults={
shouldUseAgenticDisplay && !disabledAgentic
}
performSweep={performSweep}
searchResponse={searchResponse}
isFetching={isFetching}

View File

@ -98,7 +98,7 @@ export const searchRequestStreamed = async ({
}
previousPartialChunk = partialChunk as string | null;
completedChunks.forEach((chunk) => {
// check for answer peice / end of answer
// check for answer piece / end of answer
if (Object.hasOwn(chunk, "relevance_summaries")) {
const relevanceChunk = chunk as RelevanceChunk;