Catch LLM Eval Failures (#2272)

This commit is contained in:
Yuhong Sun
2024-08-30 17:42:58 -07:00
committed by GitHub
parent 76db4b765a
commit f01027cfb7
5 changed files with 72 additions and 49 deletions

View File

@@ -360,10 +360,10 @@ class SearchPipeline:
try: try:
results = run_functions_in_parallel(function_calls=functions) results = run_functions_in_parallel(function_calls=functions)
self._section_relevance = list(results.values()) self._section_relevance = list(results.values())
except Exception: except Exception as e:
raise ValueError( raise ValueError(
"An issue occured during the agentic evaluation proecss." "An issue occured during the agentic evaluation process."
) ) from e
elif self.search_query.evaluation_type == LLMEvaluationType.BASIC: elif self.search_query.evaluation_type == LLMEvaluationType.BASIC:
if DISABLE_LLM_DOC_RELEVANCE: if DISABLE_LLM_DOC_RELEVANCE:

View File

@@ -58,25 +58,30 @@ def evaluate_inference_section(
center_metadata=center_metadata_str, center_metadata=center_metadata_str,
) )
filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages) filled_llm_prompt = dict_based_prompt_to_langchain_prompt(messages)
model_output = message_to_string(llm.invoke(filled_llm_prompt)) try:
model_output = message_to_string(llm.invoke(filled_llm_prompt))
# Search for the "Useful Analysis" section in the model output # Search for the "Useful Analysis" section in the model output
# This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon, # This regex looks for "2. Useful Analysis" (case-insensitive) followed by an optional colon,
# then any text up to "3. Final Relevance" # then any text up to "3. Final Relevance"
# The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines # The (?i) flag makes it case-insensitive, and re.DOTALL allows the dot to match newlines
# If no match is found, the entire model output is used as the analysis # If no match is found, the entire model output is used as the analysis
analysis_match = re.search( analysis_match = re.search(
r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance", r"(?i)2\.\s*useful analysis:?\s*(.+?)\n\n3\.\s*final relevance",
model_output, model_output,
re.DOTALL, re.DOTALL,
) )
analysis = analysis_match.group(1).strip() if analysis_match else model_output analysis = analysis_match.group(1).strip() if analysis_match else model_output
# Get the last non-empty line # Get the last non-empty line
last_line = next( last_line = next(
(line for line in reversed(model_output.split("\n")) if line.strip()), "" (line for line in reversed(model_output.split("\n")) if line.strip()), ""
) )
relevant = last_line.strip().lower().startswith("true") relevant = last_line.strip().lower().startswith("true")
except Exception as e:
logger.exception(f"An issue occured during the agentic evaluation process. {e}")
relevant = False
analysis = ""
return SectionRelevancePiece( return SectionRelevancePiece(
document_id=document_id, document_id=document_id,

View File

@@ -19,6 +19,7 @@ import { FiTag } from "react-icons/fi";
import { DISABLE_LLM_DOC_RELEVANCE } from "@/lib/constants"; import { DISABLE_LLM_DOC_RELEVANCE } from "@/lib/constants";
import { SettingsContext } from "../settings/SettingsProvider"; import { SettingsContext } from "../settings/SettingsProvider";
import { CustomTooltip, TooltipGroup } from "../tooltip/CustomTooltip"; import { CustomTooltip, TooltipGroup } from "../tooltip/CustomTooltip";
import { WarningCircle } from "@phosphor-icons/react";
export const buildDocumentSummaryDisplay = ( export const buildDocumentSummaryDisplay = (
matchHighlights: string[], matchHighlights: string[],
@@ -230,7 +231,7 @@ export const DocumentDisplay = ({
{document.semantic_identifier || document.document_id} {document.semantic_identifier || document.document_id}
</p> </p>
</a> </a>
<div className="ml-auto flex gap-x-2"> <div className="ml-auto flex items-center">
<TooltipGroup> <TooltipGroup>
{isHovered && messageId && ( {isHovered && messageId && (
<DocumentFeedbackBlock <DocumentFeedbackBlock
@@ -252,7 +253,7 @@ export const DocumentDisplay = ({
> >
<CustomTooltip showTick line content="Toggle content"> <CustomTooltip showTick line content="Toggle content">
<LightBulbIcon <LightBulbIcon
className={`${settings?.isMobile && alternativeToggled ? "text-green-600" : "text-blue-600"} h-4 w-4 cursor-pointer`} className={`${settings?.isMobile && alternativeToggled ? "text-green-600" : "text-blue-600"} my-auto ml-2 h-4 w-4 cursor-pointer`}
/> />
</CustomTooltip> </CustomTooltip>
</button> </button>
@@ -326,31 +327,32 @@ export const AgenticDocumentDisplay = ({
</p> </p>
</a> </a>
<div className="ml-auto flex gap-x-2"> <div className="ml-auto items-center flex">
{isHovered && messageId && ( <TooltipGroup>
<DocumentFeedbackBlock {isHovered && messageId && (
documentId={document.document_id} <DocumentFeedbackBlock
messageId={messageId} documentId={document.document_id}
documentRank={documentRank} messageId={messageId}
setPopup={setPopup} documentRank={documentRank}
/> setPopup={setPopup}
)} />
{(contentEnriched || additional_relevance) &&
relevance_explanation &&
(isHovered || alternativeToggled) && (
<button
onClick={() =>
setAlternativeToggled(
(alternativeToggled) => !alternativeToggled
)
}
>
<CustomTooltip showTick line content="Toggle content">
<BookIcon className="text-blue-400" />
</CustomTooltip>
</button>
)} )}
{(contentEnriched || additional_relevance) &&
(isHovered || alternativeToggled) && (
<button
onClick={() =>
setAlternativeToggled(
(alternativeToggled) => !alternativeToggled
)
}
>
<CustomTooltip showTick line content="Toggle content">
<BookIcon className="ml-2 my-auto text-blue-400" />
</CustomTooltip>
</button>
)}
</TooltipGroup>
</div> </div>
</div> </div>
<div className="mt-1"> <div className="mt-1">
@@ -367,7 +369,13 @@ export const AgenticDocumentDisplay = ({
document.match_highlights, document.match_highlights,
document.blurb document.blurb
) )
: relevance_explanation} : relevance_explanation || (
<span className="flex gap-x-1 items-center">
{" "}
<WarningCircle />
Model failed to produce an analysis of the document
</span>
)}
</p> </p>
</div> </div>
</div> </div>

View File

@@ -579,6 +579,14 @@ export const SearchSection = ({
const { popup, setPopup } = usePopup(); const { popup, setPopup } = usePopup();
const shouldUseAgenticDisplay =
agenticResults &&
(searchResponse.documents || []).some(
(document) =>
searchResponse.additional_relevance &&
searchResponse.additional_relevance[document.document_id] !== undefined
);
return ( return (
<> <>
<div className="flex relative pr-[8px] h-full text-default"> <div className="flex relative pr-[8px] h-full text-default">
@@ -756,7 +764,9 @@ export const SearchSection = ({
contentEnriched={contentEnriched} contentEnriched={contentEnriched}
comments={comments} comments={comments}
sweep={sweep} sweep={sweep}
agenticResults={agenticResults && !disabledAgentic} agenticResults={
shouldUseAgenticDisplay && !disabledAgentic
}
performSweep={performSweep} performSweep={performSweep}
searchResponse={searchResponse} searchResponse={searchResponse}
isFetching={isFetching} isFetching={isFetching}

View File

@@ -98,7 +98,7 @@ export const searchRequestStreamed = async ({
} }
previousPartialChunk = partialChunk as string | null; previousPartialChunk = partialChunk as string | null;
completedChunks.forEach((chunk) => { completedChunks.forEach((chunk) => {
// check for answer peice / end of answer // check for answer piece / end of answer
if (Object.hasOwn(chunk, "relevance_summaries")) { if (Object.hasOwn(chunk, "relevance_summaries")) {
const relevanceChunk = chunk as RelevanceChunk; const relevanceChunk = chunk as RelevanceChunk;