Add error table to re-indexing (#2388)

* add error table to re-indexing

* robustify

* update with proper comment

* add popup

* update typo
This commit is contained in:
pablodanswer 2024-09-11 15:55:55 -07:00 committed by GitHub
parent d90c90dd92
commit 2c77dd241b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 336 additions and 13 deletions

View File

@ -181,6 +181,45 @@ def get_last_attempt(
return db_session.execute(stmt).scalars().first()
def get_latest_index_attempts_by_status(
    secondary_index: bool,
    db_session: Session,
    status: IndexingStatus,
) -> Sequence[IndexAttempt]:
    """
    Fetch, for every connector_credential_pair, its newest index attempt that
    has the given status.

    Only attempts whose search settings are FUTURE (when secondary_index is
    true) or PRESENT (otherwise) are considered. "Newest" means the highest
    IndexAttempt.id within a connector_credential_pair, so at most one
    IndexAttempt is returned per connector_credential_pair.
    """
    # Which generation of search settings the attempts must belong to.
    wanted_settings_status = (
        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    )

    # Per cc_pair: the highest attempt id carrying the requested status.
    # NOTE: the column label mentions "failed", but it holds the max id for
    # whatever `status` the caller asked for.
    newest_id_per_cc_pair = (
        select(
            IndexAttempt.connector_credential_pair_id,
            func.max(IndexAttempt.id).label("max_failed_id"),
        )
        .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
        .where(
            SearchSettings.status == wanted_settings_status,
            IndexAttempt.status == status,
        )
        .group_by(IndexAttempt.connector_credential_pair_id)
        .subquery()
    )

    # Join the full IndexAttempt rows back onto the (cc_pair, max id) pairs.
    same_cc_pair = (
        IndexAttempt.connector_credential_pair_id
        == newest_id_per_cc_pair.c.connector_credential_pair_id
    )
    is_newest_attempt = IndexAttempt.id == newest_id_per_cc_pair.c.max_failed_id

    query = select(IndexAttempt).join(
        newest_id_per_cc_pair,
        same_cc_pair & is_newest_attempt,
    )
    return db_session.execute(query).scalars().all()
def get_latest_index_attempts(
secondary_index: bool,
db_session: Session,

View File

@ -68,6 +68,8 @@ from danswer.db.index_attempt import create_index_attempt
from danswer.db.index_attempt import get_index_attempts_for_cc_pair
from danswer.db.index_attempt import get_latest_index_attempt_for_cc_pair_id
from danswer.db.index_attempt import get_latest_index_attempts
from danswer.db.index_attempt import get_latest_index_attempts_by_status
from danswer.db.models import IndexingStatus
from danswer.db.models import User
from danswer.db.models import UserRole
from danswer.db.search_settings import get_current_search_settings
@ -81,6 +83,7 @@ from danswer.server.documents.models import ConnectorSnapshot
from danswer.server.documents.models import ConnectorUpdateRequest
from danswer.server.documents.models import CredentialBase
from danswer.server.documents.models import CredentialSnapshot
from danswer.server.documents.models import FailedConnectorIndexingStatus
from danswer.server.documents.models import FileUploadResponse
from danswer.server.documents.models import GDriveCallback
from danswer.server.documents.models import GmailCallback
@ -376,6 +379,99 @@ def upload_files(
return FileUploadResponse(file_paths=deduped_file_paths)
# Retrieves most recent failure cases for connectors that are currently failing
@router.get("/admin/connector/failed-indexing-status")
def get_currently_failed_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[FailedConnectorIndexingStatus]:
    """
    Return one entry per connector-credential pair that is currently failing.

    A cc_pair counts as "currently failing" when it has a FAILED index attempt
    and no SUCCESS attempt with a strictly later `time_updated`. cc_pairs
    without any failed attempt are not returned: every entry of the response
    is presented to the caller as a failure.

    Args:
        secondary_index: forwarded to get_latest_index_attempts_by_status to
            select which index's attempts are inspected.
        user: the requesting user, forwarded to get_connector_credential_pairs.
        db_session: database session used for all lookups.
        get_editable: forwarded to get_connector_credential_pairs.

    Returns:
        A list of FailedConnectorIndexingStatus, one per failing cc_pair
        visible to `user`, excluding the pair named "DefaultCCPair".
    """
    # Latest FAILED / SUCCESS attempt for each cc_pair.
    latest_failed_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.FAILED,
    )
    latest_successful_indexing_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.SUCCESS,
    )

    # cc_pair_id -> latest successful attempt, for O(1) lookup below.
    latest_success_by_cc_pair_id = {
        attempt.connector_credential_pair_id: attempt
        for attempt in latest_successful_indexing_attempts
    }

    # cc_pair_id -> latest failed attempt, keeping only failures that have not
    # been superseded by a more recent success.
    cc_pair_to_latest_index_attempt = {}
    for failed_attempt in latest_failed_indexing_attempts:
        cc_pair_id = failed_attempt.connector_credential_pair_id
        success_attempt = latest_success_by_cc_pair_id.get(cc_pair_id)
        if (
            success_attempt is not None
            and success_attempt.time_updated > failed_attempt.time_updated
        ):
            continue
        cc_pair_to_latest_index_attempt[cc_pair_id] = failed_attempt

    # All connector credential pairs the user is allowed to see.
    cc_pairs = get_connector_credential_pairs(
        db_session=db_session,
        user=user,
        get_editable=get_editable,
    )

    indexing_statuses: list[FailedConnectorIndexingStatus] = []
    for cc_pair in cc_pairs:
        # Skip DefaultCCPair
        if cc_pair.name == "DefaultCCPair":
            continue

        latest_index_attempt = cc_pair_to_latest_index_attempt.get(cc_pair.id)
        if latest_index_attempt is None:
            # Not currently failing (never failed, or recovered since).
            continue

        indexing_statuses.append(
            FailedConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                error_msg=latest_index_attempt.error_msg,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                # Deletable when the check returns None (presumably it returns
                # a reason otherwise -- confirm against the helper).
                is_deletable=check_deletion_attempt_is_allowed(
                    connector_credential_pair=cc_pair,
                    db_session=db_session,
                    allow_scheduled=True,
                )
                is None,
            )
        )

    return indexing_statuses
@router.get("/admin/connector/indexing-status")
def get_connector_indexing_status(
secondary_index: bool = False,

View File

@ -265,6 +265,17 @@ class CCPairFullInfo(BaseModel):
)
class FailedConnectorIndexingStatus(BaseModel):
    """Slimmed-down ConnectorIndexingStatus describing a failed indexing attempt"""

    # Id of the connector-credential pair this entry refers to.
    cc_pair_id: int
    # Name of the connector-credential pair; may be absent.
    name: str | None
    # Error message of the failed attempt; may be absent.
    error_msg: str | None
    # Whether the connector-credential pair may be deleted.
    is_deletable: bool
    # Ids of the connector and credential that make up the pair.
    connector_id: int
    credential_id: int
class ConnectorIndexingStatus(BaseModel):
"""Represents the latest indexing status of a connector"""

View File

@ -1,7 +1,11 @@
import { ThreeDotsLoader } from "@/components/Loading";
import { Modal } from "@/components/Modal";
import { errorHandlingFetcher } from "@/lib/fetcher";
import { ConnectorIndexingStatus, ValidStatuses } from "@/lib/types";
import {
ConnectorIndexingStatus,
FailedConnectorIndexingStatus,
ValidStatuses,
} from "@/lib/types";
import { Button, Text, Title } from "@tremor/react";
import { useMemo, useState } from "react";
import useSWR, { mutate } from "swr";
@ -12,6 +16,8 @@ import {
HostedEmbeddingModel,
} from "../../../../components/embedding/interfaces";
import { Connector } from "@/lib/connectors/connectors";
import { FailedReIndexAttempts } from "@/components/embedding/FailedReIndexAttempts";
import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup";
export default function UpgradingPage({
futureEmbeddingModel,
@ -20,6 +26,7 @@ export default function UpgradingPage({
}) {
const [isCancelling, setIsCancelling] = useState<boolean>(false);
const { setPopup, popup } = usePopup();
const { data: connectors } = useSWR<Connector<any>[]>(
"/api/manage/connector",
errorHandlingFetcher,
@ -35,6 +42,14 @@ export default function UpgradingPage({
{ refreshInterval: 5000 } // 5 seconds
);
const { data: failedIndexingStatus } = useSWR<
FailedConnectorIndexingStatus[]
>(
"/api/manage/admin/connector/failed-indexing-status?secondary_index=true",
errorHandlingFetcher,
{ refreshInterval: 5000 } // 5 seconds
);
const onCancel = async () => {
const response = await fetch("/api/search-settings/cancel-new-embedding", {
method: "POST",
@ -72,8 +87,13 @@ export default function UpgradingPage({
});
}, [ongoingReIndexingStatus]);
if (!failedIndexingStatus) {
return <div>No failed index attempts</div>;
}
return (
<>
{popup}
{isCancelling && (
<Modal
onOutsideClick={() => setIsCancelling(false)}
@ -113,6 +133,12 @@ export default function UpgradingPage({
>
Cancel
</Button>
{failedIndexingStatus.length > 0 && (
<FailedReIndexAttempts
failedIndexingStatuses={failedIndexingStatus}
setPopup={setPopup}
/>
)}
<Text className="my-4">
The table below shows the re-indexing progress of all existing

View File

@ -0,0 +1,152 @@
import { buildCCPairInfoUrl } from "@/app/admin/connector/[ccPairId]/lib";
import { PageSelector } from "@/components/PageSelector";
import { IndexAttemptStatus } from "@/components/Status";
import { deleteCCPair } from "@/lib/documentDeletion";
import {
ConnectorIndexingStatus,
FailedConnectorIndexingStatus,
} from "@/lib/types";
import {
Button,
Table,
TableBody,
TableCell,
TableHead,
TableHeaderCell,
TableRow,
Text,
} from "@tremor/react";
import Link from "next/link";
import { useState } from "react";
import { FiLink, FiMaximize2, FiTrash, FiTrash2 } from "react-icons/fi";
import { mutate } from "swr";
import { PopupSpec } from "../admin/connectors/Popup";
/**
 * Paginated table of connectors whose re-indexing attempt failed.
 *
 * Each row links to the connector's admin page, shows the error message and,
 * when at least one listed connector is deletable, offers a Delete button
 * (enabled only for rows that are deletable).
 *
 * @param failedIndexingStatuses rows to display, one per failing connector
 * @param setPopup callback handed to deleteCCPair to report the outcome
 */
export function FailedReIndexAttempts({
  failedIndexingStatuses,
  setPopup,
}: {
  failedIndexingStatuses: FailedConnectorIndexingStatus[];
  setPopup: (popupSpec: PopupSpec | null) => void;
}) {
  const numToDisplay = 10;
  const [page, setPage] = useState(1);

  const totalPages = Math.ceil(failedIndexingStatuses.length / numToDisplay);
  // The list can shrink between refreshes; never render a page past the end.
  const currentPage = Math.max(1, Math.min(page, totalPages));

  // The Delete column (header AND cells) is only shown when it is useful.
  const anyDeletable = failedIndexingStatuses.some(
    (status) => status.is_deletable
  );

  return (
    <div className="mt-6 mb-8 p-4 border border-red-300 rounded-lg bg-red-50">
      <Text className="text-red-700 font-semibold mb-2">
        Failed Re-indexing Attempts
      </Text>
      <Text className="text-red-600 mb-4">
        The table below shows only the failed re-indexing attempts for existing
        connectors. These failures require immediate attention. Once all
        connectors have been re-indexed successfully, the new model will be used
        for all search queries.
      </Text>

      <div>
        <Table>
          <TableHead>
            <TableRow>
              <TableHeaderCell className="w-1/8 sm:w-1/6">
                Connector Name
              </TableHeaderCell>
              <TableHeaderCell className="w-1/8 sm:w-1/6">
                Status
              </TableHeaderCell>
              <TableHeaderCell className="w-4/8 sm:w-2/6">
                Error Message
              </TableHeaderCell>
              <TableHeaderCell className="w-1/8 sm:w-1/6">
                Visit Connector
              </TableHeaderCell>
              {anyDeletable && (
                <TableHeaderCell className="w-1/8 sm:w-2/6">
                  Delete Connector
                </TableHeaderCell>
              )}
            </TableRow>
          </TableHead>
          <TableBody>
            {failedIndexingStatuses
              .slice(
                numToDisplay * (currentPage - 1),
                numToDisplay * currentPage
              )
              .map((reindexingProgress) => {
                return (
                  // cc_pair_id is unique and never null, unlike name.
                  <TableRow key={reindexingProgress.cc_pair_id}>
                    <TableCell>
                      <Link
                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}
                        className="text-link cursor-pointer flex"
                      >
                        <FiMaximize2 className="my-auto mr-1" />
                        {reindexingProgress.name}
                      </Link>
                    </TableCell>
                    <TableCell>
                      <IndexAttemptStatus status="failed" />
                    </TableCell>
                    <TableCell>
                      <div>
                        <Text className="flex flex-wrap whitespace-normal">
                          {reindexingProgress.error_msg || "-"}
                        </Text>
                      </div>
                    </TableCell>
                    <TableCell>
                      <Link
                        href={`/admin/connector/${reindexingProgress.cc_pair_id}`}
                        className="text-link cursor-pointer flex"
                      >
                        <FiLink className="my-auto mr-1" />
                        Visit Connector
                      </Link>
                    </TableCell>
                    {/* Rendered under the same condition as its header so the
                        columns stay aligned. */}
                    {anyDeletable && (
                      <TableCell>
                        <Button
                          size="xs"
                          color="red"
                          onClick={() =>
                            deleteCCPair(
                              reindexingProgress.connector_id,
                              reindexingProgress.credential_id,
                              setPopup,
                              () =>
                                mutate(
                                  buildCCPairInfoUrl(
                                    reindexingProgress.cc_pair_id
                                  )
                                )
                            )
                          }
                          icon={FiTrash}
                          // Only connectors that may be deleted get an active button.
                          disabled={!reindexingProgress.is_deletable}
                        >
                          Delete
                        </Button>
                      </TableCell>
                    )}
                  </TableRow>
                );
              })}
          </TableBody>
        </Table>

        <div className="mt-3 flex">
          <div className="mx-auto">
            <PageSelector
              totalPages={totalPages}
              currentPage={currentPage}
              onPageChange={(newPage) => setPage(newPage)}
            />
          </div>
        </div>
      </div>
    </div>
  );
}

View File

@ -30,13 +30,10 @@ export function ReindexingProgressTable({
<TableHeaderCell className="w-1/7 sm:w-1/5">
Connector Name
</TableHeaderCell>
<TableHeaderCell className="w-1/7 sm:w-1/5">Status</TableHeaderCell>
<TableHeaderCell className="w-1/7 sm:w-1/5">
<TableHeaderCell className="w-3/7 sm:w-1/5">Status</TableHeaderCell>
<TableHeaderCell className="w-3/7 sm:w-1/5">
Docs Re-Indexed
</TableHeaderCell>
<TableHeaderCell className="w-4/7 sm:w-2/5">
Error Message
</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
@ -65,13 +62,6 @@ export function ReindexingProgressTable({
{reindexingProgress?.latest_index_attempt
?.total_docs_indexed || "-"}
</TableCell>
<TableCell>
<div>
<Text className="flex flex-wrap whitespace-normal">
{reindexingProgress.error_msg || "-"}
</Text>
</div>
</TableCell>
</TableRow>
);
})}

View File

@ -58,6 +58,15 @@ export interface DocumentBoostStatus {
hidden: boolean;
}
/**
 * One row of the failed re-indexing table: a connector-credential pair
 * together with the error message of its failed indexing attempt.
 */
export interface FailedConnectorIndexingStatus {
  // Id of the connector-credential pair; used to build the connector link.
  cc_pair_id: number;
  // Display name of the pair; may be null.
  name: string | null;
  // Error message of the failed attempt; may be null.
  error_msg: string | null;
  // Whether the pair may be deleted.
  is_deletable: boolean;
  // Ids of the connector and the credential that form the pair.
  connector_id: number;
  credential_id: number;
}
export interface IndexAttemptSnapshot {
id: number;
status: ValidStatuses | null;