def get_latest_index_attempts_by_status(
    secondary_index: bool,
    db_session: Session,
    status: IndexingStatus,
) -> Sequence[IndexAttempt]:
    """Return, for each connector-credential pair, its newest attempt in `status`.

    "Newest" means the attempt with the highest ``IndexAttempt.id`` among the
    attempts that match both filters below.

    Args:
        secondary_index: When true, only attempts whose search settings are
            ``IndexModelStatus.FUTURE`` are considered; otherwise only those
            whose search settings are ``IndexModelStatus.PRESENT``.
        db_session: Session used to execute the query.
        status: Only attempts with exactly this indexing status are considered.

    Returns:
        At most one ``IndexAttempt`` per ``connector_credential_pair_id``.
        Pairs with no matching attempt are absent from the result.
    """
    if secondary_index:
        wanted_settings_status = IndexModelStatus.FUTURE
    else:
        wanted_settings_status = IndexModelStatus.PRESENT

    # Per cc-pair: the highest attempt id among attempts matching the filters.
    # The label keeps its historical name even though `status` is generic.
    newest_id_per_cc_pair = (
        select(
            IndexAttempt.connector_credential_pair_id,
            func.max(IndexAttempt.id).label("max_failed_id"),
        )
        .join(SearchSettings, IndexAttempt.search_settings_id == SearchSettings.id)
        .where(
            SearchSettings.status == wanted_settings_status,
            IndexAttempt.status == status,
        )
        .group_by(IndexAttempt.connector_credential_pair_id)
        .subquery()
    )

    # Join back to the full rows so whole IndexAttempt objects are returned.
    same_cc_pair = (
        IndexAttempt.connector_credential_pair_id
        == newest_id_per_cc_pair.c.connector_credential_pair_id
    )
    is_newest_attempt = IndexAttempt.id == newest_id_per_cc_pair.c.max_failed_id
    stmt = select(IndexAttempt).join(
        newest_id_per_cc_pair,
        same_cc_pair & is_newest_attempt,
    )

    rows = db_session.execute(stmt)
    return rows.scalars().all()
@router.get("/admin/connector/failed-indexing-status")
def get_currently_failed_indexing_status(
    secondary_index: bool = False,
    user: User = Depends(current_curator_or_admin_user),
    db_session: Session = Depends(get_session),
    get_editable: bool = Query(
        False, description="If true, return editable document sets"
    ),
) -> list[FailedConnectorIndexingStatus]:
    """Return the most recent failure for every connector that is currently failing.

    A connector-credential pair counts as "currently failing" when it has a
    failed index attempt and no successful attempt with a later
    ``time_updated``. Pairs that have never failed, or whose latest failure
    was followed by a success, are not returned.

    Args:
        secondary_index: Forwarded to ``get_latest_index_attempts_by_status``
            to choose which index's attempts are inspected.
        user: The requesting user, used to scope the cc-pairs that are fetched.
        db_session: Database session for all lookups.
        get_editable: Forwarded to ``get_connector_credential_pairs``.

    Returns:
        One ``FailedConnectorIndexingStatus`` per currently failing cc-pair
        returned by ``get_connector_credential_pairs`` for this user. The pair
        named "DefaultCCPair" is always skipped.
    """
    latest_failed_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.FAILED,
    )
    latest_successful_attempts = get_latest_index_attempts_by_status(
        secondary_index=secondary_index,
        db_session=db_session,
        status=IndexingStatus.SUCCESS,
    )

    # Index the successes by cc-pair id so each failure is checked with a
    # single lookup instead of a scan over every successful attempt.
    latest_success_by_cc_pair_id = {
        attempt.connector_credential_pair_id: attempt
        for attempt in latest_successful_attempts
    }

    # Keep only failures that have NOT been superseded by a newer success.
    still_failing_by_cc_pair_id = {}
    for failed_attempt in latest_failed_attempts:
        cc_pair_id = failed_attempt.connector_credential_pair_id
        success_attempt = latest_success_by_cc_pair_id.get(cc_pair_id)
        if (
            success_attempt is not None
            and success_attempt.time_updated > failed_attempt.time_updated
        ):
            continue
        still_failing_by_cc_pair_id[cc_pair_id] = failed_attempt

    cc_pairs = get_connector_credential_pairs(
        db_session=db_session,
        user=user,
        get_editable=get_editable,
    )

    indexing_statuses: list[FailedConnectorIndexingStatus] = []
    for cc_pair in cc_pairs:
        # Skip DefaultCCPair
        if cc_pair.name == "DefaultCCPair":
            continue

        failed_attempt = still_failing_by_cc_pair_id.get(cc_pair.id)
        if failed_attempt is None:
            # Previously, pairs with no failed attempt at all were also
            # returned (with error_msg=None), so healthy connectors showed up
            # in the failure list. Only real, current failures belong here.
            continue

        # The deletion check returns None when deletion is allowed.
        deletion_blocker = check_deletion_attempt_is_allowed(
            connector_credential_pair=cc_pair,
            db_session=db_session,
            allow_scheduled=True,
        )

        indexing_statuses.append(
            FailedConnectorIndexingStatus(
                cc_pair_id=cc_pair.id,
                name=cc_pair.name,
                error_msg=failed_attempt.error_msg,
                connector_id=cc_pair.connector_id,
                credential_id=cc_pair.credential_id,
                is_deletable=deletion_blocker is None,
            )
        )

    return indexing_statuses
b/web/src/app/admin/configuration/search/UpgradingPage.tsx @@ -1,7 +1,11 @@ import { ThreeDotsLoader } from "@/components/Loading"; import { Modal } from "@/components/Modal"; import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ConnectorIndexingStatus, ValidStatuses } from "@/lib/types"; +import { + ConnectorIndexingStatus, + FailedConnectorIndexingStatus, + ValidStatuses, +} from "@/lib/types"; import { Button, Text, Title } from "@tremor/react"; import { useMemo, useState } from "react"; import useSWR, { mutate } from "swr"; @@ -12,6 +16,8 @@ import { HostedEmbeddingModel, } from "../../../../components/embedding/interfaces"; import { Connector } from "@/lib/connectors/connectors"; +import { FailedReIndexAttempts } from "@/components/embedding/FailedReIndexAttempts"; +import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; export default function UpgradingPage({ futureEmbeddingModel, @@ -20,6 +26,7 @@ export default function UpgradingPage({ }) { const [isCancelling, setIsCancelling] = useState(false); + const { setPopup, popup } = usePopup(); const { data: connectors } = useSWR[]>( "/api/manage/connector", errorHandlingFetcher, @@ -35,6 +42,14 @@ export default function UpgradingPage({ { refreshInterval: 5000 } // 5 seconds ); + const { data: failedIndexingStatus } = useSWR< + FailedConnectorIndexingStatus[] + >( + "/api/manage/admin/connector/failed-indexing-status?secondary_index=true", + errorHandlingFetcher, + { refreshInterval: 5000 } // 5 seconds + ); + const onCancel = async () => { const response = await fetch("/api/search-settings/cancel-new-embedding", { method: "POST", @@ -72,8 +87,13 @@ export default function UpgradingPage({ }); }, [ongoingReIndexingStatus]); + if (!failedIndexingStatus) { + return
No failed index attempts
; + } + return ( <> + {popup} {isCancelling && ( setIsCancelling(false)} @@ -113,6 +133,12 @@ export default function UpgradingPage({ > Cancel + {failedIndexingStatus.length > 0 && ( + + )} The table below shows the re-indexing progress of all existing diff --git a/web/src/components/embedding/FailedReIndexAttempts.tsx b/web/src/components/embedding/FailedReIndexAttempts.tsx new file mode 100644 index 000000000..4ab78af96 --- /dev/null +++ b/web/src/components/embedding/FailedReIndexAttempts.tsx @@ -0,0 +1,152 @@ +import { buildCCPairInfoUrl } from "@/app/admin/connector/[ccPairId]/lib"; +import { PageSelector } from "@/components/PageSelector"; +import { IndexAttemptStatus } from "@/components/Status"; +import { deleteCCPair } from "@/lib/documentDeletion"; +import { + ConnectorIndexingStatus, + FailedConnectorIndexingStatus, +} from "@/lib/types"; +import { + Button, + Table, + TableBody, + TableCell, + TableHead, + TableHeaderCell, + TableRow, + Text, +} from "@tremor/react"; +import Link from "next/link"; +import { useState } from "react"; +import { FiLink, FiMaximize2, FiTrash, FiTrash2 } from "react-icons/fi"; +import { mutate } from "swr"; +import { PopupSpec } from "../admin/connectors/Popup"; + +export function FailedReIndexAttempts({ + failedIndexingStatuses, + setPopup, +}: { + failedIndexingStatuses: FailedConnectorIndexingStatus[]; + setPopup: (popupSpec: PopupSpec | null) => void; +}) { + const numToDisplay = 10; + const [page, setPage] = useState(1); + + const anyDeletable = failedIndexingStatuses.some( + (status) => status.is_deletable + ); + + return ( +
+ + Failed Re-indexing Attempts + + + The table below shows only the failed re-indexing attempts for existing + connectors. These failures require immediate attention. Once all + connectors have been re-indexed successfully, the new model will be used + for all search queries. + + +
+ + + + + Connector Name + + + Status + + + Error Message + + + Visit Connector + + {anyDeletable && ( + + Delete Connector + + )} + + + + {failedIndexingStatuses + .slice(numToDisplay * (page - 1), numToDisplay * page) + .map((reindexingProgress) => { + return ( + + + + + {reindexingProgress.name} + + + + + + + +
+ + {reindexingProgress.error_msg || "-"} + +
+
+ + + + Visit Connector + + + + + +
+ ); + })} +
+
+ +
+
+ setPage(newPage)} + /> +
+
+
+
+ ); +} diff --git a/web/src/components/embedding/ReindexingProgressTable.tsx b/web/src/components/embedding/ReindexingProgressTable.tsx index 882b25910..3706133d0 100644 --- a/web/src/components/embedding/ReindexingProgressTable.tsx +++ b/web/src/components/embedding/ReindexingProgressTable.tsx @@ -30,13 +30,10 @@ export function ReindexingProgressTable({ Connector Name - Status - + Status + Docs Re-Indexed - - Error Message - @@ -65,13 +62,6 @@ export function ReindexingProgressTable({ {reindexingProgress?.latest_index_attempt ?.total_docs_indexed || "-"} - -
- - {reindexingProgress.error_msg || "-"} - -
-
); })} diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index c178fa599..23fab5764 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -58,6 +58,15 @@ export interface DocumentBoostStatus { hidden: boolean; } +export interface FailedConnectorIndexingStatus { + cc_pair_id: number; + name: string | null; + error_msg: string | null; + is_deletable: boolean; + connector_id: number; + credential_id: number; +} + export interface IndexAttemptSnapshot { id: number; status: ValidStatuses | null;