diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py index 009e45205..1cdc40090 100644 --- a/backend/danswer/server/models.py +++ b/backend/danswer/server/models.py @@ -311,7 +311,8 @@ class IndexAttemptRequest(BaseModel): class IndexAttemptSnapshot(BaseModel): id: int status: IndexingStatus | None - new_docs_indexed: int + new_docs_indexed: int # only includes completely new docs + total_docs_indexed: int # includes docs that are updated error_msg: str | None time_started: str | None time_updated: str @@ -324,6 +325,7 @@ class IndexAttemptSnapshot(BaseModel): id=index_attempt.id, status=index_attempt.status, new_docs_indexed=index_attempt.new_docs_indexed or 0, + total_docs_indexed=index_attempt.total_docs_indexed or 0, error_msg=index_attempt.error_msg, time_started=index_attempt.time_started.isoformat() if index_attempt.time_started diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx index e31baa3e4..6a1fb188c 100644 --- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx +++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx @@ -1,7 +1,6 @@ "use client"; import { - Card, Table, TableHead, TableRow, @@ -15,29 +14,13 @@ import { CCPairFullInfo } from "./types"; import { useState } from "react"; import { PageSelector } from "@/components/PageSelector"; import { localizeAndPrettify } from "@/lib/time"; +import { getDocsProcessedPerMinute } from "@/lib/indexAttempt"; const NUM_IN_PAGE = 8; export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { const [page, setPage] = useState(1); - // figure out if we need to artificially inflate the number of new docs indexed - // for the ongoing indexing attempt. This is required since the total number of - // docs indexed by a CC Pair is updated before the net new docs for an indexing - // attempt. If we don't do this, there is a mismatch between these two numbers - // which may confuse users. - let newDocsIndexedAdjustment = 0; - const sumOfNewDocs = ccPair.index_attempts.reduce( - (partialSum, indexAttempt) => partialSum + indexAttempt.new_docs_indexed, - 0 - ); - if ( - sumOfNewDocs < ccPair.num_docs_indexed && - ccPair.index_attempts[0]?.status === "in_progress" - ) { - newDocsIndexedAdjustment = ccPair.num_docs_indexed - sumOfNewDocs; - } - return ( <> @@ -45,37 +28,45 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { Time Started Status - Num New Docs + New Doc Cnt + Total Doc Cnt Error Msg {ccPair.index_attempts .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page) - .map((indexAttempt, ind) => ( - - - {indexAttempt.time_started - ? localizeAndPrettify(indexAttempt.time_started) - : "-"} - - - - - - {indexAttempt.new_docs_indexed + - (page === 1 && ind === 0 ? newDocsIndexedAdjustment : 0)} - - - - {indexAttempt.error_msg || "-"} - - - - ))} + .map((indexAttempt) => { + const docsPerMinute = + getDocsProcessedPerMinute(indexAttempt)?.toFixed(2); + return ( + + + {indexAttempt.time_started + ? localizeAndPrettify(indexAttempt.time_started) + : "-"} + + + + {docsPerMinute && ( +
+ {docsPerMinute} docs / min +
+ )} +
+ {indexAttempt.new_docs_indexed} + {indexAttempt.total_docs_indexed} + + + {indexAttempt.error_msg || "-"} + + +
+ ); + })}
{ccPair.index_attempts.length > NUM_IN_PAGE && ( diff --git a/web/src/app/admin/connector/[ccPairId]/page.tsx b/web/src/app/admin/connector/[ccPairId]/page.tsx index fab716309..50686a2ef 100644 --- a/web/src/app/admin/connector/[ccPairId]/page.tsx +++ b/web/src/app/admin/connector/[ccPairId]/page.tsx @@ -37,6 +37,16 @@ export default async function Page({ const lastIndexAttempt = ccPair.index_attempts[0]; const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt); + // figure out if we need to artificially deflate the number of docs indexed. + // This is required since the total number of docs indexed by a CC Pair is + // updated before the new docs for an indexing attempt. If we don't do this, + // there is a mismatch between these two numbers which may confuse users. + const totalDocsIndexed = + lastIndexAttempt?.status === "in_progress" && + ccPair.index_attempts.length === 1 + ? lastIndexAttempt.total_docs_indexed + : ccPair.num_docs_indexed; + return ( <> @@ -62,7 +72,7 @@ export default async function Page({
Total Documents Indexed:{" "} - {ccPair.num_docs_indexed} + {totalDocsIndexed}
diff --git a/web/src/app/admin/connectors/google-drive/page.tsx b/web/src/app/admin/connectors/google-drive/page.tsx index 49eec13e5..013ff4528 100644 --- a/web/src/app/admin/connectors/google-drive/page.tsx +++ b/web/src/app/admin/connectors/google-drive/page.tsx @@ -321,8 +321,7 @@ const Main = () => { | Credential | undefined = credentialsData.find( (credential) => - credential.credential_json?.google_drive_tokens && - credential.is_admin + credential.credential_json?.google_drive_tokens && credential.is_admin ); const googleDriveServiceAccountCredential: | Credential diff --git a/web/src/lib/indexAttempt.ts b/web/src/lib/indexAttempt.ts index 78fa4a379..6bc533516 100644 --- a/web/src/lib/indexAttempt.ts +++ b/web/src/lib/indexAttempt.ts @@ -7,7 +7,7 @@ export const getDocsProcessedPerMinute = ( !indexAttempt || !indexAttempt.time_started || !indexAttempt.time_updated || - indexAttempt.new_docs_indexed === 0 + indexAttempt.total_docs_indexed === 0 ) { return null; } @@ -16,11 +16,5 @@ export const getDocsProcessedPerMinute = ( const timeUpdated = new Date(indexAttempt.time_updated); const timeDiff = timeUpdated.getTime() - timeStarted.getTime(); const seconds = timeDiff / 1000; - // due to some issues with `time_updated` having delayed updates, - // the docs / min will be really high at first. To avoid this, - // we can wait a little bit to let the updated_at catch up a bit - if (seconds < 10) { - return null; - } - return (indexAttempt.new_docs_indexed / seconds) * 60; + return (indexAttempt.total_docs_indexed / seconds) * 60; }; diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index a4f85ebad..381b3a147 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -132,6 +132,7 @@ export interface IndexAttemptSnapshot { id: number; status: ValidStatuses | null; new_docs_indexed: number; + total_docs_indexed: number; error_msg: string | null; time_started: string | null; time_updated: string;