diff --git a/backend/danswer/server/models.py b/backend/danswer/server/models.py
index 009e45205..1cdc40090 100644
--- a/backend/danswer/server/models.py
+++ b/backend/danswer/server/models.py
@@ -311,7 +311,8 @@ class IndexAttemptRequest(BaseModel):
class IndexAttemptSnapshot(BaseModel):
id: int
status: IndexingStatus | None
- new_docs_indexed: int
+ new_docs_indexed: int # only includes completely new docs
+ total_docs_indexed: int # includes docs that are updated
error_msg: str | None
time_started: str | None
time_updated: str
@@ -324,6 +325,7 @@ class IndexAttemptSnapshot(BaseModel):
id=index_attempt.id,
status=index_attempt.status,
new_docs_indexed=index_attempt.new_docs_indexed or 0,
+ total_docs_indexed=index_attempt.total_docs_indexed or 0,
error_msg=index_attempt.error_msg,
time_started=index_attempt.time_started.isoformat()
if index_attempt.time_started
diff --git a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
index e31baa3e4..6a1fb188c 100644
--- a/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
+++ b/web/src/app/admin/connector/[ccPairId]/IndexingAttemptsTable.tsx
@@ -1,7 +1,6 @@
"use client";
import {
- Card,
Table,
TableHead,
TableRow,
@@ -15,29 +14,13 @@ import { CCPairFullInfo } from "./types";
import { useState } from "react";
import { PageSelector } from "@/components/PageSelector";
import { localizeAndPrettify } from "@/lib/time";
+import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
const NUM_IN_PAGE = 8;
export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
const [page, setPage] = useState(1);
- // figure out if we need to artificially inflate the number of new docs indexed
- // for the ongoing indexing attempt. This is required since the total number of
- // docs indexed by a CC Pair is updated before the net new docs for an indexing
- // attempt. If we don't do this, there is a mismatch between these two numbers
- // which may confuse users.
- let newDocsIndexedAdjustment = 0;
- const sumOfNewDocs = ccPair.index_attempts.reduce(
- (partialSum, indexAttempt) => partialSum + indexAttempt.new_docs_indexed,
- 0
- );
- if (
- sumOfNewDocs < ccPair.num_docs_indexed &&
- ccPair.index_attempts[0]?.status === "in_progress"
- ) {
- newDocsIndexedAdjustment = ccPair.num_docs_indexed - sumOfNewDocs;
- }
-
return (
<>
@@ -45,37 +28,45 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
Time Started
Status
- Num New Docs
+ New Doc Cnt
+ Total Doc Cnt
Error Msg
{ccPair.index_attempts
.slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)
- .map((indexAttempt, ind) => (
-
-
- {indexAttempt.time_started
- ? localizeAndPrettify(indexAttempt.time_started)
- : "-"}
-
-
-
-
-
- {indexAttempt.new_docs_indexed +
- (page === 1 && ind === 0 ? newDocsIndexedAdjustment : 0)}
-
-
-
- {indexAttempt.error_msg || "-"}
-
-
-
- ))}
+ .map((indexAttempt) => {
+ const docsPerMinute =
+ getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
+ return (
+
+
+ {indexAttempt.time_started
+ ? localizeAndPrettify(indexAttempt.time_started)
+ : "-"}
+
+
+
+ {docsPerMinute && (
+
+ {docsPerMinute} docs / min
+
+ )}
+
+ {indexAttempt.new_docs_indexed}
+ {indexAttempt.total_docs_indexed}
+
+
+ {indexAttempt.error_msg || "-"}
+
+
+
+ );
+ })}
{ccPair.index_attempts.length > NUM_IN_PAGE && (
diff --git a/web/src/app/admin/connector/[ccPairId]/page.tsx b/web/src/app/admin/connector/[ccPairId]/page.tsx
index fab716309..50686a2ef 100644
--- a/web/src/app/admin/connector/[ccPairId]/page.tsx
+++ b/web/src/app/admin/connector/[ccPairId]/page.tsx
@@ -37,6 +37,16 @@ export default async function Page({
const lastIndexAttempt = ccPair.index_attempts[0];
const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt);
+ // While the first (and only) indexing attempt is still in progress, show that
+ // attempt's own running total instead of the CC Pair's total. The CC Pair's
+ // total is updated before the attempt's count, so showing it here would create
+ // a mismatch between these two numbers which may confuse users.
+ const totalDocsIndexed =
+ lastIndexAttempt?.status === "in_progress" &&
+ ccPair.index_attempts.length === 1
+ ? lastIndexAttempt.total_docs_indexed
+ : ccPair.num_docs_indexed;
+
return (
<>
@@ -62,7 +72,7 @@ export default async function Page({
Total Documents Indexed:{" "}
- {ccPair.num_docs_indexed}
+ {totalDocsIndexed}
diff --git a/web/src/app/admin/connectors/google-drive/page.tsx b/web/src/app/admin/connectors/google-drive/page.tsx
index 49eec13e5..013ff4528 100644
--- a/web/src/app/admin/connectors/google-drive/page.tsx
+++ b/web/src/app/admin/connectors/google-drive/page.tsx
@@ -321,8 +321,7 @@ const Main = () => {
| Credential
| undefined = credentialsData.find(
(credential) =>
- credential.credential_json?.google_drive_tokens &&
- credential.is_admin
+ credential.credential_json?.google_drive_tokens && credential.is_admin
);
const googleDriveServiceAccountCredential:
| Credential
diff --git a/web/src/lib/indexAttempt.ts b/web/src/lib/indexAttempt.ts
index 78fa4a379..6bc533516 100644
--- a/web/src/lib/indexAttempt.ts
+++ b/web/src/lib/indexAttempt.ts
@@ -7,7 +7,7 @@ export const getDocsProcessedPerMinute = (
!indexAttempt ||
!indexAttempt.time_started ||
!indexAttempt.time_updated ||
- indexAttempt.new_docs_indexed === 0
+ indexAttempt.total_docs_indexed === 0
) {
return null;
}
@@ -16,11 +16,5 @@ export const getDocsProcessedPerMinute = (
const timeUpdated = new Date(indexAttempt.time_updated);
const timeDiff = timeUpdated.getTime() - timeStarted.getTime();
const seconds = timeDiff / 1000;
- // due to some issues with `time_updated` having delayed updates,
- // the docs / min will be really high at first. To avoid this,
- // we can wait a little bit to let the updated_at catch up a bit
- if (seconds < 10) {
- return null;
- }
- return (indexAttempt.new_docs_indexed / seconds) * 60;
+ return (indexAttempt.total_docs_indexed / seconds) * 60;
};
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index a4f85ebad..381b3a147 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -132,6 +132,7 @@ export interface IndexAttemptSnapshot {
id: number;
status: ValidStatuses | null;
new_docs_indexed: number;
+ total_docs_indexed: number;
error_msg: string | null;
time_started: string | null;
time_updated: string;