mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-09 20:39:29 +02:00
Improve index attempt display
This commit is contained in:
parent
d9adee168b
commit
e8f778ccb5
@ -311,7 +311,8 @@ class IndexAttemptRequest(BaseModel):
|
||||
class IndexAttemptSnapshot(BaseModel):
|
||||
id: int
|
||||
status: IndexingStatus | None
|
||||
new_docs_indexed: int
|
||||
new_docs_indexed: int # only includes completely new docs
|
||||
total_docs_indexed: int # includes docs that are updated
|
||||
error_msg: str | None
|
||||
time_started: str | None
|
||||
time_updated: str
|
||||
@ -324,6 +325,7 @@ class IndexAttemptSnapshot(BaseModel):
|
||||
id=index_attempt.id,
|
||||
status=index_attempt.status,
|
||||
new_docs_indexed=index_attempt.new_docs_indexed or 0,
|
||||
total_docs_indexed=index_attempt.total_docs_indexed or 0,
|
||||
error_msg=index_attempt.error_msg,
|
||||
time_started=index_attempt.time_started.isoformat()
|
||||
if index_attempt.time_started
|
||||
|
@ -1,7 +1,6 @@
|
||||
"use client";
|
||||
|
||||
import {
|
||||
Card,
|
||||
Table,
|
||||
TableHead,
|
||||
TableRow,
|
||||
@ -15,29 +14,13 @@ import { CCPairFullInfo } from "./types";
|
||||
import { useState } from "react";
|
||||
import { PageSelector } from "@/components/PageSelector";
|
||||
import { localizeAndPrettify } from "@/lib/time";
|
||||
import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
|
||||
|
||||
const NUM_IN_PAGE = 8;
|
||||
|
||||
export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
|
||||
const [page, setPage] = useState(1);
|
||||
|
||||
// figure out if we need to artificially inflate the number of new docs indexed
|
||||
// for the ongoing indexing attempt. This is required since the total number of
|
||||
// docs indexed by a CC Pair is updated before the net new docs for an indexing
|
||||
// attempt. If we don't do this, there is a mismatch between these two numbers
|
||||
// which may confuse users.
|
||||
let newDocsIndexedAdjustment = 0;
|
||||
const sumOfNewDocs = ccPair.index_attempts.reduce(
|
||||
(partialSum, indexAttempt) => partialSum + indexAttempt.new_docs_indexed,
|
||||
0
|
||||
);
|
||||
if (
|
||||
sumOfNewDocs < ccPair.num_docs_indexed &&
|
||||
ccPair.index_attempts[0]?.status === "in_progress"
|
||||
) {
|
||||
newDocsIndexedAdjustment = ccPair.num_docs_indexed - sumOfNewDocs;
|
||||
}
|
||||
|
||||
return (
|
||||
<>
|
||||
<Table>
|
||||
@ -45,37 +28,45 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
|
||||
<TableRow>
|
||||
<TableHeaderCell>Time Started</TableHeaderCell>
|
||||
<TableHeaderCell>Status</TableHeaderCell>
|
||||
<TableHeaderCell>Num New Docs</TableHeaderCell>
|
||||
<TableHeaderCell>New Doc Cnt</TableHeaderCell>
|
||||
<TableHeaderCell>Total Doc Cnt</TableHeaderCell>
|
||||
<TableHeaderCell>Error Msg</TableHeaderCell>
|
||||
</TableRow>
|
||||
</TableHead>
|
||||
<TableBody>
|
||||
{ccPair.index_attempts
|
||||
.slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)
|
||||
.map((indexAttempt, ind) => (
|
||||
<TableRow key={indexAttempt.id}>
|
||||
<TableCell>
|
||||
{indexAttempt.time_started
|
||||
? localizeAndPrettify(indexAttempt.time_started)
|
||||
: "-"}
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<IndexAttemptStatus
|
||||
status={indexAttempt.status || "not_started"}
|
||||
size="xs"
|
||||
/>
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
{indexAttempt.new_docs_indexed +
|
||||
(page === 1 && ind === 0 ? newDocsIndexedAdjustment : 0)}
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<Text className="flex flex-wrap whitespace-normal">
|
||||
{indexAttempt.error_msg || "-"}
|
||||
</Text>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
))}
|
||||
.map((indexAttempt) => {
|
||||
const docsPerMinute =
|
||||
getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
|
||||
return (
|
||||
<TableRow key={indexAttempt.id}>
|
||||
<TableCell>
|
||||
{indexAttempt.time_started
|
||||
? localizeAndPrettify(indexAttempt.time_started)
|
||||
: "-"}
|
||||
</TableCell>
|
||||
<TableCell>
|
||||
<IndexAttemptStatus
|
||||
status={indexAttempt.status || "not_started"}
|
||||
size="xs"
|
||||
/>
|
||||
{docsPerMinute && (
|
||||
<div className="text-xs mt-1">
|
||||
{docsPerMinute} docs / min
|
||||
</div>
|
||||
)}
|
||||
</TableCell>
|
||||
<TableCell>{indexAttempt.new_docs_indexed}</TableCell>
|
||||
<TableCell>{indexAttempt.total_docs_indexed}</TableCell>
|
||||
<TableCell>
|
||||
<Text className="flex flex-wrap whitespace-normal">
|
||||
{indexAttempt.error_msg || "-"}
|
||||
</Text>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
);
|
||||
})}
|
||||
</TableBody>
|
||||
</Table>
|
||||
{ccPair.index_attempts.length > NUM_IN_PAGE && (
|
||||
|
@ -37,6 +37,16 @@ export default async function Page({
|
||||
const lastIndexAttempt = ccPair.index_attempts[0];
|
||||
const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt);
|
||||
|
||||
// figure out if we need to artificially deflate the number of docs indexed.
|
||||
// This is required since the total number of docs indexed by a CC Pair is
|
||||
// updated before the new docs for an indexing attempt. If we don't do this,
|
||||
// there is a mismatch between these two numbers which may confuse users.
|
||||
const totalDocsIndexed =
|
||||
lastIndexAttempt?.status === "in_progress" &&
|
||||
ccPair.index_attempts.length === 1
|
||||
? lastIndexAttempt.total_docs_indexed
|
||||
: ccPair.num_docs_indexed;
|
||||
|
||||
return (
|
||||
<>
|
||||
<SSRAutoRefresh />
|
||||
@ -62,7 +72,7 @@ export default async function Page({
|
||||
|
||||
<div className="text-gray-400 text-sm mt-1">
|
||||
Total Documents Indexed:{" "}
|
||||
<b className="text-gray-300">{ccPair.num_docs_indexed}</b>
|
||||
<b className="text-gray-300">{totalDocsIndexed}</b>
|
||||
</div>
|
||||
|
||||
<Divider />
|
||||
|
@ -321,8 +321,7 @@ const Main = () => {
|
||||
| Credential<GoogleDriveCredentialJson>
|
||||
| undefined = credentialsData.find(
|
||||
(credential) =>
|
||||
credential.credential_json?.google_drive_tokens &&
|
||||
credential.is_admin
|
||||
credential.credential_json?.google_drive_tokens && credential.is_admin
|
||||
);
|
||||
const googleDriveServiceAccountCredential:
|
||||
| Credential<GoogleDriveServiceAccountCredentialJson>
|
||||
|
@ -7,7 +7,7 @@ export const getDocsProcessedPerMinute = (
|
||||
!indexAttempt ||
|
||||
!indexAttempt.time_started ||
|
||||
!indexAttempt.time_updated ||
|
||||
indexAttempt.new_docs_indexed === 0
|
||||
indexAttempt.total_docs_indexed === 0
|
||||
) {
|
||||
return null;
|
||||
}
|
||||
@ -16,11 +16,5 @@ export const getDocsProcessedPerMinute = (
|
||||
const timeUpdated = new Date(indexAttempt.time_updated);
|
||||
const timeDiff = timeUpdated.getTime() - timeStarted.getTime();
|
||||
const seconds = timeDiff / 1000;
|
||||
// due to some issues with `time_updated` having delayed updates,
|
||||
// the docs / min will be really high at first. To avoid this,
|
||||
// we can wait a little bit to let the updated_at catch up a bit
|
||||
if (seconds < 10) {
|
||||
return null;
|
||||
}
|
||||
return (indexAttempt.new_docs_indexed / seconds) * 60;
|
||||
return (indexAttempt.total_docs_indexed / seconds) * 60;
|
||||
};
|
||||
|
@ -132,6 +132,7 @@ export interface IndexAttemptSnapshot {
|
||||
id: number;
|
||||
status: ValidStatuses | null;
|
||||
new_docs_indexed: number;
|
||||
total_docs_indexed: number;
|
||||
error_msg: string | null;
|
||||
time_started: string | null;
|
||||
time_updated: string;
|
||||
|
Loading…
x
Reference in New Issue
Block a user