Improve index attempt display

This commit is contained in:
Weves 2023-11-01 18:31:18 -07:00 committed by Chris Weaver
parent d9adee168b
commit e8f778ccb5
6 changed files with 52 additions and 55 deletions

View File

@ -311,7 +311,8 @@ class IndexAttemptRequest(BaseModel):
class IndexAttemptSnapshot(BaseModel):
id: int
status: IndexingStatus | None
new_docs_indexed: int
new_docs_indexed: int # only includes completely new docs
total_docs_indexed: int # includes docs that are updated
error_msg: str | None
time_started: str | None
time_updated: str
@ -324,6 +325,7 @@ class IndexAttemptSnapshot(BaseModel):
id=index_attempt.id,
status=index_attempt.status,
new_docs_indexed=index_attempt.new_docs_indexed or 0,
total_docs_indexed=index_attempt.total_docs_indexed or 0,
error_msg=index_attempt.error_msg,
time_started=index_attempt.time_started.isoformat()
if index_attempt.time_started

View File

@ -1,7 +1,6 @@
"use client";
import {
Card,
Table,
TableHead,
TableRow,
@ -15,29 +14,13 @@ import { CCPairFullInfo } from "./types";
import { useState } from "react";
import { PageSelector } from "@/components/PageSelector";
import { localizeAndPrettify } from "@/lib/time";
import { getDocsProcessedPerMinute } from "@/lib/indexAttempt";
const NUM_IN_PAGE = 8;
export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
const [page, setPage] = useState(1);
// figure out if we need to artificially inflate the number of new docs indexed
// for the ongoing indexing attempt. This is required since the total number of
// docs indexed by a CC Pair is updated before the net new docs for an indexing
// attempt. If we don't do this, there is a mismatch between these two numbers
// which may confuse users.
let newDocsIndexedAdjustment = 0;
const sumOfNewDocs = ccPair.index_attempts.reduce(
(partialSum, indexAttempt) => partialSum + indexAttempt.new_docs_indexed,
0
);
if (
sumOfNewDocs < ccPair.num_docs_indexed &&
ccPair.index_attempts[0]?.status === "in_progress"
) {
newDocsIndexedAdjustment = ccPair.num_docs_indexed - sumOfNewDocs;
}
return (
<>
<Table>
@ -45,37 +28,45 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) {
<TableRow>
<TableHeaderCell>Time Started</TableHeaderCell>
<TableHeaderCell>Status</TableHeaderCell>
<TableHeaderCell>Num New Docs</TableHeaderCell>
<TableHeaderCell>New Doc Cnt</TableHeaderCell>
<TableHeaderCell>Total Doc Cnt</TableHeaderCell>
<TableHeaderCell>Error Msg</TableHeaderCell>
</TableRow>
</TableHead>
<TableBody>
{ccPair.index_attempts
.slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page)
.map((indexAttempt, ind) => (
<TableRow key={indexAttempt.id}>
<TableCell>
{indexAttempt.time_started
? localizeAndPrettify(indexAttempt.time_started)
: "-"}
</TableCell>
<TableCell>
<IndexAttemptStatus
status={indexAttempt.status || "not_started"}
size="xs"
/>
</TableCell>
<TableCell>
{indexAttempt.new_docs_indexed +
(page === 1 && ind === 0 ? newDocsIndexedAdjustment : 0)}
</TableCell>
<TableCell>
<Text className="flex flex-wrap whitespace-normal">
{indexAttempt.error_msg || "-"}
</Text>
</TableCell>
</TableRow>
))}
.map((indexAttempt) => {
const docsPerMinute =
getDocsProcessedPerMinute(indexAttempt)?.toFixed(2);
return (
<TableRow key={indexAttempt.id}>
<TableCell>
{indexAttempt.time_started
? localizeAndPrettify(indexAttempt.time_started)
: "-"}
</TableCell>
<TableCell>
<IndexAttemptStatus
status={indexAttempt.status || "not_started"}
size="xs"
/>
{docsPerMinute && (
<div className="text-xs mt-1">
{docsPerMinute} docs / min
</div>
)}
</TableCell>
<TableCell>{indexAttempt.new_docs_indexed}</TableCell>
<TableCell>{indexAttempt.total_docs_indexed}</TableCell>
<TableCell>
<Text className="flex flex-wrap whitespace-normal">
{indexAttempt.error_msg || "-"}
</Text>
</TableCell>
</TableRow>
);
})}
</TableBody>
</Table>
{ccPair.index_attempts.length > NUM_IN_PAGE && (

View File

@ -37,6 +37,16 @@ export default async function Page({
const lastIndexAttempt = ccPair.index_attempts[0];
const isDeleting = isCurrentlyDeleting(ccPair.latest_deletion_attempt);
// While the first (and only) indexing attempt is still in progress, show that
// attempt's own total instead of the CC Pair's total. This artificially
// deflates the displayed number: the CC Pair's doc count is updated before the
// attempt's counts are, and the resulting mismatch may confuse users.
const totalDocsIndexed =
lastIndexAttempt?.status === "in_progress" &&
ccPair.index_attempts.length === 1
? lastIndexAttempt.total_docs_indexed
: ccPair.num_docs_indexed;
return (
<>
<SSRAutoRefresh />
@ -62,7 +72,7 @@ export default async function Page({
<div className="text-gray-400 text-sm mt-1">
Total Documents Indexed:{" "}
<b className="text-gray-300">{ccPair.num_docs_indexed}</b>
<b className="text-gray-300">{totalDocsIndexed}</b>
</div>
<Divider />

View File

@ -321,8 +321,7 @@ const Main = () => {
| Credential<GoogleDriveCredentialJson>
| undefined = credentialsData.find(
(credential) =>
credential.credential_json?.google_drive_tokens &&
credential.is_admin
credential.credential_json?.google_drive_tokens && credential.is_admin
);
const googleDriveServiceAccountCredential:
| Credential<GoogleDriveServiceAccountCredentialJson>

View File

@ -7,7 +7,7 @@ export const getDocsProcessedPerMinute = (
!indexAttempt ||
!indexAttempt.time_started ||
!indexAttempt.time_updated ||
indexAttempt.new_docs_indexed === 0
indexAttempt.total_docs_indexed === 0
) {
return null;
}
@ -16,11 +16,5 @@ export const getDocsProcessedPerMinute = (
const timeUpdated = new Date(indexAttempt.time_updated);
const timeDiff = timeUpdated.getTime() - timeStarted.getTime();
const seconds = timeDiff / 1000;
// due to some issues with `time_updated` having delayed updates,
// the docs / min will be really high at first. To avoid this,
// we can wait a little bit to let the updated_at catch up a bit
if (seconds < 10) {
return null;
}
return (indexAttempt.new_docs_indexed / seconds) * 60;
return (indexAttempt.total_docs_indexed / seconds) * 60;
};

View File

@ -132,6 +132,7 @@ export interface IndexAttemptSnapshot {
id: number;
status: ValidStatuses | null;
new_docs_indexed: number;
total_docs_indexed: number;
error_msg: string | null;
time_started: string | null;
time_updated: string;