Fix secondary index attempts showing up as the primary index status + scheduling while in-progress (#2039)

This commit is contained in:
Chris Weaver 2024-08-04 13:29:44 -07:00 committed by GitHub
parent 876feecd6f
commit 9d7100a287
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 76 additions and 32 deletions

View File

@ -93,11 +93,15 @@ def _should_create_new_indexing(
if connector.refresh_freq is None:
return False
# Only one scheduled job per connector at a time
# However, another one can be scheduled if the current one is already running,
# because the currently running one will not cover up to the latest time
# Note, this last index is for the given embedding model
if last_index.status == IndexingStatus.NOT_STARTED:
# Only one scheduled/ongoing job per connector at a time.
# This prevents cases where:
# (1) the "latest" index_attempt is only scheduled, so the UI shows that one
# even though another index_attempt is still in progress
# (2) multiple index_attempts are scheduled at the same time
if (
last_index.status == IndexingStatus.NOT_STARTED
or last_index.status == IndexingStatus.IN_PROGRESS
):
return False
current_db_time = get_db_current_time(db_session)

View File

@ -247,19 +247,25 @@ def get_index_attempts_for_connector(
def get_latest_finished_index_attempt_for_cc_pair(
    connector_credential_pair_id: int,
    secondary_index: bool,
    db_session: Session,
) -> IndexAttempt | None:
    """Return the newest finished index attempt for a connector-credential pair.

    An attempt counts as "finished" when its status is neither NOT_STARTED
    nor IN_PROGRESS. Only attempts whose embedding model matches the requested
    index are considered, so attempts made for the secondary index are never
    reported for the primary index (and vice versa).

    Args:
        connector_credential_pair_id: ID of the connector-credential pair
            whose attempts are searched.
        secondary_index: When True, match attempts whose embedding model has
            status FUTURE; otherwise match those with status PRESENT.
        db_session: Session used to execute the query.

    Returns:
        The matching attempt with the latest ``time_created``, or None if no
        attempt matches.
    """
    # Both branches of the lookup differ only in which embedding-model status
    # they filter on, so pick the status once and build a single statement.
    target_model_status = (
        IndexModelStatus.FUTURE if secondary_index else IndexModelStatus.PRESENT
    )

    stmt = (
        select(IndexAttempt)
        .join(EmbeddingModel)
        .where(
            IndexAttempt.connector_credential_pair_id == connector_credential_pair_id,
            # Exclude attempts that are still queued or running
            IndexAttempt.status.not_in(
                [IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
            ),
            EmbeddingModel.status == target_model_status,
        )
        .order_by(desc(IndexAttempt.time_created))
        .limit(1)
    )
    return db_session.execute(stmt).scalar_one_or_none()

View File

@ -422,7 +422,9 @@ def get_connector_indexing_status(
)
latest_finished_attempt = get_latest_finished_index_attempt_for_cc_pair(
connector_credential_pair_id=cc_pair.id, db_session=db_session
connector_credential_pair_id=cc_pair.id,
secondary_index=secondary_index,
db_session=db_session,
)
indexing_statuses.append(
@ -433,24 +435,26 @@ def get_connector_indexing_status(
credential=CredentialSnapshot.from_credential_db_model(credential),
public_doc=cc_pair.is_public,
owner=credential.user.email if credential.user else "",
last_finished_status=latest_finished_attempt.status
if latest_finished_attempt
else None,
last_status=latest_index_attempt.status
if latest_index_attempt
else None,
last_finished_status=(
latest_finished_attempt.status if latest_finished_attempt else None
),
last_status=(
latest_index_attempt.status if latest_index_attempt else None
),
last_success=cc_pair.last_successful_index_time,
docs_indexed=cc_pair_to_document_cnt.get(
(connector.id, credential.id), 0
),
error_msg=latest_index_attempt.error_msg
if latest_index_attempt
else None,
latest_index_attempt=IndexAttemptSnapshot.from_index_attempt_db_model(
latest_index_attempt
)
if latest_index_attempt
else None,
error_msg=(
latest_index_attempt.error_msg if latest_index_attempt else None
),
latest_index_attempt=(
IndexAttemptSnapshot.from_index_attempt_db_model(
latest_index_attempt
)
if latest_index_attempt
else None
),
deletion_attempt=get_deletion_status(
connector_id=connector.id,
credential_id=credential.id,

View File

@ -130,7 +130,7 @@ function ConnectorRow({
ccPairsIndexingStatus,
invisible,
}: {
ccPairsIndexingStatus: any;
ccPairsIndexingStatus: ConnectorIndexingStatus<any, any>;
invisible?: boolean;
}) {
const router = useRouter();
@ -209,7 +209,9 @@ function ConnectorRow({
return (
<TableRow
className={`hover:bg-hover-light ${invisible ? "invisible h-0 !-mb-10" : "border border-border !border-b"} w-full cursor-pointer relative`}
className={`hover:bg-hover-light ${
invisible ? "invisible h-0 !-mb-10" : "border border-border !border-b"
} w-full cursor-pointer relative`}
onClick={() =>
router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`)
}
@ -354,12 +356,40 @@ export function CCPairIndexingStatusTable({
name: "Sample File Connector",
last_status: "success",
connector: {
name: "Sample File Connector",
source: "file",
input_type: "poll",
connector_specific_config: {
file_locations: ["/path/to/sample/file.txt"],
},
refresh_freq: 86400,
prune_freq: null,
indexing_start: new Date("2023-07-01T12:00:00Z"),
disabled: false,
id: 1,
credential_ids: [],
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
},
credential: {
id: 1,
name: "Sample Credential",
source: "file",
user_id: "1",
time_created: "2023-07-01T12:00:00Z",
time_updated: "2023-07-01T12:00:00Z",
credential_json: {},
admin_public: false,
},
public_doc: true,
docs_indexed: 1000,
last_success: "2023-07-01T12:00:00Z",
last_finished_status: "success",
latest_index_attempt: null,
owner: "1",
error_msg: "",
deletion_attempt: null,
is_deletable: true,
}}
/>
<div className="-mb-10" />