diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py index b1f1acdc89..d7bce60108 100644 --- a/backend/danswer/background/celery/celery_app.py +++ b/backend/danswer/background/celery/celery_app.py @@ -103,7 +103,7 @@ def cleanup_connector_credential_pair_task( @build_celery_task_wrapper(name_cc_prune_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) def prune_documents_task(connector_id: int, credential_id: int) -> None: - """connector pruning task. For a cc pair, this task pulls all docuement IDs from the source + """connector pruning task. For a cc pair, this task pulls all document IDs from the source and compares those IDs to locally stored documents and deletes all locally stored IDs missing from the most recently pulled document ID list""" with Session(get_sqlalchemy_engine()) as db_session: diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index 359053969b..2649be0fd1 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -72,10 +72,18 @@ def _should_create_new_indexing( # When switching over models, always index at least once if model.status == IndexModelStatus.FUTURE: if last_index: - # secondary indexes should not index again after success - # or else the model will never be able to swap + # No new index if the last index attempt succeeded + # Once is enough. The model will never be able to swap otherwise. if last_index.status == IndexingStatus.SUCCESS: return False + + # No new index if the last index attempt is waiting to start + if last_index.status == IndexingStatus.NOT_STARTED: + return False + + # No new index if the last index attempt is running + if last_index.status == IndexingStatus.IN_PROGRESS: + return False else: if connector.id == 0: # Ingestion API return False diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 51064f78e2..2cd563a60c 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -277,7 +277,7 @@ def mark_cc_pair__document_set_relationships_to_be_deleted__no_commit( `cc_pair_id` as not current and returns the list of all document set IDs affected. - NOTE: rases a `ValueError` if any of the document sets are currently syncing + NOTE: raises a `ValueError` if any of the document sets are currently syncing to avoid getting into a bad state.""" document_set__cc_pair_relationships = db_session.scalars( select(DocumentSet__ConnectorCredentialPair).where(