diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 4580140a5..df42d869d 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -295,6 +295,9 @@ def count_unique_cc_pairs_with_successful_index_attempts( embedding_model_id: int | None, db_session: Session, ) -> int: + """Collect all of the Index Attempts that are successful and for the specified embedding model. + Then do distinct by connector_id and credential_id, which is equivalent to the cc-pair. Finally, + do a count to get the total number of unique cc-pairs with successful attempts.""" unique_pairs_count = ( db_session.query(IndexAttempt.connector_id, IndexAttempt.credential_id) .filter( diff --git a/backend/danswer/db/swap_index.py b/backend/danswer/db/swap_index.py index 93eb4714a..f14a45f29 100644 --- a/backend/danswer/db/swap_index.py +++ b/backend/danswer/db/swap_index.py @@ -31,8 +31,10 @@ def check_index_swap(db_session: Session) -> None: embedding_model_id=embedding_model.id, db_session=db_session ) + # Index Attempts are cleaned up as well when the cc-pair is deleted so the logic in this + # function is correct. The unique_cc_indexings are specifically for the existing cc-pairs. if unique_cc_indexings > cc_pair_count: - raise RuntimeError("More unique indexings than cc pairs, should not occur") + logger.error("More unique indexings than cc pairs, should not occur") if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings: # Swap indices