From 5f25826a98db6ebdc201719f8c0ec506370569ff Mon Sep 17 00:00:00 2001
From: Weves
Date: Tue, 26 Sep 2023 17:13:16 -0700
Subject: [PATCH] Handle document_set deletion better + prevent updates while syncing

---
 backend/danswer/db/document_set.py           | 55 +++++++++++++++++---
 backend/danswer/document_set/document_set.py | 27 ++++++++--
 backend/danswer/server/document_set.py       |  4 +-
 web/src/app/admin/documents/sets/page.tsx    | 42 ++++++++++++---
 4 files changed, 108 insertions(+), 20 deletions(-)

diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py
index aacc2243da48..1e2f581b056e 100644
--- a/backend/danswer/db/document_set.py
+++ b/backend/danswer/db/document_set.py
@@ -5,6 +5,7 @@ from uuid import UUID
 from sqlalchemy import and_
 from sqlalchemy import delete
 from sqlalchemy import func
+from sqlalchemy import or_
 from sqlalchemy import select
 from sqlalchemy.orm import Session

@@ -101,8 +102,14 @@ def update_document_set(
         )
         if document_set_row is None:
             raise ValueError(
-                f"No document set with ID {document_set_update_request.id}"
+                f"No document set with ID '{document_set_update_request.id}'"
             )
+        if not document_set_row.is_up_to_date:
+            raise ValueError(
+                "Cannot update document set while it is syncing. Please wait "
+                "for it to finish syncing, and then try again."
+            )
+
         document_set_row.description = document_set_update_request.description
         document_set_row.is_up_to_date = False

@@ -144,7 +151,23 @@ def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> No
     db_session.commit()


-def delete_document_set(document_set_id: int, db_session: Session) -> None:
+def delete_document_set(
+    document_set_row: DocumentSetDBModel, db_session: Session
+) -> None:
+    # delete all relationships to CC pairs
+    _delete_document_set_cc_pairs(
+        db_session=db_session, document_set_id=document_set_row.id
+    )
+    db_session.delete(document_set_row)
+    db_session.commit()
+
+
+def mark_document_set_as_to_be_deleted(
+    document_set_id: int, db_session: Session
+) -> None:
+    """Cleans up all document_set -> cc_pair relationships and marks the document set
+    as needing an update. The actual document set row will be deleted by the background
+    job which syncs these changes to Vespa."""
     # start a transaction
     db_session.begin()

     try:
         document_set_row = get_document_set_by_id(
             db_session=db_session,
             document_set_id=document_set_id,
         )
         if document_set_row is None:
             raise ValueError(f"No document set with ID: '{document_set_id}'")
+        if not document_set_row.is_up_to_date:
+            raise ValueError(
+                "Cannot delete document set while it is syncing. Please wait "
+                "for it to finish syncing, and then try again."
+            )

         # delete all relationships to CC pairs
         _delete_document_set_cc_pairs(
             db_session=db_session, document_set_id=document_set_id
         )
-        # delete the actual document set row
-        db_session.delete(document_set_row)
+        # mark the row as needing a sync, it will be deleted there since there
+        # are no more relationships to cc pairs
+        document_set_row.is_up_to_date = False
         db_session.commit()
     except:
         db_session.rollback()
@@ -174,21 +203,27 @@ def fetch_document_sets(
     1. The document set itself
     2. All CC pairs associated with the document set"""
     results = cast(
-        list[tuple[DocumentSetDBModel, ConnectorCredentialPair]],
+        list[tuple[DocumentSetDBModel, ConnectorCredentialPair | None]],
         db_session.execute(
             select(DocumentSetDBModel, ConnectorCredentialPair)
             .join(
                 DocumentSet__ConnectorCredentialPair,
                 DocumentSetDBModel.id
                 == DocumentSet__ConnectorCredentialPair.document_set_id,
+                isouter=True,  # outer join is needed to also fetch document sets with no cc pairs
             )
             .join(
                 ConnectorCredentialPair,
                 ConnectorCredentialPair.id
                 == DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,
+                isouter=True,  # outer join is needed to also fetch document sets with no cc pairs
             )
             .where(
-                DocumentSet__ConnectorCredentialPair.is_current == True  # noqa: E712
+                or_(
+                    DocumentSet__ConnectorCredentialPair.is_current
+                    == True,  # noqa: E712
+                    DocumentSet__ConnectorCredentialPair.is_current.is_(None),
+                )
             )
         ).all(),
     )
@@ -198,9 +233,13 @@
     ] = {}
     for document_set, cc_pair in results:
         if document_set.id not in aggregated_results:
-            aggregated_results[document_set.id] = (document_set, [cc_pair])
+            aggregated_results[document_set.id] = (
+                document_set,
+                [cc_pair] if cc_pair else [],
+            )
         else:
-            aggregated_results[document_set.id][1].append(cc_pair)
+            if cc_pair:
+                aggregated_results[document_set.id][1].append(cc_pair)

     return [
         (document_set, cc_pairs)
diff --git a/backend/danswer/document_set/document_set.py b/backend/danswer/document_set/document_set.py
index 0b9ca99b5a99..e18d995b7558 100644
--- a/backend/danswer/document_set/document_set.py
+++ b/backend/danswer/document_set/document_set.py
@@ -1,13 +1,18 @@
+from typing import cast
+
 from sqlalchemy.orm import Session

 from danswer.datastores.document_index import get_default_document_index
 from danswer.datastores.interfaces import DocumentIndex
 from danswer.datastores.interfaces import UpdateRequest
 from danswer.db.document import prepare_to_modify_documents
+from danswer.db.document_set import delete_document_set
 from danswer.db.document_set import fetch_document_sets_for_documents
 from danswer.db.document_set import fetch_documents_for_document_set
+from danswer.db.document_set import get_document_set_by_id
 from danswer.db.document_set import mark_document_set_as_synced
 from danswer.db.engine import get_sqlalchemy_engine
+from danswer.db.models import DocumentSet
 from danswer.utils.batching import batch_generator
 from danswer.utils.logger import setup_logger

@@ -59,7 +64,21 @@ def sync_document_set(document_set_id: int) -> None:
             document_index=document_index,
         )

-        mark_document_set_as_synced(
-            document_set_id=document_set_id, db_session=db_session
-        )
-        logger.info(f"Document set sync for '{document_set_id}' complete!")
+        # if there are no connectors, then delete the document set. Otherwise, just
+        # mark it as successfully synced.
+        document_set = cast(
+            DocumentSet,
+            get_document_set_by_id(
+                db_session=db_session, document_set_id=document_set_id
+            ),
+        )  # casting since we "know" a document set with this ID exists
+        if not document_set.connector_credential_pairs:
+            delete_document_set(document_set_row=document_set, db_session=db_session)
+            logger.info(
+                f"Successfully deleted document set with ID: '{document_set_id}'!"
+            )
+        else:
+            mark_document_set_as_synced(
+                document_set_id=document_set_id, db_session=db_session
+            )
+            logger.info(f"Document set sync for '{document_set_id}' complete!")
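
Taken together, the two backend files above split deletion into two phases: the API layer only strips the document set's cc-pair links and flips is_up_to_date to False, and the background Vespa sync job later deletes the now-empty row (or re-marks it as synced if links remain). Below is a rough, self-contained sketch of that flow; FakeDocumentSet and the two functions are simplified stand-ins for illustration only, not Danswer's actual models or signatures.

# Hypothetical, simplified stand-ins for the models and helpers above -- not
# Danswer's real classes or signatures.
from dataclasses import dataclass, field


@dataclass
class FakeDocumentSet:
    id: int
    is_up_to_date: bool = True
    connector_credential_pairs: list[int] = field(default_factory=list)


def mark_document_set_as_to_be_deleted(doc_set: FakeDocumentSet) -> None:
    # API-facing phase: refuse if a sync is already in flight, otherwise drop
    # the cc-pair links and flag the row so the background job picks it up.
    if not doc_set.is_up_to_date:
        raise ValueError("Cannot delete document set while it is syncing.")
    doc_set.connector_credential_pairs.clear()
    doc_set.is_up_to_date = False


def sync_document_set(
    doc_set: FakeDocumentSet, all_sets: dict[int, FakeDocumentSet]
) -> None:
    # Background phase: a set left with no cc-pair links is deleted outright;
    # anything else is simply marked as synced again.
    if not doc_set.connector_credential_pairs:
        del all_sets[doc_set.id]
    else:
        doc_set.is_up_to_date = True


if __name__ == "__main__":
    sets = {1: FakeDocumentSet(id=1, connector_credential_pairs=[10, 11])}
    mark_document_set_as_to_be_deleted(sets[1])
    print(sets)  # row still present, just flagged and stripped of cc pairs
    sync_document_set(sets[1], sets)
    print(sets)  # {} -- the row is only gone after the background pass

Running the sketch shows the row surviving the first step and disappearing only after the "sync" step, which mirrors why the frontend below treats such sets as still syncing.
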
diff --git a/backend/danswer/server/document_set.py b/backend/danswer/server/document_set.py
index f0a2f9886816..013069409817 100644
--- a/backend/danswer/server/document_set.py
+++ b/backend/danswer/server/document_set.py
@@ -5,9 +5,9 @@ from sqlalchemy.orm import Session

 from danswer.auth.users import current_admin_user
 from danswer.auth.users import current_user
-from danswer.db.document_set import delete_document_set as delete_document_set_from_db
 from danswer.db.document_set import fetch_document_sets
 from danswer.db.document_set import insert_document_set
+from danswer.db.document_set import mark_document_set_as_to_be_deleted
 from danswer.db.document_set import update_document_set
 from danswer.db.engine import get_session
 from danswer.db.models import User
@@ -61,7 +61,7 @@ def delete_document_set(
     db_session: Session = Depends(get_session),
 ) -> None:
     try:
-        delete_document_set_from_db(
+        mark_document_set_as_to_be_deleted(
             document_set_id=document_set_id, db_session=db_session
         )
     except Exception as e:
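
With the route above switched to mark_document_set_as_to_be_deleted, a delete request now returns before the row is actually gone, and a set that is mid-sync is rejected with an error. The snippet below is a hypothetical client call showing how that error could surface; the base URL, port, path, and document set ID are made up, and it assumes the route is exposed as an HTTP DELETE that reports failures through a JSON "detail" field (which is what the frontend change further down reads).

# Hypothetical client call; the URL and ID (42) are made up for illustration,
# and the DELETE verb / JSON "detail" error field are assumptions about how
# the route above is exposed.
import requests

resp = requests.delete("http://localhost:8080/manage/admin/document-set/42")
if resp.ok:
    print("Deletion queued; the background sync job will remove the row.")
else:
    # Mirrors the frontend change below, which reads (await response.json()).detail
    print(f"Failed to delete document set - {resp.json()['detail']}")
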
diff --git a/web/src/app/admin/documents/sets/page.tsx b/web/src/app/admin/documents/sets/page.tsx
index 6876559b0d0d..7b6afb2c8dae 100644
--- a/web/src/app/admin/documents/sets/page.tsx
+++ b/web/src/app/admin/documents/sets/page.tsx
@@ -4,7 +4,12 @@ import { Button } from "@/components/Button";
 import { LoadingAnimation, ThreeDotsLoader } from "@/components/Loading";
 import { PageSelector } from "@/components/PageSelector";
 import { BasicTable } from "@/components/admin/connectors/BasicTable";
-import { BookmarkIcon, EditIcon, TrashIcon } from "@/components/icons/icons";
+import {
+  BookmarkIcon,
+  EditIcon,
+  InfoIcon,
+  TrashIcon,
+} from "@/components/icons/icons";
 import { useConnectorCredentialIndexingStatus } from "@/lib/hooks";
 import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types";
 import { useState } from "react";
@@ -28,8 +33,9 @@ const EditRow = ({
   refreshDocumentSets: () => void;
 }) => {
   const [isEditPopupOpen, setEditPopupOpen] = useState(false);
+  const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false);
   return (
-    <>
+    <div>
       {isEditPopupOpen && (
       )}
+      {isSyncingTooltipOpen && (
+        <div>
+          <InfoIcon />{" "}
+          Cannot update while syncing! Wait for the sync to finish, then try
+          again.
+        </div>
+      )}
-      <div className="my-auto cursor-pointer" onClick={() => setEditPopupOpen(true)}>
+      <div
+        className={
+          "my-auto" + (documentSet.is_up_to_date ? " cursor-pointer" : "")
+        }
+        onClick={() => {
+          if (documentSet.is_up_to_date) {
+            setEditPopupOpen(true);
+          }
+        }}
+        onMouseEnter={() => {
+          if (!documentSet.is_up_to_date) {
+            setIsSyncingTooltipOpen(true);
+          }
+        }}
+        onMouseLeave={() => {
+          if (!documentSet.is_up_to_date) {
+            setIsSyncingTooltipOpen(false);
+          }
+        }}
+      >
         <EditIcon />
       </div>
-    </>
+    </div>
   );
 };
@@ -100,6 +129,7 @@ const DocumentSetTable = ({
         },
       ]}
       data={documentSets
+        .filter((documentSet) => documentSet.cc_pair_descriptors.length > 0)
        .slice((page - 1) * numToDisplay, page * numToDisplay)
        .map((documentSet) => {
          return {
@@ -156,7 +186,7 @@
                      type: "success",
                    });
                  } else {
-                    const errorMsg = await response.text();
+                    const errorMsg = (await response.json()).detail;
                    setPopup({
                      message: `Failed to delete document set - ${errorMsg}`,
                      type: "error",
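
For reference, the fetch_document_sets change near the top relies on LEFT OUTER JOINs, so a document set with no connector/credential pairs comes back as a (document_set, None) row and has to aggregate to an empty list. A minimal, self-contained illustration of that aggregation follows; the string values are made-up stand-ins for the real ORM objects.

# Made-up rows standing in for the (DocumentSet, ConnectorCredentialPair | None)
# tuples returned by the outer-join query in fetch_document_sets.
rows = [
    ("set-a", "cc-pair-1"),
    ("set-a", "cc-pair-2"),
    ("set-b", None),  # LEFT OUTER JOIN result: a document set with no cc pairs
]

aggregated: dict[str, list[str]] = {}
for document_set, cc_pair in rows:
    if document_set not in aggregated:
        aggregated[document_set] = [cc_pair] if cc_pair else []
    elif cc_pair:
        aggregated[document_set].append(cc_pair)

print(aggregated)  # {'set-a': ['cc-pair-1', 'cc-pair-2'], 'set-b': []}

The frontend change above then hides such empty sets from the table (the added .filter on cc_pair_descriptors) until the background job deletes them.
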