Handle document_set deletion better + prevent updates while syncing

This commit is contained in:
Weves
2023-09-26 17:13:16 -07:00
committed by Chris Weaver
parent 60cddee310
commit 5f25826a98
4 changed files with 108 additions and 20 deletions

View File

@@ -5,6 +5,7 @@ from uuid import UUID
from sqlalchemy import and_
from sqlalchemy import delete
from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -101,8 +102,14 @@ def update_document_set(
)
if document_set_row is None:
raise ValueError(
f"No document set with ID {document_set_update_request.id}"
f"No document set with ID '{document_set_update_request.id}'"
)
if not document_set_row.is_up_to_date:
raise ValueError(
"Cannot update document set while it is syncing. Please wait "
"for it to finish syncing, and then try again."
)
document_set_row.description = document_set_update_request.description
document_set_row.is_up_to_date = False
@@ -144,7 +151,23 @@ def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> No
db_session.commit()
def delete_document_set(document_set_id: int, db_session: Session) -> None:
def delete_document_set(
document_set_row: DocumentSetDBModel, db_session: Session
) -> None:
# delete all relationships to CC pairs
_delete_document_set_cc_pairs(
db_session=db_session, document_set_id=document_set_row.id
)
db_session.delete(document_set_row)
db_session.commit()
def mark_document_set_as_to_be_deleted(
document_set_id: int, db_session: Session
) -> None:
"""Cleans up all document_set -> cc_pair relationships and marks the document set
as needing an update. The actual document set row will be deleted by the background
job which syncs these changes to Vespa."""
# start a transaction
db_session.begin()
@@ -154,13 +177,19 @@ def delete_document_set(document_set_id: int, db_session: Session) -> None:
)
if document_set_row is None:
raise ValueError(f"No document set with ID: '{document_set_id}'")
if not document_set_row.is_up_to_date:
raise ValueError(
"Cannot delete document set while it is syncing. Please wait "
"for it to finish syncing, and then try again."
)
# delete all relationships to CC pairs
_delete_document_set_cc_pairs(
db_session=db_session, document_set_id=document_set_id
)
# delete the actual document set row
db_session.delete(document_set_row)
# mark the row as needing a sync, it will be deleted there since there
# are no more relationships to cc pairs
document_set_row.is_up_to_date = False
db_session.commit()
except:
db_session.rollback()
@@ -174,21 +203,27 @@ def fetch_document_sets(
1. The document set itself
2. All CC pairs associated with the document set"""
results = cast(
list[tuple[DocumentSetDBModel, ConnectorCredentialPair]],
list[tuple[DocumentSetDBModel, ConnectorCredentialPair | None]],
db_session.execute(
select(DocumentSetDBModel, ConnectorCredentialPair)
.join(
DocumentSet__ConnectorCredentialPair,
DocumentSetDBModel.id
== DocumentSet__ConnectorCredentialPair.document_set_id,
isouter=True, # outer join is needed to also fetch document sets with no cc pairs
)
.join(
ConnectorCredentialPair,
ConnectorCredentialPair.id
== DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,
isouter=True, # outer join is needed to also fetch document sets with no cc pairs
)
.where(
DocumentSet__ConnectorCredentialPair.is_current == True # noqa: E712
or_(
DocumentSet__ConnectorCredentialPair.is_current
== True, # noqa: E712
DocumentSet__ConnectorCredentialPair.is_current.is_(None),
)
)
).all(),
)
@@ -198,8 +233,12 @@ def fetch_document_sets(
] = {}
for document_set, cc_pair in results:
if document_set.id not in aggregated_results:
aggregated_results[document_set.id] = (document_set, [cc_pair])
aggregated_results[document_set.id] = (
document_set,
[cc_pair] if cc_pair else [],
)
else:
if cc_pair:
aggregated_results[document_set.id][1].append(cc_pair)
return [

View File

@@ -1,13 +1,18 @@
from typing import cast
from sqlalchemy.orm import Session
from danswer.datastores.document_index import get_default_document_index
from danswer.datastores.interfaces import DocumentIndex
from danswer.datastores.interfaces import UpdateRequest
from danswer.db.document import prepare_to_modify_documents
from danswer.db.document_set import delete_document_set
from danswer.db.document_set import fetch_document_sets_for_documents
from danswer.db.document_set import fetch_documents_for_document_set
from danswer.db.document_set import get_document_set_by_id
from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.models import DocumentSet
from danswer.utils.batching import batch_generator
from danswer.utils.logger import setup_logger
@@ -59,6 +64,20 @@ def sync_document_set(document_set_id: int) -> None:
document_index=document_index,
)
# if there are no connectors, then delete the document set. Otherwise, just
# mark it as successfully synced.
document_set = cast(
DocumentSet,
get_document_set_by_id(
db_session=db_session, document_set_id=document_set_id
),
) # casting since we "know" a document set with this ID exists
if not document_set.connector_credential_pairs:
delete_document_set(document_set_row=document_set, db_session=db_session)
logger.info(
f"Successfully deleted document set with ID: '{document_set_id}'!"
)
else:
mark_document_set_as_synced(
document_set_id=document_set_id, db_session=db_session
)

View File

@@ -5,9 +5,9 @@ from sqlalchemy.orm import Session
from danswer.auth.users import current_admin_user
from danswer.auth.users import current_user
from danswer.db.document_set import delete_document_set as delete_document_set_from_db
from danswer.db.document_set import fetch_document_sets
from danswer.db.document_set import insert_document_set
from danswer.db.document_set import mark_document_set_as_to_be_deleted
from danswer.db.document_set import update_document_set
from danswer.db.engine import get_session
from danswer.db.models import User
@@ -61,7 +61,7 @@ def delete_document_set(
db_session: Session = Depends(get_session),
) -> None:
try:
delete_document_set_from_db(
mark_document_set_as_to_be_deleted(
document_set_id=document_set_id, db_session=db_session
)
except Exception as e:

View File

@@ -4,7 +4,12 @@ import { Button } from "@/components/Button";
import { LoadingAnimation, ThreeDotsLoader } from "@/components/Loading";
import { PageSelector } from "@/components/PageSelector";
import { BasicTable } from "@/components/admin/connectors/BasicTable";
import { BookmarkIcon, EditIcon, TrashIcon } from "@/components/icons/icons";
import {
BookmarkIcon,
EditIcon,
InfoIcon,
TrashIcon,
} from "@/components/icons/icons";
import { useConnectorCredentialIndexingStatus } from "@/lib/hooks";
import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types";
import { useState } from "react";
@@ -28,8 +33,9 @@ const EditRow = ({
refreshDocumentSets: () => void;
}) => {
const [isEditPopupOpen, setEditPopupOpen] = useState(false);
const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false);
return (
<>
<div className="relative flex">
{isEditPopupOpen && (
<DocumentSetCreationForm
ccPairs={ccPairs}
@@ -41,13 +47,36 @@ const EditRow = ({
existingDocumentSet={documentSet}
/>
)}
{isSyncingTooltipOpen && (
<div className="flex flex-nowrap absolute w-64 top-0 left-0 mt-8 bg-gray-700 px-3 py-2 rounded shadow-lg">
<InfoIcon className="mt-1 flex flex-shrink-0 mr-2 text-gray-300" />{" "}
Cannot update while syncing! Wait for the sync to finish, then try
again.
</div>
)}
<div
className="cursor-pointer my-auto"
onClick={() => setEditPopupOpen(true)}
className={
"my-auto" + (documentSet.is_up_to_date ? " cursor-pointer" : "")
}
onClick={() => {
if (documentSet.is_up_to_date) {
setEditPopupOpen(true);
}
}}
onMouseEnter={() => {
if (!documentSet.is_up_to_date) {
setIsSyncingTooltipOpen(true);
}
}}
onMouseLeave={() => {
if (!documentSet.is_up_to_date) {
setIsSyncingTooltipOpen(false);
}
}}
>
<EditIcon />
</div>
</>
</div>
);
};
@@ -100,6 +129,7 @@ const DocumentSetTable = ({
},
]}
data={documentSets
.filter((documentSet) => documentSet.cc_pair_descriptors.length > 0)
.slice((page - 1) * numToDisplay, page * numToDisplay)
.map((documentSet) => {
return {
@@ -156,7 +186,7 @@ const DocumentSetTable = ({
type: "success",
});
} else {
const errorMsg = await response.text();
const errorMsg = (await response.json()).detail;
setPopup({
message: `Failed to delete document set - ${errorMsg}`,
type: "error",