mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-22 17:16:20 +02:00
Handle document_set deletion better + prevent updates while syncing
This commit is contained in:
@@ -5,6 +5,7 @@ from uuid import UUID
|
||||
from sqlalchemy import and_
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -101,8 +102,14 @@ def update_document_set(
|
||||
)
|
||||
if document_set_row is None:
|
||||
raise ValueError(
|
||||
f"No document set with ID {document_set_update_request.id}"
|
||||
f"No document set with ID '{document_set_update_request.id}'"
|
||||
)
|
||||
if not document_set_row.is_up_to_date:
|
||||
raise ValueError(
|
||||
"Cannot update document set while it is syncing. Please wait "
|
||||
"for it to finish syncing, and then try again."
|
||||
)
|
||||
|
||||
document_set_row.description = document_set_update_request.description
|
||||
document_set_row.is_up_to_date = False
|
||||
|
||||
@@ -144,7 +151,23 @@ def mark_document_set_as_synced(document_set_id: int, db_session: Session) -> No
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def delete_document_set(document_set_id: int, db_session: Session) -> None:
|
||||
def delete_document_set(
|
||||
document_set_row: DocumentSetDBModel, db_session: Session
|
||||
) -> None:
|
||||
# delete all relationships to CC pairs
|
||||
_delete_document_set_cc_pairs(
|
||||
db_session=db_session, document_set_id=document_set_row.id
|
||||
)
|
||||
db_session.delete(document_set_row)
|
||||
db_session.commit()
|
||||
|
||||
|
||||
def mark_document_set_as_to_be_deleted(
|
||||
document_set_id: int, db_session: Session
|
||||
) -> None:
|
||||
"""Cleans up all document_set -> cc_pair relationships and marks the document set
|
||||
as needing an update. The actual document set row will be deleted by the background
|
||||
job which syncs these changes to Vespa."""
|
||||
# start a transaction
|
||||
db_session.begin()
|
||||
|
||||
@@ -154,13 +177,19 @@ def delete_document_set(document_set_id: int, db_session: Session) -> None:
|
||||
)
|
||||
if document_set_row is None:
|
||||
raise ValueError(f"No document set with ID: '{document_set_id}'")
|
||||
if not document_set_row.is_up_to_date:
|
||||
raise ValueError(
|
||||
"Cannot delete document set while it is syncing. Please wait "
|
||||
"for it to finish syncing, and then try again."
|
||||
)
|
||||
|
||||
# delete all relationships to CC pairs
|
||||
_delete_document_set_cc_pairs(
|
||||
db_session=db_session, document_set_id=document_set_id
|
||||
)
|
||||
# delete the actual document set row
|
||||
db_session.delete(document_set_row)
|
||||
# mark the row as needing a sync, it will be deleted there since there
|
||||
# are no more relationships to cc pairs
|
||||
document_set_row.is_up_to_date = False
|
||||
db_session.commit()
|
||||
except:
|
||||
db_session.rollback()
|
||||
@@ -174,21 +203,27 @@ def fetch_document_sets(
|
||||
1. The document set itself
|
||||
2. All CC pairs associated with the document set"""
|
||||
results = cast(
|
||||
list[tuple[DocumentSetDBModel, ConnectorCredentialPair]],
|
||||
list[tuple[DocumentSetDBModel, ConnectorCredentialPair | None]],
|
||||
db_session.execute(
|
||||
select(DocumentSetDBModel, ConnectorCredentialPair)
|
||||
.join(
|
||||
DocumentSet__ConnectorCredentialPair,
|
||||
DocumentSetDBModel.id
|
||||
== DocumentSet__ConnectorCredentialPair.document_set_id,
|
||||
isouter=True, # outer join is needed to also fetch document sets with no cc pairs
|
||||
)
|
||||
.join(
|
||||
ConnectorCredentialPair,
|
||||
ConnectorCredentialPair.id
|
||||
== DocumentSet__ConnectorCredentialPair.connector_credential_pair_id,
|
||||
isouter=True, # outer join is needed to also fetch document sets with no cc pairs
|
||||
)
|
||||
.where(
|
||||
DocumentSet__ConnectorCredentialPair.is_current == True # noqa: E712
|
||||
or_(
|
||||
DocumentSet__ConnectorCredentialPair.is_current
|
||||
== True, # noqa: E712
|
||||
DocumentSet__ConnectorCredentialPair.is_current.is_(None),
|
||||
)
|
||||
)
|
||||
).all(),
|
||||
)
|
||||
@@ -198,8 +233,12 @@ def fetch_document_sets(
|
||||
] = {}
|
||||
for document_set, cc_pair in results:
|
||||
if document_set.id not in aggregated_results:
|
||||
aggregated_results[document_set.id] = (document_set, [cc_pair])
|
||||
aggregated_results[document_set.id] = (
|
||||
document_set,
|
||||
[cc_pair] if cc_pair else [],
|
||||
)
|
||||
else:
|
||||
if cc_pair:
|
||||
aggregated_results[document_set.id][1].append(cc_pair)
|
||||
|
||||
return [
|
||||
|
@@ -1,13 +1,18 @@
|
||||
from typing import cast
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.datastores.document_index import get_default_document_index
|
||||
from danswer.datastores.interfaces import DocumentIndex
|
||||
from danswer.datastores.interfaces import UpdateRequest
|
||||
from danswer.db.document import prepare_to_modify_documents
|
||||
from danswer.db.document_set import delete_document_set
|
||||
from danswer.db.document_set import fetch_document_sets_for_documents
|
||||
from danswer.db.document_set import fetch_documents_for_document_set
|
||||
from danswer.db.document_set import get_document_set_by_id
|
||||
from danswer.db.document_set import mark_document_set_as_synced
|
||||
from danswer.db.engine import get_sqlalchemy_engine
|
||||
from danswer.db.models import DocumentSet
|
||||
from danswer.utils.batching import batch_generator
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
@@ -59,6 +64,20 @@ def sync_document_set(document_set_id: int) -> None:
|
||||
document_index=document_index,
|
||||
)
|
||||
|
||||
# if there are no connectors, then delete the document set. Otherwise, just
|
||||
# mark it as successfully synced.
|
||||
document_set = cast(
|
||||
DocumentSet,
|
||||
get_document_set_by_id(
|
||||
db_session=db_session, document_set_id=document_set_id
|
||||
),
|
||||
) # casting since we "know" a document set with this ID exists
|
||||
if not document_set.connector_credential_pairs:
|
||||
delete_document_set(document_set_row=document_set, db_session=db_session)
|
||||
logger.info(
|
||||
f"Successfully deleted document set with ID: '{document_set_id}'!"
|
||||
)
|
||||
else:
|
||||
mark_document_set_as_synced(
|
||||
document_set_id=document_set_id, db_session=db_session
|
||||
)
|
||||
|
@@ -5,9 +5,9 @@ from sqlalchemy.orm import Session
|
||||
|
||||
from danswer.auth.users import current_admin_user
|
||||
from danswer.auth.users import current_user
|
||||
from danswer.db.document_set import delete_document_set as delete_document_set_from_db
|
||||
from danswer.db.document_set import fetch_document_sets
|
||||
from danswer.db.document_set import insert_document_set
|
||||
from danswer.db.document_set import mark_document_set_as_to_be_deleted
|
||||
from danswer.db.document_set import update_document_set
|
||||
from danswer.db.engine import get_session
|
||||
from danswer.db.models import User
|
||||
@@ -61,7 +61,7 @@ def delete_document_set(
|
||||
db_session: Session = Depends(get_session),
|
||||
) -> None:
|
||||
try:
|
||||
delete_document_set_from_db(
|
||||
mark_document_set_as_to_be_deleted(
|
||||
document_set_id=document_set_id, db_session=db_session
|
||||
)
|
||||
except Exception as e:
|
||||
|
@@ -4,7 +4,12 @@ import { Button } from "@/components/Button";
|
||||
import { LoadingAnimation, ThreeDotsLoader } from "@/components/Loading";
|
||||
import { PageSelector } from "@/components/PageSelector";
|
||||
import { BasicTable } from "@/components/admin/connectors/BasicTable";
|
||||
import { BookmarkIcon, EditIcon, TrashIcon } from "@/components/icons/icons";
|
||||
import {
|
||||
BookmarkIcon,
|
||||
EditIcon,
|
||||
InfoIcon,
|
||||
TrashIcon,
|
||||
} from "@/components/icons/icons";
|
||||
import { useConnectorCredentialIndexingStatus } from "@/lib/hooks";
|
||||
import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types";
|
||||
import { useState } from "react";
|
||||
@@ -28,8 +33,9 @@ const EditRow = ({
|
||||
refreshDocumentSets: () => void;
|
||||
}) => {
|
||||
const [isEditPopupOpen, setEditPopupOpen] = useState(false);
|
||||
const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false);
|
||||
return (
|
||||
<>
|
||||
<div className="relative flex">
|
||||
{isEditPopupOpen && (
|
||||
<DocumentSetCreationForm
|
||||
ccPairs={ccPairs}
|
||||
@@ -41,13 +47,36 @@ const EditRow = ({
|
||||
existingDocumentSet={documentSet}
|
||||
/>
|
||||
)}
|
||||
{isSyncingTooltipOpen && (
|
||||
<div className="flex flex-nowrap absolute w-64 top-0 left-0 mt-8 bg-gray-700 px-3 py-2 rounded shadow-lg">
|
||||
<InfoIcon className="mt-1 flex flex-shrink-0 mr-2 text-gray-300" />{" "}
|
||||
Cannot update while syncing! Wait for the sync to finish, then try
|
||||
again.
|
||||
</div>
|
||||
)}
|
||||
<div
|
||||
className="cursor-pointer my-auto"
|
||||
onClick={() => setEditPopupOpen(true)}
|
||||
className={
|
||||
"my-auto" + (documentSet.is_up_to_date ? " cursor-pointer" : "")
|
||||
}
|
||||
onClick={() => {
|
||||
if (documentSet.is_up_to_date) {
|
||||
setEditPopupOpen(true);
|
||||
}
|
||||
}}
|
||||
onMouseEnter={() => {
|
||||
if (!documentSet.is_up_to_date) {
|
||||
setIsSyncingTooltipOpen(true);
|
||||
}
|
||||
}}
|
||||
onMouseLeave={() => {
|
||||
if (!documentSet.is_up_to_date) {
|
||||
setIsSyncingTooltipOpen(false);
|
||||
}
|
||||
}}
|
||||
>
|
||||
<EditIcon />
|
||||
</div>
|
||||
</>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
@@ -100,6 +129,7 @@ const DocumentSetTable = ({
|
||||
},
|
||||
]}
|
||||
data={documentSets
|
||||
.filter((documentSet) => documentSet.cc_pair_descriptors.length > 0)
|
||||
.slice((page - 1) * numToDisplay, page * numToDisplay)
|
||||
.map((documentSet) => {
|
||||
return {
|
||||
@@ -156,7 +186,7 @@ const DocumentSetTable = ({
|
||||
type: "success",
|
||||
});
|
||||
} else {
|
||||
const errorMsg = await response.text();
|
||||
const errorMsg = (await response.json()).detail;
|
||||
setPopup({
|
||||
message: `Failed to delete document set - ${errorMsg}`,
|
||||
type: "error",
|
||||
|
Reference in New Issue
Block a user