mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-30 01:30:45 +02:00
Fully remove visit API (#3621)
* v1
* update indexing logic
* update updates
* nit
* clean up args
* update for clarity + best practices
* nit + logs
* fix
* minor clean up
* remove logs
* quick nit
This commit is contained in:
@ -5,6 +5,7 @@ import sys
|
||||
from sqlalchemy import delete
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from onyx.db.document import delete_documents_complete__no_commit
|
||||
from onyx.db.enums import ConnectorCredentialPairStatus
|
||||
|
||||
# Modify sys.path
|
||||
@ -38,7 +39,6 @@ from onyx.db.engine import get_session_context_manager
|
||||
from onyx.document_index.factory import get_default_document_index
|
||||
from onyx.file_store.file_store import get_default_file_store
|
||||
from onyx.document_index.document_index_utils import get_both_index_names
|
||||
from onyx.db.document import delete_documents_complete__no_commit
|
||||
|
||||
# pylint: enable=E402
|
||||
# flake8: noqa: E402
|
||||
@ -71,13 +71,16 @@ def _unsafe_deletion(
|
||||
if not documents:
|
||||
break
|
||||
|
||||
document_ids = [document.id for document in documents]
|
||||
for doc_id in document_ids:
|
||||
document_index.delete_single(doc_id)
|
||||
for document in documents:
|
||||
document_index.delete_single(
|
||||
doc_id=document.id,
|
||||
tenant_id=None,
|
||||
chunk_count=document.chunk_count,
|
||||
)
|
||||
|
||||
delete_documents_complete__no_commit(
|
||||
db_session=db_session,
|
||||
document_ids=document_ids,
|
||||
document_ids=[document.id for document in documents],
|
||||
)
|
||||
|
||||
num_docs_deleted += len(documents)
|
||||
@ -216,6 +219,7 @@ if __name__ == "__main__":
|
||||
parser.add_argument(
|
||||
"connector_id", type=int, help="The ID of the connector to delete"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
with get_session_context_manager() as db_session:
|
||||
_delete_connector(args.connector_id, db_session)
|
||||
|
@ -15,6 +15,7 @@ from onyx.db.engine import get_session_context_manager # noqa: E402
|
||||
from onyx.db.document import delete_documents_complete__no_commit # noqa: E402
|
||||
from onyx.db.search_settings import get_current_search_settings # noqa: E402
|
||||
from onyx.document_index.vespa.index import VespaIndex # noqa: E402
|
||||
from onyx.db.document import get_document # noqa: E402
|
||||
|
||||
BATCH_SIZE = 100
|
||||
|
||||
@ -63,6 +64,9 @@ def main() -> None:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
|
||||
|
||||
def process_doc(doc_id: str) -> str | None:
|
||||
document = get_document(doc_id, db_session)
|
||||
if not document:
|
||||
return None
|
||||
# Check if document exists in Vespa first
|
||||
try:
|
||||
chunks = vespa_index.id_based_retrieval(
|
||||
@ -83,7 +87,9 @@ def main() -> None:
|
||||
|
||||
try:
|
||||
print(f"Deleting document {doc_id} in Vespa")
|
||||
chunks_deleted = vespa_index.delete_single(doc_id)
|
||||
chunks_deleted = vespa_index.delete_single(
|
||||
doc_id, tenant_id=None, chunk_count=document.chunk_count
|
||||
)
|
||||
if chunks_deleted > 0:
|
||||
print(
|
||||
f"Deleted {chunks_deleted} chunks for document {doc_id}"
|
||||
|
Reference in New Issue
Block a user