import time
from collections import defaultdict
from http import HTTPStatus

import httpx

from onyx.connectors.models import ConnectorFailure
from onyx.connectors.models import DocumentFailure
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import IndexBatchParams
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.utils.logger import setup_logger

logger = setup_logger()


def _log_insufficient_storage_error(e: Exception) -> None:
    if isinstance(e, httpx.HTTPStatusError):
        if e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:
            logger.error(
                "NOTE: HTTP Status 507 Insufficient Storage indicates "
                "you need to allocate more memory or disk space to the "
                "Vespa/index container."
            )


def write_chunks_to_vector_db_with_backoff(
    document_index: DocumentIndex,
    chunks: list[DocMetadataAwareIndexChunk],
    index_batch_params: IndexBatchParams,
) -> tuple[list[DocumentInsertionRecord], list[ConnectorFailure]]:
    """Tries to insert all chunks in one large batch. If that batch fails for
    any reason, goes document by document to isolate the failure(s).

    IMPORTANT: must pass in whole documents at a time, not individual chunks,
    since the vector DB interface assumes that all chunks for a single document
    are present.
    """

    # first try to write the chunks to the vector db
    try:
        return (
            list(
                document_index.index(
                    chunks=chunks,
                    index_batch_params=index_batch_params,
                )
            ),
            [],
        )
    except Exception as e:
        logger.exception(
            "Failed to write chunk batch to vector db. Trying individual docs."
        )

        # give some specific logging on this common failure case.
        _log_insufficient_storage_error(e)

    # wait a couple seconds just to give the vector db a chance to recover
    time.sleep(2)

    # try writing each doc one by one
    chunks_for_docs: dict[str, list[DocMetadataAwareIndexChunk]] = defaultdict(list)
    for chunk in chunks:
        chunks_for_docs[chunk.source_document.id].append(chunk)
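    # e.g. chunks from documents "doc-a" and "doc-b" end up grouped as
    # {"doc-a": [chunk_0, chunk_1], "doc-b": [chunk_2]} (illustrative ids)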

    insertion_records: list[DocumentInsertionRecord] = []
    failures: list[ConnectorFailure] = []
    for doc_id, chunks_for_doc in chunks_for_docs.items():
        try:
            insertion_records.extend(
                document_index.index(
                    chunks=chunks_for_doc,
                    index_batch_params=index_batch_params,
                )
            )
        except Exception as e:
            logger.exception(
                f"Failed to write document chunks for '{doc_id}' to vector db"
            )

            # give some specific logging on this common failure case.
            _log_insufficient_storage_error(e)

            failures.append(
                ConnectorFailure(
                    failed_document=DocumentFailure(
                        document_id=doc_id,
                        document_link=(
                            chunks_for_doc[0].get_link() if chunks_for_doc else None
                        ),
                    ),
                    failure_message=str(e),
                    exception=e,
                )
            )

    return insertion_records, failures
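
# Usage sketch (illustrative, not part of the original module): assuming the
# caller's indexing pipeline already holds a configured `DocumentIndex`, the
# prepared `chunks`, and the batch's `IndexBatchParams`, a call might look like:
#
#   insertion_records, failures = write_chunks_to_vector_db_with_backoff(
#       document_index=document_index,
#       chunks=chunks,
#       index_batch_params=index_batch_params,
#   )
#   for failure in failures:
#       logger.warning(
#           f"Failed to index document: {failure.failed_document.document_id}"
#       )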