From af66650ee39188d66b773bdfb4c2d473b6e79742 Mon Sep 17 00:00:00 2001 From: rkuo-danswer Date: Tue, 3 Sep 2024 10:01:17 -0700 Subject: [PATCH] fail safely if lookup for document fails (#2309) --- backend/danswer/indexing/indexing_pipeline.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 3517b55767d2..afe825d11ec4 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -7,6 +7,7 @@ from pydantic import ConfigDict from sqlalchemy.orm import Session from danswer.access.access import get_access_for_documents +from danswer.access.models import DocumentAccess from danswer.configs.app_configs import ENABLE_MULTIPASS_INDEXING from danswer.configs.app_configs import INDEXING_EXCEPTION_LIMIT from danswer.configs.constants import DEFAULT_BOOST @@ -263,6 +264,8 @@ def index_doc_batch( Note that the documents should already be batched at this point so that it does not inflate the memory requirements""" + no_access = DocumentAccess.build([], [], False) + ctx = index_doc_batch_prepare( document_batch=document_batch, index_attempt_metadata=index_attempt_metadata, @@ -307,7 +310,9 @@ def index_doc_batch( access_aware_chunks = [ DocMetadataAwareIndexChunk.from_index_chunk( index_chunk=chunk, - access=document_id_to_access_info[chunk.source_document.id], + access=document_id_to_access_info.get( + chunk.source_document.id, no_access + ), document_sets=set( document_id_to_document_set.get(chunk.source_document.id, []) ),