From 8c312482c16e6a7d95f4d5fc9d5c80a088728535 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Thu, 11 Jul 2024 20:38:12 -0700 Subject: [PATCH] fixed id retrieval from zip metadata (#1813) --- backend/danswer/connectors/file/connector.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 77d01394d..998a82f23 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -85,6 +85,11 @@ def _process_file( all_metadata = {**metadata, **file_metadata} if metadata else file_metadata + # add a prefix to avoid conflicts with other connectors + doc_id = f"FILE_CONNECTOR__{file_name}" + if metadata: + doc_id = metadata.get("id") or doc_id + # If this is set, we will show this in the UI as the "name" of the file file_display_name = all_metadata.get("file_display_name") or os.path.basename( file_name @@ -132,7 +137,7 @@ def _process_file( return [ Document( - id=f"FILE_CONNECTOR__{file_name}", # add a prefix to avoid conflicts with other connectors + id=doc_id, sections=[ Section(link=all_metadata.get("link"), text=file_content_raw.strip()) ],