From 820f8b7b4880e654384fd59a15ec120e688b7f3a Mon Sep 17 00:00:00 2001 From: Weves Date: Tue, 15 Aug 2023 18:05:39 -0700 Subject: [PATCH] Add document-level logging for each batch of indexed documents --- backend/danswer/background/update.py | 4 ++++ backend/danswer/connectors/models.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py index f16d16275..c5617da15 100755 --- a/backend/danswer/background/update.py +++ b/backend/danswer/background/update.py @@ -197,6 +197,10 @@ def run_indexing_jobs(db_session: Session) -> None: document_count = 0 chunk_count = 0 for doc_batch in doc_batch_generator: + logger.debug( + f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}" + ) + index_user_id = ( None if db_credential.public_doc else db_credential.user_id ) diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index 7dc59d2ec..7bd0c6834 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -28,6 +28,10 @@ class Document: semantic_identifier: str metadata: dict[str, Any] + def to_short_descriptor(self) -> str: + """Used when logging the identity of a document""" + return f"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'" + class InputType(str, Enum): LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file