Add document-level logging for each batch of indexed documents

This commit is contained in:
Weves 2023-08-15 18:05:39 -07:00 committed by Chris Weaver
parent 8fc74a4313
commit 820f8b7b48
2 changed files with 8 additions and 0 deletions

View File

@@ -197,6 +197,10 @@ def run_indexing_jobs(db_session: Session) -> None:
document_count = 0
chunk_count = 0
for doc_batch in doc_batch_generator:
logger.debug(
f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}"
)
index_user_id = (
None if db_credential.public_doc else db_credential.user_id
)

View File

@@ -28,6 +28,10 @@ class Document:
semantic_identifier: str
metadata: dict[str, Any]
def to_short_descriptor(self) -> str:
    """Build the short identity string used when logging a document.

    Returns:
        A single-line string containing this object's ``id`` and
        ``semantic_identifier``, each wrapped in single quotes.
    """
    # Bind both identifiers first so the format string stays easy to scan.
    doc_id = self.id
    semantic_id = self.semantic_identifier
    return f"ID: '{doc_id}'; Semantic ID: '{semantic_id}'"
class InputType(str, Enum):
LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file