mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-09 12:47:13 +02:00
Add document-level logging for each batch of indexed documents
This commit is contained in:
@@ -197,6 +197,10 @@ def run_indexing_jobs(db_session: Session) -> None:
|
|||||||
document_count = 0
|
document_count = 0
|
||||||
chunk_count = 0
|
chunk_count = 0
|
||||||
for doc_batch in doc_batch_generator:
|
for doc_batch in doc_batch_generator:
|
||||||
|
logger.debug(
|
||||||
|
f"Indexing batch of documents: {[doc.to_short_descriptor() for doc in doc_batch]}"
|
||||||
|
)
|
||||||
|
|
||||||
index_user_id = (
|
index_user_id = (
|
||||||
None if db_credential.public_doc else db_credential.user_id
|
None if db_credential.public_doc else db_credential.user_id
|
||||||
)
|
)
|
||||||
|
@@ -28,6 +28,10 @@ class Document:
|
|||||||
semantic_identifier: str
|
semantic_identifier: str
|
||||||
metadata: dict[str, Any]
|
metadata: dict[str, Any]
|
||||||
|
|
||||||
|
def to_short_descriptor(self) -> str:
|
||||||
|
"""Used when logging the identity of a document"""
|
||||||
|
return f"ID: '{self.id}'; Semantic ID: '{self.semantic_identifier}'"
|
||||||
|
|
||||||
|
|
||||||
class InputType(str, Enum):
|
class InputType(str, Enum):
|
||||||
LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file
|
LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file
|
||||||
|
Reference in New Issue
Block a user