mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-20 13:05:49 +02:00
Add a bit more logging in indexing pipeline
This commit is contained in:
@@ -85,7 +85,7 @@ def _indexing_pipeline(
|
|||||||
doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at
|
doc.id: doc.doc_updated_at for doc in db_docs if doc.doc_updated_at
|
||||||
}
|
}
|
||||||
|
|
||||||
updatable_docs = []
|
updatable_docs: list[Document] = []
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
if (
|
if (
|
||||||
doc.id in id_update_time_map
|
doc.id in id_update_time_map
|
||||||
@@ -107,12 +107,12 @@ def _indexing_pipeline(
|
|||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
logger.debug("Starting chunking")
|
||||||
chunks: list[DocAwareChunk] = list(
|
chunks: list[DocAwareChunk] = list(
|
||||||
chain(*[chunker.chunk(document=document) for document in updatable_docs])
|
chain(*[chunker.chunk(document=document) for document in updatable_docs])
|
||||||
)
|
)
|
||||||
logger.debug(
|
|
||||||
f"Indexing the following chunks: {[chunk.to_short_descriptor() for chunk in chunks]}"
|
logger.debug("Starting embedding")
|
||||||
)
|
|
||||||
chunks_with_embeddings = embedder.embed(chunks=chunks)
|
chunks_with_embeddings = embedder.embed(chunks=chunks)
|
||||||
|
|
||||||
# Attach the latest status from Postgres (source of truth for access) to each
|
# Attach the latest status from Postgres (source of truth for access) to each
|
||||||
@@ -138,6 +138,9 @@ def _indexing_pipeline(
|
|||||||
for chunk in chunks_with_embeddings
|
for chunk in chunks_with_embeddings
|
||||||
]
|
]
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Indexing the following chunks: {[chunk.to_short_descriptor() for chunk in chunks]}"
|
||||||
|
)
|
||||||
# A document will not be spread across different batches, so all the
|
# A document will not be spread across different batches, so all the
|
||||||
# documents with chunks in this set, are fully represented by the chunks
|
# documents with chunks in this set, are fully represented by the chunks
|
||||||
# in this set
|
# in this set
|
||||||
|
Reference in New Issue
Block a user