Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-04-08 11:58:34 +02:00)
Skip shortcut docs (#1999)

commit d619602a6f
parent 348a2176f0
@@ -165,6 +165,11 @@ def index_doc_batch(
         if not ignore_time_skip
         else documents
     )
+
+    # No docs to update either because the batch is empty or every doc was already indexed
+    if not updatable_docs:
+        return 0, 0
+
     updatable_ids = [doc.id for doc in updatable_docs]
 
     # Create records in the source of truth about these documents,
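The first hunk adds an early return so index_doc_batch stops as soon as the batch contains nothing to index. A minimal sketch of the same guard pattern, using hypothetical names (the dict-based filter below stands in for the real time-skip logic and is not part of the diff):

    def index_doc_batch_sketch(documents, ignore_time_skip=False):
        # Hypothetical filter standing in for the real time-skip check.
        updatable_docs = (
            [d for d in documents if d.get("updated")]
            if not ignore_time_skip
            else documents
        )

        # Mirrors the added guard: nothing to update, so report zero docs
        # and zero chunks indexed.
        if not updatable_docs:
            return 0, 0

        # ... chunking, embedding, and indexing would follow here ...
        return len(updatable_docs), 0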
@@ -184,8 +189,12 @@ def index_doc_batch(
     ]
 
     logger.debug("Starting embedding")
-    chunks_with_embeddings = embedder.embed_chunks(
-        chunks=chunks,
+    chunks_with_embeddings = (
+        embedder.embed_chunks(
+            chunks=chunks,
+        )
+        if chunks
+        else []
     )
 
     # Acquires a lock on the documents so that no other process can modify them
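The second hunk wraps the embedding call in a conditional expression so embedder.embed_chunks is never invoked with an empty chunk list. A standalone illustration of that pattern (embed_texts is a hypothetical stand-in for the embedder, not part of the diff):

    def embed_if_any(chunks, embed_texts):
        # Same shape as the change above: skip the embedder entirely when
        # there is nothing to embed, and fall back to an empty list.
        return embed_texts(chunks) if chunks else []

    print(embed_if_any([], lambda c: [len(x) for x in c]))           # []
    print(embed_if_any(["a", "bb"], lambda c: [len(x) for x in c]))  # [1, 2]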
@@ -107,7 +107,7 @@ def upsert_ingestion_doc(
         db_session=db_session,
     )
 
-    new_doc, chunks = indexing_pipeline(
+    new_doc, __chunk_count = indexing_pipeline(
         document_batch=[document],
         index_attempt_metadata=IndexAttemptMetadata(
             connector_id=cc_pair.connector_id,
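The third hunk renames the unused second return value in upsert_ingestion_doc from chunks to __chunk_count, matching the counts-based return seen in the first hunk (return 0, 0) and signalling that the caller ignores it. A small sketch of that calling convention, with a hypothetical pipeline stand-in:

    def indexing_pipeline_sketch(document_batch):
        # Stand-in for the real pipeline: returns (new doc count, chunk count).
        return len(document_batch), sum(len(d) for d in document_batch)

    # The leading underscores mark the chunk count as intentionally unused.
    new_doc, __chunk_count = indexing_pipeline_sketch([["chunk-a", "chunk-b"]])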