Skip shortcut docs (#1999)

This commit is contained in:
Yuhong Sun
2024-07-31 09:51:01 -07:00
committed by GitHub
parent 348a2176f0
commit d619602a6f
2 changed files with 12 additions and 3 deletions

View File

@@ -165,6 +165,11 @@ def index_doc_batch(
         if not ignore_time_skip
         else documents
     )
+    # No docs to update either because the batch is empty or every doc was already indexed
+    if not updatable_docs:
+        return 0, 0
     updatable_ids = [doc.id for doc in updatable_docs]
     # Create records in the source of truth about these documents,
@@ -184,8 +189,12 @@ def index_doc_batch(
     ]
     logger.debug("Starting embedding")
-    chunks_with_embeddings = embedder.embed_chunks(
-        chunks=chunks,
+    chunks_with_embeddings = (
+        embedder.embed_chunks(
+            chunks=chunks,
+        )
+        if chunks
+        else []
     )
     # Acquires a lock on the documents so that no other process can modify them

View File

@@ -107,7 +107,7 @@ def upsert_ingestion_doc(
         db_session=db_session,
     )
-    new_doc, chunks = indexing_pipeline(
+    new_doc, __chunk_count = indexing_pipeline(
         document_batch=[document],
         index_attempt_metadata=IndexAttemptMetadata(
             connector_id=cc_pair.connector_id,