mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-09 20:55:06 +02:00
Skip shortcut docs (#1999)
This commit is contained in:
@@ -165,6 +165,11 @@ def index_doc_batch(
|
|||||||
if not ignore_time_skip
|
if not ignore_time_skip
|
||||||
else documents
|
else documents
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# No docs to update, either because the batch is empty or because every doc was already indexed
|
||||||
|
if not updatable_docs:
|
||||||
|
return 0, 0
|
||||||
|
|
||||||
updatable_ids = [doc.id for doc in updatable_docs]
|
updatable_ids = [doc.id for doc in updatable_docs]
|
||||||
|
|
||||||
# Create records in the source of truth about these documents,
|
# Create records in the source of truth about these documents,
|
||||||
@@ -184,8 +189,12 @@ def index_doc_batch(
|
|||||||
]
|
]
|
||||||
|
|
||||||
logger.debug("Starting embedding")
|
logger.debug("Starting embedding")
|
||||||
chunks_with_embeddings = embedder.embed_chunks(
|
chunks_with_embeddings = (
|
||||||
chunks=chunks,
|
embedder.embed_chunks(
|
||||||
|
chunks=chunks,
|
||||||
|
)
|
||||||
|
if chunks
|
||||||
|
else []
|
||||||
)
|
)
|
||||||
|
|
||||||
# Acquires a lock on the documents so that no other process can modify them
|
# Acquires a lock on the documents so that no other process can modify them
|
||||||
|
@@ -107,7 +107,7 @@ def upsert_ingestion_doc(
|
|||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
)
|
)
|
||||||
|
|
||||||
new_doc, chunks = indexing_pipeline(
|
new_doc, __chunk_count = indexing_pipeline(
|
||||||
document_batch=[document],
|
document_batch=[document],
|
||||||
index_attempt_metadata=IndexAttemptMetadata(
|
index_attempt_metadata=IndexAttemptMetadata(
|
||||||
connector_id=cc_pair.connector_id,
|
connector_id=cc_pair.connector_id,
|
||||||
|
Reference in New Issue
Block a user