From ec0e55fd390a6f430d900accd464565867680602 Mon Sep 17 00:00:00 2001 From: pablonyx Date: Sun, 16 Feb 2025 12:49:25 -0800 Subject: [PATCH] Seeding count issue (#4009) * k * k * quick nit * nit --- backend/onyx/background/celery/tasks/shared/tasks.py | 1 + backend/onyx/seeding/load_docs.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/backend/onyx/background/celery/tasks/shared/tasks.py b/backend/onyx/background/celery/tasks/shared/tasks.py index a27c4723a959..35990570070e 100644 --- a/backend/onyx/background/celery/tasks/shared/tasks.py +++ b/backend/onyx/background/celery/tasks/shared/tasks.py @@ -105,6 +105,7 @@ def document_by_cc_pair_cleanup_task( tenant_id=tenant_id, chunk_count=chunk_count, ) + delete_documents_complete__no_commit( db_session=db_session, document_ids=[document_id], diff --git a/backend/onyx/seeding/load_docs.py b/backend/onyx/seeding/load_docs.py index 5c2a362b2bd6..40e848b3d026 100644 --- a/backend/onyx/seeding/load_docs.py +++ b/backend/onyx/seeding/load_docs.py @@ -61,10 +61,10 @@ def _create_indexable_chunks( doc_updated_at=None, primary_owners=[], secondary_owners=[], - chunk_count=1, + chunk_count=preprocessed_doc["chunk_ind"] + 1, ) - if preprocessed_doc["chunk_ind"] == 0: - ids_to_documents[document.id] = document + + ids_to_documents[document.id] = document chunk = DocMetadataAwareIndexChunk( chunk_id=preprocessed_doc["chunk_ind"], @@ -92,6 +92,7 @@ def _create_indexable_chunks( boost=DEFAULT_BOOST, large_chunk_id=None, ) + chunks.append(chunk) return list(ids_to_documents.values()), chunks @@ -192,6 +193,7 @@ def seed_initial_documents( last_successful_index_time=last_index_time, seeding_flow=True, ) + cc_pair_id = cast(int, result.data) processed_docs = fetch_versioned_implementation( "onyx.seeding.load_docs", @@ -249,4 +251,5 @@ def seed_initial_documents( .values(chunk_count=doc.chunk_count) ) + db_session.commit() kv_store.store(KV_DOCUMENTS_SEEDED_KEY, True)