From e3b2c9d944ee04332d510fe5764118536e44a0ad Mon Sep 17 00:00:00 2001
From: pablonyx
Date: Mon, 6 Jan 2025 09:17:00 -0800
Subject: [PATCH] Tracking update (#3605)

* tracking update

* k
---
 backend/onyx/document_index/vespa/index.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/backend/onyx/document_index/vespa/index.py b/backend/onyx/document_index/vespa/index.py
index eccd79252..80e5504fa 100644
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -338,7 +338,7 @@ class VespaIndex(DocumentIndex):
             # `old_version` documents.
 
             enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
-            for document_id, _ in doc_id_to_previous_chunk_cnt.items():
+            for document_id, doc_count in doc_id_to_previous_chunk_cnt.items():
                 last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
                 # If the document has no `chunk_count` in the database, we know that it
                 # has the old chunk ID system and we must check for the final chunk index
@@ -356,6 +356,10 @@ class VespaIndex(DocumentIndex):
                         http_client=http_client,
                     )
 
+                # If the document has previously indexed chunks, we know it previously existed
+                if doc_count or last_indexed_chunk:
+                    existing_docs.add(document_id)
+
                 enriched_doc_info = EnrichedDocumentIndexingInfo(
                     doc_id=document_id,
                     chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),