Tracking update (#3605)

* tracking update * k
2025-05-29 17:19:36 +02:00 · 2025-01-06 09:17:00 -08:00 · 2025-01-06 09:17:00 -08:00 · e3b2c9d944
commit e3b2c9d944
parent 6c9c25642d
1 changed file with 5 additions and 1 deletion
--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@@ -338,7 +338,7 @@ class VespaIndex(DocumentIndex):
            # `old_version` documents.

            enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
-            for document_id, _ in doc_id_to_previous_chunk_cnt.items():
+            for document_id, doc_count in doc_id_to_previous_chunk_cnt.items():
                last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
                # If the document has no `chunk_count` in the database, we know that it
                # has the old chunk ID system and we must check for the final chunk index
@@ -356,6 +356,10 @@ class VespaIndex(DocumentIndex):
                        http_client=http_client,
                    )

+                # If the document has previously indexed chunks, we know it previously existed
+                if doc_count or last_indexed_chunk:
+                    existing_docs.add(document_id)
+
                enriched_doc_info = EnrichedDocumentIndexingInfo(
                    doc_id=document_id,
                    chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),