mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-02 11:09:20 +02:00
parent
6c9c25642d
commit
e3b2c9d944
@ -338,7 +338,7 @@ class VespaIndex(DocumentIndex):
|
|||||||
# `old_version` documents.
|
# `old_version` documents.
|
||||||
|
|
||||||
enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
|
enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
|
||||||
for document_id, _ in doc_id_to_previous_chunk_cnt.items():
|
for document_id, doc_count in doc_id_to_previous_chunk_cnt.items():
|
||||||
last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
|
last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
|
||||||
# If the document has no `chunk_count` in the database, we know that it
|
# If the document has no `chunk_count` in the database, we know that it
|
||||||
# has the old chunk ID system and we must check for the final chunk index
|
# has the old chunk ID system and we must check for the final chunk index
|
||||||
@ -356,6 +356,10 @@ class VespaIndex(DocumentIndex):
|
|||||||
http_client=http_client,
|
http_client=http_client,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# If the document has previously indexed chunks, we know it previously existed
|
||||||
|
if doc_count or last_indexed_chunk:
|
||||||
|
existing_docs.add(document_id)
|
||||||
|
|
||||||
enriched_doc_info = EnrichedDocumentIndexingInfo(
|
enriched_doc_info = EnrichedDocumentIndexingInfo(
|
||||||
doc_id=document_id,
|
doc_id=document_id,
|
||||||
chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
|
chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user