Fully remove visit API (#3621)

* v1

* update indexing logic

* update updates

* nit

* clean up args

* update for clarity + best practices

* nit + logs

* fix

* minor clean up

* remove logs

* quick nit
Author: pablonyx (committed by GitHub)
Date: 2025-01-08 13:49:01 -08:00
Parent: eac73a1bf1
Commit: d7bc32c0ec
15 changed files with 397 additions and 254 deletions

View File

@@ -28,13 +28,35 @@ class RetryDocumentIndex:
         wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),
         stop=stop_after_delay(STOP_AFTER),
     )
-    def delete_single(self, doc_id: str) -> int:
-        return self.index.delete_single(doc_id)
+    def delete_single(
+        self,
+        doc_id: str,
+        *,
+        tenant_id: str | None,
+        chunk_count: int | None,
+    ) -> int:
+        return self.index.delete_single(
+            doc_id,
+            tenant_id=tenant_id,
+            chunk_count=chunk_count,
+        )

     @retry(
         retry=retry_if_exception_type(httpx.ReadTimeout),
         wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),
         stop=stop_after_delay(STOP_AFTER),
     )
-    def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
-        return self.index.update_single(doc_id, fields)
+    def update_single(
+        self,
+        doc_id: str,
+        *,
+        tenant_id: str | None,
+        chunk_count: int | None,
+        fields: VespaDocumentFields,
+    ) -> int:
+        return self.index.update_single(
+            doc_id,
+            tenant_id=tenant_id,
+            chunk_count=chunk_count,
+            fields=fields,
+        )
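
Editor's note: the @retry decoration above comes from the tenacity library already used by this class. A minimal, self-contained sketch of the wrapper pattern with the new keyword-only signature (the constant values and the wrapped `index` object are placeholders, not the project's real ones):

import httpx
from tenacity import retry, retry_if_exception_type, stop_after_delay, wait_random_exponential

MAX_WAIT = 30        # placeholder; the real constants live elsewhere in the module
STOP_AFTER = 5 * 60  # placeholder


class RetryDocumentIndexSketch:
    """Thin wrapper that retries Vespa calls when httpx raises a read timeout."""

    def __init__(self, index) -> None:
        self.index = index

    @retry(
        retry=retry_if_exception_type(httpx.ReadTimeout),
        wait=wait_random_exponential(multiplier=1, max=MAX_WAIT),
        stop=stop_after_delay(STOP_AFTER),
    )
    def delete_single(
        self, doc_id: str, *, tenant_id: str | None, chunk_count: int | None
    ) -> int:
        # Delegates to the underlying index; tenacity re-invokes this method on timeouts.
        return self.index.delete_single(
            doc_id, tenant_id=tenant_id, chunk_count=chunk_count
        )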

View File

@@ -12,6 +12,7 @@ from onyx.background.celery.tasks.shared.RetryDocumentIndex import RetryDocument
 from onyx.configs.constants import OnyxCeleryTask
 from onyx.db.document import delete_document_by_connector_credential_pair__no_commit
 from onyx.db.document import delete_documents_complete__no_commit
+from onyx.db.document import fetch_chunk_count_for_document
 from onyx.db.document import get_document
 from onyx.db.document import get_document_connector_count
 from onyx.db.document import mark_document_as_modified
@@ -80,7 +81,13 @@ def document_by_cc_pair_cleanup_task(
             # delete it from vespa and the db
             action = "delete"

-            chunks_affected = retry_index.delete_single(document_id)
+            chunk_count = fetch_chunk_count_for_document(document_id, db_session)
+            chunks_affected = retry_index.delete_single(
+                document_id,
+                tenant_id=tenant_id,
+                chunk_count=chunk_count,
+            )

             delete_documents_complete__no_commit(
                 db_session=db_session,
                 document_ids=[document_id],
@@ -110,7 +117,12 @@ def document_by_cc_pair_cleanup_task(
             )

             # update Vespa. OK if doc doesn't exist. Raises exception otherwise.
-            chunks_affected = retry_index.update_single(document_id, fields=fields)
+            chunks_affected = retry_index.update_single(
+                document_id,
+                tenant_id=tenant_id,
+                chunk_count=doc.chunk_count,
+                fields=fields,
+            )

             # there are still other cc_pair references to the doc, so just resync to Vespa
             delete_document_by_connector_credential_pair__no_commit(

View File

@@ -992,7 +992,12 @@ def vespa_metadata_sync_task(
         )

         # update Vespa. OK if doc doesn't exist. Raises exception otherwise.
-        chunks_affected = retry_index.update_single(document_id, fields)
+        chunks_affected = retry_index.update_single(
+            document_id,
+            tenant_id=tenant_id,
+            chunk_count=doc.chunk_count,
+            fields=fields,
+        )

         # update db last. Worst case = we crash right before this and
         # the sync might repeat again later

View File

@@ -685,20 +685,27 @@ def get_document_sources(
 def fetch_chunk_counts_for_documents(
     document_ids: list[str],
     db_session: Session,
-) -> list[tuple[str, int | None]]:
+) -> list[tuple[str, int]]:
     """
     Return a list of (document_id, chunk_count) tuples.
-    Note: chunk_count might be None if not set in DB,
-    so we declare it as Optional[int].
+    If a document_id is not found in the database, it will be returned with a chunk_count of 0.
     """
     stmt = select(DbDocument.id, DbDocument.chunk_count).where(
         DbDocument.id.in_(document_ids)
     )

-    # results is a list of 'Row' objects, each containing two columns
     results = db_session.execute(stmt).all()

-    # If DbDocument.id is guaranteed to be a string, you can just do row.id;
-    # otherwise cast to str if you need to be sure it's a string:
-    return [(str(row[0]), row[1]) for row in results]
-    # or row.id, row.chunk_count if they are named attributes in your ORM model
+    # Create a dictionary of document_id to chunk_count
+    chunk_counts = {str(row.id): row.chunk_count or 0 for row in results}
+
+    # Return a list of tuples, using 0 for documents not found in the database
+    return [(doc_id, chunk_counts.get(doc_id, 0)) for doc_id in document_ids]


+def fetch_chunk_count_for_document(
+    document_id: str,
+    db_session: Session,
+) -> int | None:
+    stmt = select(DbDocument.chunk_count).where(DbDocument.id == document_id)
+    return db_session.execute(stmt).scalar_one_or_none()
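
Editor's note: the important behavioral change above is that missing documents now come back with a count of 0 instead of being dropped or reported as None. A dependency-free illustration of that lookup-then-fill pattern (the data here is made up):

# `rows` stands in for the SQLAlchemy result; each entry is (document_id, chunk_count or None).
rows = [("doc-a", 12), ("doc-b", None)]

requested_ids = ["doc-a", "doc-b", "doc-missing"]

# Normalize NULL counts to 0 and index by document id.
chunk_counts = {doc_id: count or 0 for doc_id, count in rows}

# Every requested id gets an entry, defaulting to 0 when the document is unknown.
result = [(doc_id, chunk_counts.get(doc_id, 0)) for doc_id in requested_ids]

assert result == [("doc-a", 12), ("doc-b", 0), ("doc-missing", 0)]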

View File

@@ -8,7 +8,7 @@ from onyx.db.search_settings import get_current_search_settings
 from onyx.db.search_settings import get_secondary_search_settings
 from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
 from onyx.indexing.models import DocMetadataAwareIndexChunk
+from shared_configs.configs import MULTI_TENANT

 DEFAULT_BATCH_SIZE = 30
 DEFAULT_INDEX_NAME = "danswer_chunk"
@@ -37,7 +37,10 @@ def translate_boost_count_to_multiplier(boost: int) -> float:
     return 2 / (1 + math.exp(-1 * boost / 3))


-def assemble_document_chunk_info(
+# Assembles a list of Vespa chunk IDs for a document
+# given the required context. This can be used to directly query
+# Vespa's Document API.
+def get_document_chunk_ids(
     enriched_document_info_list: list[EnrichedDocumentIndexingInfo],
     tenant_id: str | None,
     large_chunks_enabled: bool,
@@ -110,10 +113,11 @@ def get_uuid_from_chunk_info(
         "large_" + str(large_chunk_id) if large_chunk_id is not None else str(chunk_id)
     )
     unique_identifier_string = "_".join([doc_str, chunk_index])
-    if tenant_id:
+    if tenant_id and MULTI_TENANT:
         unique_identifier_string += "_" + tenant_id

-    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
+    uuid_value = uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
+    return uuid_value


 def get_uuid_from_chunk_info_old(
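
Editor's note: the property this whole PR relies on is that chunk IDs are deterministic UUIDv5 values, so they can be recomputed from (document id, chunk index, tenant) without ever visiting Vespa. A self-contained sketch of that derivation (the exact string layout mirrors the diff above but should be treated as illustrative, not a stable contract):

import uuid


def sketch_chunk_uuid(
    document_id: str,
    chunk_id: int,
    tenant_id: str | None = None,
    large_chunk_id: int | None = None,
) -> uuid.UUID:
    # Deterministic: the same inputs always yield the same UUID, so chunk IDs can be
    # re-derived later for updates/deletes instead of being discovered via the visit API.
    chunk_index = (
        "large_" + str(large_chunk_id) if large_chunk_id is not None else str(chunk_id)
    )
    unique_identifier_string = "_".join([document_id, chunk_index])
    if tenant_id:
        unique_identifier_string += "_" + tenant_id
    return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)


# Example: enumerate the chunk UUIDs for a document known to have 4 chunks.
doc_chunk_ids = [sketch_chunk_uuid("doc-123", i, tenant_id="tenant-a") for i in range(4)]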

View File

@@ -109,7 +109,7 @@ class UpdateRequest:
     Does not update any of the None fields
     """

-    document_ids: list[str]
+    minimal_document_indexing_info: list[MinimalDocumentIndexingInfo]
     # all other fields except these 4 will always be left alone by the update request
     access: DocumentAccess | None = None
     document_sets: set[str] | None = None
@@ -136,7 +136,7 @@ class Verifiable(abc.ABC):
         index_name: str,
         secondary_index_name: str | None,
         *args: Any,
-        **kwargs: Any
+        **kwargs: Any,
     ) -> None:
         super().__init__(*args, **kwargs)
         self.index_name = index_name
@@ -218,7 +218,13 @@ class Deletable(abc.ABC):
     """

     @abc.abstractmethod
-    def delete_single(self, doc_id: str) -> int:
+    def delete_single(
+        self,
+        doc_id: str,
+        *,
+        tenant_id: str | None,
+        chunk_count: int | None,
+    ) -> int:
         """
         Given a single document id, hard delete it from the document index
@@ -239,7 +245,14 @@ class Updatable(abc.ABC):
     """

     @abc.abstractmethod
-    def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
+    def update_single(
+        self,
+        doc_id: str,
+        *,
+        tenant_id: str | None,
+        chunk_count: int | None,
+        fields: VespaDocumentFields,
+    ) -> int:
         """
         Updates all chunks for a document with the specified fields.
         None values mean that the field does not need an update.
@@ -257,7 +270,9 @@ class Updatable(abc.ABC):
         raise NotImplementedError

     @abc.abstractmethod
-    def update(self, update_requests: list[UpdateRequest]) -> None:
+    def update(
+        self, update_requests: list[UpdateRequest], *, tenant_id: str | None
+    ) -> None:
         """
         Updates some set of chunks. The document and fields to update are specified in the update
         requests. Each update request in the list applies its changes to a list of document ids.
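
Editor's note: the bare `*` in these signatures makes tenant_id and chunk_count keyword-only, so call sites stay unambiguous as arguments are added. A tiny runnable illustration of that convention (the function body is a stub, not the real index method):

def delete_single(doc_id: str, *, tenant_id: str | None, chunk_count: int | None) -> int:
    # Stub standing in for the real index method; returns a fake chunk count.
    return chunk_count or 0


# Keyword-only arguments must be named at the call site:
assert delete_single("doc-1", tenant_id=None, chunk_count=12) == 12
# delete_single("doc-1", None, 12) would raise TypeError: positional use is rejected.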

View File

@@ -13,11 +13,11 @@ from datetime import timedelta
 from typing import BinaryIO
 from typing import cast
 from typing import List
+from uuid import UUID

 import httpx  # type: ignore
 import requests  # type: ignore

-from onyx.configs.app_configs import DOCUMENT_INDEX_NAME
 from onyx.configs.chat_configs import DOC_TIME_DECAY
 from onyx.configs.chat_configs import NUM_RETURNED_HITS
 from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -25,7 +25,8 @@ from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
 from onyx.configs.constants import KV_REINDEX_KEY
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
-from onyx.document_index.document_index_utils import assemble_document_chunk_info
+from onyx.db.engine import get_session_with_tenant
+from onyx.document_index.document_index_utils import get_document_chunk_ids
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.interfaces import DocumentInsertionRecord
 from onyx.document_index.interfaces import EnrichedDocumentIndexingInfo
@@ -35,9 +36,6 @@ from onyx.document_index.interfaces import UpdateRequest
 from onyx.document_index.interfaces import VespaChunkRequest
 from onyx.document_index.interfaces import VespaDocumentFields
 from onyx.document_index.vespa.chunk_retrieval import batch_search_api_retrieval
-from onyx.document_index.vespa.chunk_retrieval import (
-    get_all_vespa_ids_for_document_id,
-)
 from onyx.document_index.vespa.chunk_retrieval import (
     parallel_visit_api_retrieval,
 )
@@ -46,6 +44,9 @@ from onyx.document_index.vespa.deletion import delete_vespa_chunks
 from onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks
 from onyx.document_index.vespa.indexing_utils import check_for_final_chunk_existence
 from onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy
+from onyx.document_index.vespa.indexing_utils import (
+    get_multipass_config,
+)
 from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
 from onyx.document_index.vespa.shared_utils.utils import (
     replace_invalid_doc_id_characters,
@@ -337,40 +338,25 @@ class VespaIndex(DocumentIndex):
         # documents that have `chunk_count` in the database, but not for
         # `old_version` documents.
-        enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = []
-        for document_id, doc_count in doc_id_to_previous_chunk_cnt.items():
-            last_indexed_chunk = doc_id_to_previous_chunk_cnt.get(document_id, None)
-            # If the document has no `chunk_count` in the database, we know that it
-            # has the old chunk ID system and we must check for the final chunk index
-            is_old_version = False
-            if last_indexed_chunk is None:
-                is_old_version = True
-                minimal_doc_info = MinimalDocumentIndexingInfo(
-                    doc_id=document_id,
-                    chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
-                )
-                last_indexed_chunk = check_for_final_chunk_existence(
-                    minimal_doc_info=minimal_doc_info,
-                    start_index=doc_id_to_new_chunk_cnt[document_id],
-                    index_name=self.index_name,
-                    http_client=http_client,
-                )
-
-            # If the document has previously indexed chunks, we know it previously existed
-            if doc_count or last_indexed_chunk:
-                existing_docs.add(document_id)
-
-            enriched_doc_info = EnrichedDocumentIndexingInfo(
-                doc_id=document_id,
-                chunk_start_index=doc_id_to_new_chunk_cnt.get(document_id, 0),
-                chunk_end_index=last_indexed_chunk,
-                old_version=is_old_version,
-            )
-            enriched_doc_infos.append(enriched_doc_info)
+        enriched_doc_infos: list[EnrichedDocumentIndexingInfo] = [
+            VespaIndex.enrich_basic_chunk_info(
+                index_name=self.index_name,
+                http_client=http_client,
+                document_id=doc_id,
+                previous_chunk_count=doc_id_to_previous_chunk_cnt.get(doc_id, 0),
+                new_chunk_count=doc_id_to_new_chunk_cnt.get(doc_id, 0),
+            )
+            for doc_id in doc_id_to_new_chunk_cnt.keys()
+        ]
+
+        for cleaned_doc_info in enriched_doc_infos:
+            # If the document has previously indexed chunks, we know it previously existed
+            if cleaned_doc_info.chunk_end_index:
+                existing_docs.add(cleaned_doc_info.doc_id)

         # Now, for each doc, we know exactly where to start and end our deletion
         # So let's generate the chunk IDs for each chunk to delete
-        chunks_to_delete = assemble_document_chunk_info(
+        chunks_to_delete = get_document_chunk_ids(
             enriched_document_info_list=enriched_doc_infos,
             tenant_id=tenant_id,
             large_chunks_enabled=large_chunks_enabled,
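
Editor's note: the idea replacing the visit API here is that, given a document's previous chunk count and its new chunk count, the stale chunk range is known up front and its IDs can be enumerated deterministically. A hedged, standalone sketch of that bookkeeping (the names and ID format are illustrative):

import uuid
from dataclasses import dataclass


@dataclass
class DocChunkRange:
    doc_id: str
    chunk_start_index: int  # first chunk index of the *new* version
    chunk_end_index: int    # chunk count of the *previous* version


def stale_chunk_ids(info: DocChunkRange, tenant_id: str | None = None) -> list[uuid.UUID]:
    # Chunks [chunk_start_index, chunk_end_index) existed before but are not being
    # re-indexed, so they must be deleted explicitly.
    ids = []
    for chunk_index in range(info.chunk_start_index, info.chunk_end_index):
        key = f"{info.doc_id}_{chunk_index}" + (f"_{tenant_id}" if tenant_id else "")
        ids.append(uuid.uuid5(uuid.NAMESPACE_X500, key))
    return ids


# A document that shrank from 10 chunks to 6: chunks 6..9 are stale.
print(len(stale_chunk_ids(DocChunkRange("doc-1", chunk_start_index=6, chunk_end_index=10))))  # 4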
@@ -447,21 +433,21 @@ class VespaIndex(DocumentIndex):
                 failure_msg = f"Failed to update document: {future_to_document_id[future]}"
                 raise requests.HTTPError(failure_msg) from e

-    def update(self, update_requests: list[UpdateRequest]) -> None:
+    def update(
+        self, update_requests: list[UpdateRequest], *, tenant_id: str | None
+    ) -> None:
         logger.debug(f"Updating {len(update_requests)} documents in Vespa")

         # Handle Vespa character limitations
         # Mutating update_requests but it's not used later anyway
         for update_request in update_requests:
-            update_request.document_ids = [
-                replace_invalid_doc_id_characters(doc_id)
-                for doc_id in update_request.document_ids
-            ]
+            for doc_info in update_request.minimal_document_indexing_info:
+                doc_info.doc_id = replace_invalid_doc_id_characters(doc_info.doc_id)

         update_start = time.monotonic()

         processed_updates_requests: list[_VespaUpdateRequest] = []
-        all_doc_chunk_ids: dict[str, list[str]] = {}
+        all_doc_chunk_ids: dict[str, list[UUID]] = {}

         # Fetch all chunks for each document ahead of time
         index_names = [self.index_name]
@@ -469,30 +455,24 @@ class VespaIndex(DocumentIndex):
             index_names.append(self.secondary_index_name)

         chunk_id_start_time = time.monotonic()
-        with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
-            future_to_doc_chunk_ids = {
-                executor.submit(
-                    get_all_vespa_ids_for_document_id,
-                    document_id=document_id,
-                    index_name=index_name,
-                    filters=None,
-                    get_large_chunks=True,
-                ): (document_id, index_name)
-                for index_name in index_names
-                for update_request in update_requests
-                for document_id in update_request.document_ids
-            }
-            for future in concurrent.futures.as_completed(future_to_doc_chunk_ids):
-                document_id, index_name = future_to_doc_chunk_ids[future]
-                try:
-                    doc_chunk_ids = future.result()
-                    if document_id not in all_doc_chunk_ids:
-                        all_doc_chunk_ids[document_id] = []
-                    all_doc_chunk_ids[document_id].extend(doc_chunk_ids)
-                except Exception as e:
-                    logger.error(
-                        f"Error retrieving chunk IDs for document {document_id} in index {index_name}: {e}"
-                    )
+        with get_vespa_http_client() as http_client:
+            for update_request in update_requests:
+                for doc_info in update_request.minimal_document_indexing_info:
+                    for index_name in index_names:
+                        doc_chunk_info = VespaIndex.enrich_basic_chunk_info(
+                            index_name=index_name,
+                            http_client=http_client,
+                            document_id=doc_info.doc_id,
+                            previous_chunk_count=doc_info.chunk_start_index,
+                            new_chunk_count=0,
+                        )
+                        doc_chunk_ids = get_document_chunk_ids(
+                            enriched_document_info_list=[doc_chunk_info],
+                            tenant_id=tenant_id,
+                            large_chunks_enabled=False,
+                        )
+                        all_doc_chunk_ids[doc_info.doc_id] = doc_chunk_ids

         logger.debug(
             f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs"
         )
@@ -521,11 +501,11 @@ class VespaIndex(DocumentIndex):
                 logger.error("Update request received but nothing to update")
                 continue

-            for document_id in update_request.document_ids:
-                for doc_chunk_id in all_doc_chunk_ids[document_id]:
+            for doc_info in update_request.minimal_document_indexing_info:
+                for doc_chunk_id in all_doc_chunk_ids[doc_info.doc_id]:
                     processed_updates_requests.append(
                         _VespaUpdateRequest(
-                            document_id=document_id,
+                            document_id=doc_info.doc_id,
                             url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}",
                             update_request=update_dict,
                         )
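
Editor's note: once the chunk UUIDs for each document are known, the bulk update path simply fans out one ID-addressed request per chunk instead of one selection/visit query per document. A small, self-contained sketch of that fan-out (the endpoint string is a placeholder, not Onyx's constant):

import uuid
from dataclasses import dataclass


@dataclass
class ChunkUpdateRequest:
    document_id: str
    url: str


# Placeholder for Vespa's ID-based document endpoint.
DOC_ENDPOINT = "http://localhost:8080/document/v1/default/danswer_chunk/docid"


def build_update_requests(all_doc_chunk_ids: dict[str, list[uuid.UUID]]) -> list[ChunkUpdateRequest]:
    # One partial-update request per chunk, addressed directly by its deterministic UUID.
    return [
        ChunkUpdateRequest(document_id=doc_id, url=f"{DOC_ENDPOINT}/{chunk_id}")
        for doc_id, chunk_ids in all_doc_chunk_ids.items()
        for chunk_id in chunk_ids
    ]


chunk_map = {"doc-1": [uuid.uuid5(uuid.NAMESPACE_X500, f"doc-1_{i}") for i in range(3)]}
print(len(build_update_requests(chunk_map)))  # 3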
@@ -537,36 +517,70 @@ class VespaIndex(DocumentIndex):
             time.monotonic() - update_start,
         )

-    def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
+    def update_single_chunk(
+        self,
+        doc_chunk_id: UUID,
+        index_name: str,
+        fields: VespaDocumentFields,
+        doc_id: str,
+    ) -> None:
+        """
+        Update a single "chunk" (document) in Vespa using its chunk ID.
+        """
+        update_dict: dict[str, dict] = {"fields": {}}
+
+        if fields.boost is not None:
+            update_dict["fields"][BOOST] = {"assign": fields.boost}
+
+        if fields.document_sets is not None:
+            # WeightedSet<string> needs a map { item: weight, ... }
+            update_dict["fields"][DOCUMENT_SETS] = {
+                "assign": {document_set: 1 for document_set in fields.document_sets}
+            }
+
+        if fields.access is not None:
+            # Similar to above
+            update_dict["fields"][ACCESS_CONTROL_LIST] = {
+                "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
+            }
+
+        if fields.hidden is not None:
+            update_dict["fields"][HIDDEN] = {"assign": fields.hidden}
+
+        if not update_dict["fields"]:
+            logger.error("Update request received but nothing to update.")
+            return
+
+        vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}?create=true"
+
+        with get_vespa_http_client(http2=False) as http_client:
+            try:
+                resp = http_client.put(
+                    vespa_url,
+                    headers={"Content-Type": "application/json"},
+                    json=update_dict,
+                )
+                resp.raise_for_status()
+            except httpx.HTTPStatusError as e:
+                error_message = f"Failed to update doc chunk {doc_chunk_id} (doc_id={doc_id}). Details: {e.response.text}"
+                logger.error(error_message)
+                raise
+
+    def update_single(
+        self,
+        doc_id: str,
+        *,
+        chunk_count: int | None,
+        tenant_id: str | None,
+        fields: VespaDocumentFields,
+    ) -> int:
         """Note: if the document id does not exist, the update will be a no-op and the
         function will complete with no errors or exceptions.
         Handle other exceptions if you wish to implement retry behavior
         """
-        total_chunks_updated = 0
-
-        # Handle Vespa character limitations
-        # Mutating update_request but it's not used later anyway
-        normalized_doc_id = replace_invalid_doc_id_characters(doc_id)
-
-        # Build the _VespaUpdateRequest objects
-        update_dict: dict[str, dict] = {"fields": {}}
-        if fields.boost is not None:
-            update_dict["fields"][BOOST] = {"assign": fields.boost}
-        if fields.document_sets is not None:
-            update_dict["fields"][DOCUMENT_SETS] = {
-                "assign": {document_set: 1 for document_set in fields.document_sets}
-            }
-        if fields.access is not None:
-            update_dict["fields"][ACCESS_CONTROL_LIST] = {
-                "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
-            }
-        if fields.hidden is not None:
-            update_dict["fields"][HIDDEN] = {"assign": fields.hidden}
-
-        if not update_dict["fields"]:
-            logger.error("Update request received but nothing to update")
-            return 0
+        doc_chunk_count = 0

         index_names = [self.index_name]
         if self.secondary_index_name:
@@ -574,66 +588,51 @@ class VespaIndex(DocumentIndex):
         with get_vespa_http_client(http2=False) as http_client:
             for index_name in index_names:
-                params = httpx.QueryParams(
-                    {
-                        "selection": f"{index_name}.document_id=='{normalized_doc_id}'",
-                        "cluster": DOCUMENT_INDEX_NAME,
-                    }
-                )
-
-                while True:
-                    try:
-                        vespa_url = (
-                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}"
-                        )
-                        logger.debug(f'update_single PUT on URL "{vespa_url}"')
-                        resp = http_client.put(
-                            vespa_url,
-                            params=params,
-                            headers={"Content-Type": "application/json"},
-                            json=update_dict,
-                        )
-                        resp.raise_for_status()
-                    except httpx.HTTPStatusError as e:
-                        logger.error(
-                            f"Failed to update chunks, details: {e.response.text}"
-                        )
-                        raise
-
-                    resp_data = resp.json()
-
-                    if "documentCount" in resp_data:
-                        chunks_updated = resp_data["documentCount"]
-                        total_chunks_updated += chunks_updated
-
-                    # Check for continuation token to handle pagination
-                    if "continuation" not in resp_data:
-                        break  # Exit loop if no continuation token
-
-                    if not resp_data["continuation"]:
-                        break  # Exit loop if continuation token is empty
-
-                    params = params.set("continuation", resp_data["continuation"])
-
-                logger.debug(
-                    f"VespaIndex.update_single: "
-                    f"index={index_name} "
-                    f"doc={normalized_doc_id} "
-                    f"chunks_updated={total_chunks_updated}"
-                )
-
-        return total_chunks_updated
+                with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+                    multipass_config = get_multipass_config(
+                        db_session=db_session,
+                        primary_index=index_name == self.index_name,
+                    )
+                    large_chunks_enabled = multipass_config.enable_large_chunks
+
+                enriched_doc_infos = VespaIndex.enrich_basic_chunk_info(
+                    index_name=index_name,
+                    http_client=http_client,
+                    document_id=doc_id,
+                    previous_chunk_count=chunk_count,
+                    new_chunk_count=0,
+                )
+
+                doc_chunk_ids = get_document_chunk_ids(
+                    enriched_document_info_list=[enriched_doc_infos],
+                    tenant_id=tenant_id,
+                    large_chunks_enabled=large_chunks_enabled,
+                )
+
+                logger.info("UPDATING len(doc_chunk_ids)")
+                doc_chunk_count += len(doc_chunk_ids)
+
+                for doc_chunk_id in doc_chunk_ids:
+                    logger.info("UPDATING CHUNK")
+                    self.update_single_chunk(
+                        doc_chunk_id=doc_chunk_id,
+                        index_name=index_name,
+                        fields=fields,
+                        doc_id=doc_id,
+                    )
+
+        logger.info(f"UPDATED A TOTAL OF {doc_chunk_count} CHUNKS for {doc_id}")
+        return doc_chunk_count
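
Editor's note: for readers unfamiliar with Vespa's Document API, a partial update is a PUT against the ID-addressed document endpoint with an "assign" payload; "create=true" makes it an upsert. A hedged, standalone sketch of such a call (the endpoint and field names are placeholders, not Onyx's exact constants):

import httpx

# Placeholder layout: /document/v1/<namespace>/<doctype>/docid/<doc-chunk-uuid>
VESPA_DOC_URL = "http://localhost:8080/document/v1/default/danswer_chunk/docid"


def update_chunk_fields(doc_chunk_id: str, boost: float | None = None, hidden: bool | None = None) -> None:
    update_dict: dict[str, dict] = {"fields": {}}
    if boost is not None:
        update_dict["fields"]["boost"] = {"assign": boost}
    if hidden is not None:
        update_dict["fields"]["hidden"] = {"assign": hidden}
    if not update_dict["fields"]:
        return  # nothing to update

    with httpx.Client(http2=False) as client:
        # Partial update addressed directly by chunk ID; create=true upserts if missing.
        resp = client.put(
            f"{VESPA_DOC_URL}/{doc_chunk_id}?create=true",
            headers={"Content-Type": "application/json"},
            json=update_dict,
        )
        resp.raise_for_status()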
-    def delete_single(self, doc_id: str) -> int:
-        """Possibly faster overall than the delete method due to using a single
-        delete call with a selection query."""
-
+    def delete_single(
+        self,
+        doc_id: str,
+        *,
+        tenant_id: str | None,
+        chunk_count: int | None,
+    ) -> int:
         total_chunks_deleted = 0

-        # Vespa deletion is poorly documented ... luckily we found this
-        # https://docs.vespa.ai/en/operations/batch-delete.html#example
         doc_id = replace_invalid_doc_id_characters(doc_id)

         # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
@@ -642,53 +641,41 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)

-        with get_vespa_http_client(http2=False) as http_client:
+        with get_vespa_http_client(
+            http2=False
+        ) as http_client, concurrent.futures.ThreadPoolExecutor(
+            max_workers=NUM_THREADS
+        ) as executor:
             for index_name in index_names:
-                params = httpx.QueryParams(
-                    {
-                        "selection": f"{index_name}.document_id=='{doc_id}'",
-                        "cluster": DOCUMENT_INDEX_NAME,
-                    }
-                )
-
-                while True:
-                    try:
-                        vespa_url = (
-                            f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}"
-                        )
-                        logger.debug(f'delete_single DELETE on URL "{vespa_url}"')
-                        resp = http_client.delete(
-                            vespa_url,
-                            params=params,
-                        )
-                        resp.raise_for_status()
-                    except httpx.HTTPStatusError as e:
-                        logger.error(
-                            f"Failed to delete chunk, details: {e.response.text}"
-                        )
-                        raise
-
-                    resp_data = resp.json()
-
-                    if "documentCount" in resp_data:
-                        chunks_deleted = resp_data["documentCount"]
-                        total_chunks_deleted += chunks_deleted
-
-                    # Check for continuation token to handle pagination
-                    if "continuation" not in resp_data:
-                        break  # Exit loop if no continuation token
-
-                    if not resp_data["continuation"]:
-                        break  # Exit loop if continuation token is empty
-
-                    params = params.set("continuation", resp_data["continuation"])
-
-                logger.debug(
-                    f"VespaIndex.delete_single: "
-                    f"index={index_name} "
-                    f"doc={doc_id} "
-                    f"chunks_deleted={total_chunks_deleted}"
-                )
+                with get_session_with_tenant(tenant_id=tenant_id) as db_session:
+                    multipass_config = get_multipass_config(
+                        db_session=db_session,
+                        primary_index=index_name == self.index_name,
+                    )
+                    large_chunks_enabled = multipass_config.enable_large_chunks
+
+                enriched_doc_infos = VespaIndex.enrich_basic_chunk_info(
+                    index_name=index_name,
+                    http_client=http_client,
+                    document_id=doc_id,
+                    previous_chunk_count=chunk_count,
+                    new_chunk_count=0,
+                )
+                chunks_to_delete = get_document_chunk_ids(
+                    enriched_document_info_list=[enriched_doc_infos],
+                    tenant_id=tenant_id,
+                    large_chunks_enabled=large_chunks_enabled,
+                )
+
+                for doc_chunk_ids_batch in batch_generator(
+                    chunks_to_delete, BATCH_SIZE
+                ):
+                    total_chunks_deleted += len(doc_chunk_ids_batch)
+                    delete_vespa_chunks(
+                        doc_chunk_ids=doc_chunk_ids_batch,
+                        index_name=index_name,
+                        http_client=http_client,
+                        executor=executor,
+                    )

         return total_chunks_deleted
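
Editor's note: deletion now enumerates the chunk IDs up front and issues direct, batched deletes rather than a selection query with continuation tokens. A hedged, standalone sketch of that pattern (endpoint, batch size, and thread count are illustrative):

import concurrent.futures
from collections.abc import Iterator

import httpx

VESPA_DOC_URL = "http://localhost:8080/document/v1/default/danswer_chunk/docid"  # placeholder
BATCH_SIZE = 100
NUM_THREADS = 8


def batch_generator(items: list, batch_size: int) -> Iterator[list]:
    for i in range(0, len(items), batch_size):
        yield items[i : i + batch_size]


def delete_chunks(chunk_ids: list[str]) -> None:
    # Each chunk is deleted directly by ID; no selection query or visit pagination needed.
    with httpx.Client(http2=False) as client, concurrent.futures.ThreadPoolExecutor(
        max_workers=NUM_THREADS
    ) as executor:
        for batch in batch_generator(chunk_ids, BATCH_SIZE):
            futures = [
                executor.submit(client.delete, f"{VESPA_DOC_URL}/{cid}") for cid in batch
            ]
            for future in concurrent.futures.as_completed(futures):
                future.result().raise_for_status()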
@@ -787,8 +774,51 @@ class VespaIndex(DocumentIndex):

         return query_vespa(params)

+    # Retrieves chunk information for a document:
+    # - Determines the last indexed chunk
+    # - Identifies if the document uses the old or new chunk ID system
+    # This data is crucial for Vespa document updates without relying on the visit API.
     @classmethod
-    def delete_entries_by_tenant_id(cls, tenant_id: str, index_name: str) -> None:
+    def enrich_basic_chunk_info(
+        cls,
+        index_name: str,
+        http_client: httpx.Client,
+        document_id: str,
+        previous_chunk_count: int | None = None,
+        new_chunk_count: int = 0,
+    ) -> EnrichedDocumentIndexingInfo:
+        last_indexed_chunk = previous_chunk_count
+
+        # If the document has no `chunk_count` in the database, we know that it
+        # has the old chunk ID system and we must check for the final chunk index
+        is_old_version = False
+        if last_indexed_chunk is None:
+            is_old_version = True
+            minimal_doc_info = MinimalDocumentIndexingInfo(
+                doc_id=document_id, chunk_start_index=new_chunk_count
+            )
+            last_indexed_chunk = check_for_final_chunk_existence(
+                minimal_doc_info=minimal_doc_info,
+                start_index=new_chunk_count,
+                index_name=index_name,
+                http_client=http_client,
+            )
+
+        enriched_doc_info = EnrichedDocumentIndexingInfo(
+            doc_id=document_id,
+            chunk_start_index=new_chunk_count,
+            chunk_end_index=last_indexed_chunk,
+            old_version=is_old_version,
+        )
+        return enriched_doc_info
+
+    @classmethod
+    def delete_entries_by_tenant_id(
+        cls,
+        *,
+        tenant_id: str,
+        index_name: str,
+    ) -> None:
         """
         Deletes all entries in the specified index with the given tenant_id.
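
Editor's note: for legacy documents with no stored chunk_count, the end of the chunk range is discovered by probing successive deterministic chunk IDs until one is missing. A standalone sketch of that probe (the URL layout and ID derivation are assumptions for illustration):

import uuid

import httpx

VESPA_DOC_URL = "http://localhost:8080/document/v1/default/danswer_chunk/docid"  # placeholder


def find_last_chunk_index(doc_id: str, start_index: int, client: httpx.Client) -> int:
    # Probe sequential chunk IDs until one does not exist; that index is the
    # (exclusive) end of the previously indexed chunk range.
    index = start_index
    while True:
        chunk_uuid = uuid.uuid5(uuid.NAMESPACE_X500, f"{doc_id}_{index}")
        resp = client.get(f"{VESPA_DOC_URL}/{chunk_uuid}")
        if resp.status_code == 404:
            return index
        resp.raise_for_status()
        index += 1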

View File

@@ -7,10 +7,15 @@ from http import HTTPStatus
 import httpx
 from retry import retry
+from sqlalchemy.orm import Session

+from onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING
 from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
     get_experts_stores_representations,
 )
+from onyx.db.models import SearchSettings
+from onyx.db.search_settings import get_current_search_settings
+from onyx.db.search_settings import get_secondary_search_settings
 from onyx.document_index.document_index_utils import get_uuid_from_chunk
 from onyx.document_index.document_index_utils import get_uuid_from_chunk_info_old
 from onyx.document_index.interfaces import MinimalDocumentIndexingInfo
@@ -45,6 +50,8 @@ from onyx.document_index.vespa_constants import TENANT_ID
 from onyx.document_index.vespa_constants import TITLE
 from onyx.document_index.vespa_constants import TITLE_EMBEDDING
 from onyx.indexing.models import DocMetadataAwareIndexChunk
+from onyx.indexing.models import EmbeddingProvider
+from onyx.indexing.models import MultipassConfig
 from onyx.utils.logger import setup_logger

 logger = setup_logger()
@@ -129,7 +136,9 @@ def _index_vespa_chunk(
     document = chunk.source_document

     # No minichunk documents in vespa, minichunk vectors are stored in the chunk itself
     vespa_chunk_id = str(get_uuid_from_chunk(chunk))
+
     embeddings = chunk.embeddings
+
     embeddings_name_vector_map = {"full_chunk": embeddings.full_embedding}
@@ -263,5 +272,49 @@ def check_for_final_chunk_existence(
         )
         if not _does_doc_chunk_exist(doc_chunk_id, index_name, http_client):
             return index

         index += 1
+
+
+def should_use_multipass(search_settings: SearchSettings | None) -> bool:
+    """
+    Determines whether multipass should be used based on the search settings
+    or the default config if settings are unavailable.
+    """
+    if search_settings is not None:
+        return search_settings.multipass_indexing
+    return ENABLE_MULTIPASS_INDEXING
+
+
+def can_use_large_chunks(multipass: bool, search_settings: SearchSettings) -> bool:
+    """
+    Given multipass usage and an embedder, decides whether large chunks are allowed
+    based on model/provider constraints.
+    """
+    # Only local models that support a larger context are from Nomic
+    # Cohere does not support larger contexts (they recommend not going above ~512 tokens)
+    return (
+        multipass
+        and search_settings.model_name.startswith("nomic-ai")
+        and search_settings.provider_type != EmbeddingProvider.COHERE
+    )
+
+
+def get_multipass_config(
+    db_session: Session, primary_index: bool = True
+) -> MultipassConfig:
+    """
+    Determines whether to enable multipass and large chunks by examining
+    the current search settings and the embedder configuration.
+    """
+    search_settings = (
+        get_current_search_settings(db_session)
+        if primary_index
+        else get_secondary_search_settings(db_session)
+    )
+    multipass = should_use_multipass(search_settings)
+    if not search_settings:
+        return MultipassConfig(multipass_indexing=False, enable_large_chunks=False)
+
+    enable_large_chunks = can_use_large_chunks(multipass, search_settings)
+    return MultipassConfig(
+        multipass_indexing=multipass, enable_large_chunks=enable_large_chunks
+    )
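
Editor's note: the multipass/large-chunk decision above reduces to two booleans derived from the active search settings. A dependency-free sketch that mirrors that logic with fake types (the settings class, default, and provider string are stand-ins, not the real models):

from dataclasses import dataclass


@dataclass
class FakeSearchSettings:
    multipass_indexing: bool
    model_name: str
    provider_type: str | None


def decide(settings: FakeSearchSettings | None, default_multipass: bool = False) -> tuple[bool, bool]:
    # multipass: from settings when present, otherwise the deployment default
    multipass = settings.multipass_indexing if settings is not None else default_multipass
    # large chunks: only for multipass with a Nomic local model and a non-Cohere provider
    large_chunks = (
        multipass
        and settings is not None
        and settings.model_name.startswith("nomic-ai")
        and settings.provider_type != "cohere"
    )
    return multipass, large_chunks


# A Nomic local model with multipass enabled gets large chunks; Cohere never does.
assert decide(FakeSearchSettings(True, "nomic-ai/nomic-embed-text-v1", None)) == (True, True)
assert decide(FakeSearchSettings(True, "embed-english-v3.0", "cohere")) == (True, False)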

View File

@@ -11,7 +11,6 @@ from sqlalchemy.orm import Session

 from onyx.access.access import get_access_for_documents
 from onyx.access.models import DocumentAccess
-from onyx.configs.app_configs import ENABLE_MULTIPASS_INDEXING
 from onyx.configs.app_configs import INDEXING_EXCEPTION_LIMIT
 from onyx.configs.app_configs import MAX_DOCUMENT_CHARS
 from onyx.configs.constants import DEFAULT_BOOST
@@ -31,12 +30,14 @@ from onyx.db.document import upsert_documents
 from onyx.db.document_set import fetch_document_sets_for_documents
 from onyx.db.index_attempt import create_index_attempt_error
 from onyx.db.models import Document as DBDocument
-from onyx.db.search_settings import get_current_search_settings
 from onyx.db.tag import create_or_add_document_tag
 from onyx.db.tag import create_or_add_document_tag_list
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.interfaces import DocumentMetadata
 from onyx.document_index.interfaces import IndexBatchParams
+from onyx.document_index.vespa.indexing_utils import (
+    get_multipass_config,
+)
 from onyx.indexing.chunker import Chunker
 from onyx.indexing.embedder import IndexingEmbedder
 from onyx.indexing.indexing_heartbeat import IndexingHeartbeatInterface
@@ -44,7 +45,6 @@ from onyx.indexing.models import DocAwareChunk
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from onyx.utils.logger import setup_logger
 from onyx.utils.timing import log_function_time
-from shared_configs.enums import EmbeddingProvider

 logger = setup_logger()
@@ -479,28 +479,6 @@ def index_doc_batch(
     return result


-def check_enable_large_chunks_and_multipass(
-    embedder: IndexingEmbedder, db_session: Session
-) -> tuple[bool, bool]:
-    search_settings = get_current_search_settings(db_session)
-    multipass = (
-        search_settings.multipass_indexing
-        if search_settings
-        else ENABLE_MULTIPASS_INDEXING
-    )
-    enable_large_chunks = (
-        multipass
-        and
-        # Only local models that supports larger context are from Nomic
-        (embedder.model_name.startswith("nomic-ai"))
-        and
-        # Cohere does not support larger context they recommend not going above 512 tokens
-        embedder.provider_type != EmbeddingProvider.COHERE
-    )
-    return multipass, enable_large_chunks
-
-
 def build_indexing_pipeline(
     *,
     embedder: IndexingEmbedder,
@@ -513,14 +491,12 @@ def build_indexing_pipeline(
     callback: IndexingHeartbeatInterface | None = None,
 ) -> IndexingPipelineProtocol:
     """Builds a pipeline which takes in a list (batch) of docs and indexes them."""
-    multipass, enable_large_chunks = check_enable_large_chunks_and_multipass(
-        embedder, db_session
-    )
+    multipass_config = get_multipass_config(db_session, primary_index=True)

     chunker = chunker or Chunker(
         tokenizer=embedder.embedding_model.tokenizer,
-        enable_multipass=multipass,
-        enable_large_chunks=enable_large_chunks,
+        enable_multipass=multipass_config.multipass_indexing,
+        enable_large_chunks=multipass_config.enable_large_chunks,
         # after every doc, update status in case there are a bunch of really long docs
         callback=callback,
     )

View File

@@ -154,3 +154,8 @@ class IndexingSetting(EmbeddingModelDetail):
             index_name=search_settings.index_name,
             multipass_indexing=search_settings.multipass_indexing,
         )
+
+
+class MultipassConfig(BaseModel):
+    multipass_indexing: bool
+    enable_large_chunks: bool

View File

@@ -5,6 +5,7 @@ import sys
 from sqlalchemy import delete
 from sqlalchemy.orm import Session

+from onyx.db.document import delete_documents_complete__no_commit
 from onyx.db.enums import ConnectorCredentialPairStatus

 # Modify sys.path
@@ -38,7 +39,6 @@ from onyx.db.engine import get_session_context_manager
 from onyx.document_index.factory import get_default_document_index
 from onyx.file_store.file_store import get_default_file_store
 from onyx.document_index.document_index_utils import get_both_index_names
-from onyx.db.document import delete_documents_complete__no_commit

 # pylint: enable=E402
 # flake8: noqa: E402
@@ -71,13 +71,16 @@ def _unsafe_deletion(
         if not documents:
             break

-        document_ids = [document.id for document in documents]
-        for doc_id in document_ids:
-            document_index.delete_single(doc_id)
+        for document in documents:
+            document_index.delete_single(
+                doc_id=document.id,
+                tenant_id=None,
+                chunk_count=document.chunk_count,
+            )

         delete_documents_complete__no_commit(
             db_session=db_session,
-            document_ids=document_ids,
+            document_ids=[document.id for document in documents],
         )

         num_docs_deleted += len(documents)
@@ -216,6 +219,7 @@ if __name__ == "__main__":
     parser.add_argument(
         "connector_id", type=int, help="The ID of the connector to delete"
     )
+
     args = parser.parse_args()

     with get_session_context_manager() as db_session:
         _delete_connector(args.connector_id, db_session)

View File

@@ -15,6 +15,7 @@ from onyx.db.engine import get_session_context_manager  # noqa: E402
 from onyx.db.document import delete_documents_complete__no_commit  # noqa: E402
 from onyx.db.search_settings import get_current_search_settings  # noqa: E402
 from onyx.document_index.vespa.index import VespaIndex  # noqa: E402
+from onyx.db.document import get_document  # noqa: E402

 BATCH_SIZE = 100
@@ -63,6 +64,9 @@ def main() -> None:
     with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:

         def process_doc(doc_id: str) -> str | None:
+            document = get_document(doc_id, db_session)
+            if not document:
+                return None
             # Check if document exists in Vespa first
             try:
                 chunks = vespa_index.id_based_retrieval(
@@ -83,7 +87,9 @@ def main() -> None:
             try:
                 print(f"Deleting document {doc_id} in Vespa")
-                chunks_deleted = vespa_index.delete_single(doc_id)
+                chunks_deleted = vespa_index.delete_single(
+                    doc_id, tenant_id=None, chunk_count=document.chunk_count
+                )
                 if chunks_deleted > 0:
                     print(
                         f"Deleted {chunks_deleted} chunks for document {doc_id}"

View File

@@ -21,8 +21,9 @@ def _verify_document_permissions(
     group_names: list[str] | None = None,
     doc_creating_user: DATestUser | None = None,
 ) -> None:
-    acl_keys = set(retrieved_doc["access_control_list"].keys())
+    acl_keys = set(retrieved_doc.get("access_control_list", {}).keys())
     print(f"ACL keys: {acl_keys}")
+
     if cc_pair.access_type == AccessType.PUBLIC:
         if "PUBLIC" not in acl_keys:
             raise ValueError(
@@ -42,8 +43,9 @@ def _verify_document_permissions(
         found_group_keys = {key for key in acl_keys if key.startswith("group:")}
         if found_group_keys != expected_group_keys:
             raise ValueError(
-                f"Document {retrieved_doc['document_id']} has incorrect group ACL keys. Found: {found_group_keys}, \n"
-                f"Expected: {expected_group_keys}"
+                f"Document {retrieved_doc['document_id']} has incorrect group ACL keys. "
+                f"Expected: {expected_group_keys} Found: {found_group_keys}\n"
+                f"All ACL keys: {acl_keys}"
             )

     if doc_set_names is not None:
@@ -153,9 +155,11 @@ class DocumentManager:
     ) -> None:
         doc_ids = [document.id for document in cc_pair.documents]
+
         retrieved_docs_dict = vespa_client.get_documents_by_id(doc_ids)["documents"]
+
         retrieved_docs = {
             doc["fields"]["document_id"]: doc["fields"] for doc in retrieved_docs_dict
         }
         # Left this here for debugging purposes.
         # import json
         # for doc in retrieved_docs.values():