welcome to onyx

This commit is contained in:
pablodanswer
2024-12-13 09:48:43 -08:00
parent 54dcbfa288
commit 21ec5ed795
813 changed files with 7021 additions and 6824 deletions

View File

@@ -0,0 +1,60 @@
import math
import uuid
from sqlalchemy.orm import Session
from onyx.context.search.models import InferenceChunk
from onyx.db.search_settings import get_current_search_settings
from onyx.db.search_settings import get_secondary_search_settings
from onyx.indexing.models import IndexChunk
DEFAULT_BATCH_SIZE = 30
DEFAULT_INDEX_NAME = "danswer_chunk"
def get_both_index_names(db_session: Session) -> tuple[str, str | None]:
search_settings = get_current_search_settings(db_session)
search_settings_new = get_secondary_search_settings(db_session)
if not search_settings_new:
return search_settings.index_name, None
return search_settings.index_name, search_settings_new.index_name
def translate_boost_count_to_multiplier(boost: int) -> float:
"""Mapping boost integer values to a multiplier according to a sigmoid curve
Piecewise such that at many downvotes, its 0.5x the score and with many upvotes
it is 2x the score. This should be in line with the Vespa calculation."""
# 3 in the equation below stretches it out to hit asymptotes slower
if boost < 0:
# 0.5 + sigmoid -> range of 0.5 to 1
return 0.5 + (1 / (1 + math.exp(-1 * boost / 3)))
# 2 x sigmoid -> range of 1 to 2
return 2 / (1 + math.exp(-1 * boost / 3))
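# Editorial illustration (not part of the original file): boost = 0 maps to a 1.0x multiplier,
# boost = +3 to roughly 1.46x, and boost = -3 to roughly 0.77x, with the curve approaching the
# 2.0x and 0.5x asymptotes for very large positive and negative boost values respectively.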
def get_uuid_from_chunk(
chunk: IndexChunk | InferenceChunk, mini_chunk_ind: int = 0
) -> uuid.UUID:
doc_str = (
chunk.document_id
if isinstance(chunk, InferenceChunk)
else chunk.source_document.id
)
# Web parsing URL duplicate catching
if doc_str and doc_str[-1] == "/":
doc_str = doc_str[:-1]
unique_identifier_string = "_".join(
[doc_str, str(chunk.chunk_id), str(mini_chunk_ind)]
)
if chunk.large_chunk_reference_ids:
unique_identifier_string += "_large" + "_".join(
[
str(referenced_chunk_id)
for referenced_chunk_id in chunk.large_chunk_reference_ids
]
)
return uuid.uuid5(uuid.NAMESPACE_X500, unique_identifier_string)
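# Editorial note (not part of the original file): uuid5 is deterministic, so the same
# document_id / chunk_id / mini_chunk_ind combination always yields the same UUID, which is
# presumably what allows re-indexed chunks to overwrite their previous versions in place.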

View File

@@ -0,0 +1,32 @@
from sqlalchemy.orm import Session
from onyx.db.search_settings import get_current_search_settings
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.vespa.index import VespaIndex
from shared_configs.configs import MULTI_TENANT
def get_default_document_index(
primary_index_name: str,
secondary_index_name: str | None,
) -> DocumentIndex:
"""Primary index is the index that is used for querying/updating etc.
Secondary index is for when both the currently used index and the upcoming
index both need to be updated, updates are applied to both indices"""
# Currently only supporting Vespa
return VespaIndex(
index_name=primary_index_name,
secondary_index_name=secondary_index_name,
multitenant=MULTI_TENANT,
)
def get_current_primary_default_document_index(db_session: Session) -> DocumentIndex:
"""
TODO: Use redis to cache this or something
"""
search_settings = get_current_search_settings(db_session)
return get_default_document_index(
primary_index_name=search_settings.index_name,
secondary_index_name=None,
)

View File

@@ -0,0 +1,399 @@
import abc
from dataclasses import dataclass
from datetime import datetime
from typing import Any
from onyx.access.models import DocumentAccess
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.indexing.models import DocMetadataAwareIndexChunk
from shared_configs.model_server_models import Embedding
@dataclass(frozen=True)
class DocumentInsertionRecord:
document_id: str
already_existed: bool
@dataclass(frozen=True)
class VespaChunkRequest:
document_id: str
min_chunk_ind: int | None = None
max_chunk_ind: int | None = None
@property
def is_capped(self) -> bool:
# If the max chunk index is not None, then the chunk request is capped
# If the min chunk index is None, we can assume the min is 0
return self.max_chunk_ind is not None
@property
def range(self) -> int | None:
if self.max_chunk_ind is not None:
return (self.max_chunk_ind - (self.min_chunk_ind or 0)) + 1
return None
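# Editorial example (not part of the original file): VespaChunkRequest("doc", min_chunk_ind=2,
# max_chunk_ind=5) is capped and spans (5 - 2) + 1 = 4 chunks, while a request with
# max_chunk_ind=None is uncapped and has a range of None.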
@dataclass
class DocumentMetadata:
"""
Document information that needs to be inserted into Postgres on first time encountering this
document during indexing across any of the connectors.
"""
connector_id: int
credential_id: int
document_id: str
semantic_identifier: str
first_link: str
doc_updated_at: datetime | None = None
# Emails, not necessarily attached to users
# Users may not be in Onyx
primary_owners: list[str] | None = None
secondary_owners: list[str] | None = None
from_ingestion_api: bool = False
@dataclass
class VespaDocumentFields:
"""
Specifies fields in Vespa for a document. Fields set to None will be ignored.
Perhaps we should name this in an implementation agnostic fashion, but it's more
understandable like this for now.
"""
# all other fields except these 4 will always be left alone by the update request
access: DocumentAccess | None = None
document_sets: set[str] | None = None
boost: float | None = None
hidden: bool | None = None
@dataclass
class UpdateRequest:
"""
For all document_ids, update the allowed_users and the boost to the new values.
Does not update any of the None fields.
"""
document_ids: list[str]
# all other fields except these 4 will always be left alone by the update request
access: DocumentAccess | None = None
document_sets: set[str] | None = None
boost: float | None = None
hidden: bool | None = None
class Verifiable(abc.ABC):
"""
Class must implement document index schema verification. For example, verify that all of the
necessary attributes for indexing, querying, filtering, and the fields to return from search
are valid in the schema.
Parameters:
- index_name: The name of the primary index currently used for querying
- secondary_index_name: The name of the secondary index being built in the background, if it
currently exists. Some functions on the document index act on both the primary and
secondary index, some act on just one.
"""
@abc.abstractmethod
def __init__(
self,
index_name: str,
secondary_index_name: str | None,
*args: Any,
**kwargs: Any
) -> None:
super().__init__(*args, **kwargs)
self.index_name = index_name
self.secondary_index_name = secondary_index_name
@abc.abstractmethod
def ensure_indices_exist(
self,
index_embedding_dim: int,
secondary_index_embedding_dim: int | None,
) -> None:
"""
Verify that the document index exists and is consistent with the expectations in the code.
Parameters:
- index_embedding_dim: Vector dimensionality for the vector similarity part of the search
- secondary_index_embedding_dim: Vector dimensionality of the secondary index being built
behind the scenes. The secondary index should only be built when switching
embedding models, therefore this dim should be different from the primary index.
"""
raise NotImplementedError
@staticmethod
@abc.abstractmethod
def register_multitenant_indices(
indices: list[str],
embedding_dims: list[int],
) -> None:
"""
Register multitenant indices with the document index.
"""
raise NotImplementedError
class Indexable(abc.ABC):
"""
Class must implement the ability to index document chunks
"""
@abc.abstractmethod
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
fresh_index: bool = False,
) -> set[DocumentInsertionRecord]:
"""
Takes a list of document chunks and indexes them in the document index
NOTE: When a document is reindexed/updated here, it must clear all of the existing document
chunks before reindexing. This is because the document may have gotten shorter since the
last run. Therefore, upserting the first 0 through n chunks may leave some old chunks that
have not been written over.
NOTE: The chunks of a document are never separated into separate index() calls. So there is
no worry of receiving the first 0 through n chunks in one index call and the next n through
m chunks of a document in the next index call.
NOTE: Due to some asymmetry between the primary and secondary indexing logic, this function
only needs to index chunks into the PRIMARY index. Do not update the secondary index here,
it is done automatically outside of this code.
NOTE: The fresh_index parameter, when set to True, assumes no documents have been previously
indexed for the given index/tenant. This can be used to optimize the indexing process for
new or empty indices.
Parameters:
- chunks: Document chunks with all of the information needed for indexing to the document
index.
- fresh_index: Boolean indicating whether this is a fresh index with no existing documents.
Returns:
A set of insertion records, one per unique document id, used for deduping chunks
when updating, as well as indicating whether the document was newly indexed or
already existed and was just updated
"""
raise NotImplementedError
class Deletable(abc.ABC):
"""
Class must implement the ability to delete documents by their unique document ids.
"""
@abc.abstractmethod
def delete_single(self, doc_id: str) -> int:
"""
Given a single document id, hard delete it from the document index
Parameters:
- doc_id: document id as specified by the connector
"""
raise NotImplementedError
@abc.abstractmethod
def delete(self, doc_ids: list[str]) -> None:
"""
Given a list of document ids, hard delete them from the document index
Parameters:
- doc_ids: list of document ids as specified by the connector
"""
raise NotImplementedError
class Updatable(abc.ABC):
"""
Class must implement the ability to update certain attributes of a document without needing to
update all of the fields. Specifically, needs to be able to update:
- Access Control List
- Document-set membership
- Boost value (learning from feedback mechanism)
- Whether the document is hidden or not; hidden documents are not returned from search
"""
@abc.abstractmethod
def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
"""
Updates all chunks for a document with the specified fields.
None values mean that the field does not need an update.
The rationale for a single update function is that it allows retries and parallelism
to happen at a higher / more strategic level, is simpler to read, and allows
us to individually handle error conditions per document.
Parameters:
- fields: the fields to update in the document. Any field set to None will not be changed.
Return:
the number of chunks updated
"""
raise NotImplementedError
@abc.abstractmethod
def update(self, update_requests: list[UpdateRequest]) -> None:
"""
Updates some set of chunks. The document and fields to update are specified in the update
requests. Each update request in the list applies its changes to a list of document ids.
None values mean that the field does not need an update.
Parameters:
- update_requests: for a list of document ids in the update request, apply the same updates
to all of the documents with those ids. This is for bulk handling efficiency. Many
updates are done at the connector level, which may involve many documents for that connector
"""
raise NotImplementedError
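# Editorial usage sketch (not part of the original file; names and values are hypothetical):
# hide two documents and move them into a document set with a single bulk request.
# request = UpdateRequest(
#     document_ids=["doc-1", "doc-2"],
#     document_sets={"engineering"},
#     hidden=True,
# )
# document_index.update([request])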
class IdRetrievalCapable(abc.ABC):
"""
Class must implement the ability to retrieve either:
- all of the chunks of a document IN ORDER given a document id.
- a specific chunk given a document id and a chunk index (0 based)
"""
@abc.abstractmethod
def id_based_retrieval(
self,
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
batch_retrieval: bool = False,
) -> list[InferenceChunkUncleaned]:
"""
Fetch chunk(s) based on document id
NOTE: This is used to reconstruct a full document or an extended (multi-chunk) section
of a document. Downstream currently assumes that the chunking does not introduce overlaps
between the chunks. If there are overlaps for the chunks, then the reconstructed document
or extended section will have duplicate segments.
Parameters:
- chunk_requests: requests containing the document id and the chunk range to retrieve
- filters: Filters to apply to retrieval
- batch_retrieval: If True, perform a batch retrieval
Returns:
list of chunks for the document id or the specific chunk by the specified chunk index
and document id
"""
raise NotImplementedError
class HybridCapable(abc.ABC):
"""
Class must implement hybrid (keyword + vector) search functionality
"""
@abc.abstractmethod
def hybrid_retrieval(
self,
query: str,
query_embedding: Embedding,
final_keywords: list[str] | None,
filters: IndexFilters,
hybrid_alpha: float,
time_decay_multiplier: float,
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunkUncleaned]:
"""
Run hybrid search and return a list of inference chunks.
NOTE: the query passed in here is the unprocessed plain text query. Preprocessing is
expected to be handled by this function as it may depend on the index implementation.
Things like query expansion, synonym injection, stop word removal, lemmatization, etc. are
done here.
Parameters:
- query: unmodified user query. This is needed for getting the matching highlighted
keywords
- query_embedding: vector representation of the query, must be of the correct
dimensionality for the primary index
- final_keywords: Final keywords to be used from the query, defaults to query if not set
- filters: standard filter object
- hybrid_alpha: weighting between the keyword and vector search results. It is important
that the two scores are normalized to the same range so that a meaningful
comparison can be made. 1 for 100% weighting on vector score, 0 for 100% weighting
on keyword score.
- time_decay_multiplier: how much to decay the document scores as they age. Some queries,
based on the persona settings, will have this be 2x or 3x the default
- num_to_retrieve: number of highest matching chunks to return
- offset: number of highest matching chunks to skip (kind of like pagination)
Returns:
best matching chunks based on weighted sum of keyword and vector/semantic search scores
"""
raise NotImplementedError
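# Editorial sketch of how the hybrid score is typically combined (an assumption mirroring the
# Vespa rank profile shipped alongside this interface; title_content_ratio is an index-side
# parameter rather than an argument of this method):
#   vector_score  = ratio * title_similarity + (1 - ratio) * content_similarity
#   keyword_score = ratio * bm25(title)      + (1 - ratio) * bm25(content)
#   final_score   = (hybrid_alpha * norm(vector_score)
#                    + (1 - hybrid_alpha) * norm(keyword_score)) * document_boost * recency_bias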
class AdminCapable(abc.ABC):
"""
Class must implement a search for the admin "Explorer" page. The assumption here is that the
admin is not "searching" for knowledge but has some document already in mind. They are either
looking to positively boost it because they know it's a good reference document, looking to
negatively boost it as a way of "deprecating" it, or looking to hide the document.
Assuming the admin knows the document name, this search has high emphasis on the title match.
Suggested implementation:
Keyword only, BM25 search with 5x weighting on the title field compared to the contents
"""
@abc.abstractmethod
def admin_retrieval(
self,
query: str,
filters: IndexFilters,
num_to_retrieve: int,
offset: int = 0,
) -> list[InferenceChunkUncleaned]:
"""
Run the special search for the admin document explorer page
Parameters:
- query: unmodified user query. In this flow, leaving it unmodified is probably best
- filters: standard filter object
- num_to_retrieve: number of highest matching chunks to return
- offset: number of highest matching chunks to skip (kind of like pagination)
Returns:
list of best matching chunks for the explorer page query
"""
raise NotImplementedError
class BaseIndex(
Verifiable,
Indexable,
Updatable,
Deletable,
AdminCapable,
IdRetrievalCapable,
abc.ABC,
):
"""
All basic document index functionalities excluding the actual querying approach.
As a summary, document indices need to be able to
- Verify the schema definition is valid
- Index new documents
- Update specific attributes of existing documents
- Delete documents
- Provide a search for the admin document explorer page
- Retrieve documents based on document id
"""
class DocumentIndex(HybridCapable, BaseIndex, abc.ABC):
"""
A valid document index that can plug into all Onyx flows must implement all of these
functionalities, though "technically" it does not need to be keyword or vector capable as
currently all default search flows use Hybrid Search.
"""

View File

@@ -0,0 +1,221 @@
schema DANSWER_CHUNK_NAME {
document DANSWER_CHUNK_NAME {
TENANT_ID_REPLACEMENT
# Not to be confused with the UUID generated for this chunk which is called documentid by default
field document_id type string {
indexing: summary | attribute
attribute: fast-search
rank: filter
}
field chunk_id type int {
indexing: summary | attribute
}
# Displayed in the UI as the main identifier for the doc
field semantic_identifier type string {
indexing: summary | attribute
}
# Must have an additional field for whether to skip title embeddings
# This information cannot be extracted from either the title field or the title embedding
field skip_title type bool {
indexing: attribute
}
# May not always match the `semantic_identifier` e.g. for Slack docs the
# `semantic_identifier` will be the channel name, but the `title` will be empty
field title type string {
indexing: summary | index | attribute
index: enable-bm25
}
field content type string {
indexing: summary | index
index: enable-bm25
}
# duplication of `content` is far from ideal, but is needed for
# non-gram based highlighting for now. If the capability to re-use a
# single field to do both is added, `content_summary` should be removed
field content_summary type string {
indexing: summary | index
summary: dynamic
}
# Title embedding (x1)
field title_embedding type tensor<float>(x[VARIABLE_DIM]) {
indexing: attribute | index
attribute {
distance-metric: angular
}
}
# Content embeddings (chunk + optional mini chunks embeddings)
# "t" and "x" are arbitrary names, not special keywords
field embeddings type tensor<float>(t{},x[VARIABLE_DIM]) {
indexing: attribute | index
attribute {
distance-metric: angular
}
}
# Starting section of the doc, currently unused as it has been replaced by match highlighting
field blurb type string {
indexing: summary | attribute
}
# https://docs.vespa.ai/en/attributes.html potential enum store for speed, but probably not worth it
field source_type type string {
indexing: summary | attribute
rank: filter
attribute: fast-search
}
# Can also index links https://docs.vespa.ai/en/reference/schema-reference.html#attribute
# URL type matching
field source_links type string {
indexing: summary | attribute
}
field section_continuation type bool {
indexing: summary | attribute
}
# Technically this one should be int, but can't change without causing breaks to existing index
field boost type float {
indexing: summary | attribute
}
field hidden type bool {
indexing: summary | attribute
rank: filter
}
# Needs to have a separate Attribute list for efficient filtering
field metadata_list type array<string> {
indexing: summary | attribute
rank: filter
attribute: fast-search
}
# If chunk is a large chunk, this will contain the ids of the smaller chunks
field large_chunk_reference_ids type array<int> {
indexing: summary | attribute
}
field metadata type string {
indexing: summary | attribute
}
field metadata_suffix type string {
indexing: summary | attribute
}
field doc_updated_at type int {
indexing: summary | attribute
}
field primary_owners type array<string> {
indexing: summary | attribute
}
field secondary_owners type array<string> {
indexing: summary | attribute
}
field access_control_list type weightedset<string> {
indexing: summary | attribute
rank: filter
attribute: fast-search
}
field document_sets type weightedset<string> {
indexing: summary | attribute
rank: filter
attribute: fast-search
}
}
# If using different tokenization settings, the fieldset has to be removed, and the field must
# be specified in the yql like:
# + 'or ({grammar: "weakAnd", defaultIndex:"title"}userInput(@query)) '
# + 'or ({grammar: "weakAnd", defaultIndex:"content"}userInput(@query)) '
# Note: for BM-25, the ngram size (and whether ngrams are used) changes the range of the scores
fieldset default {
fields: content, title
}
rank-profile default_rank {
inputs {
query(decay_factor) float
}
function inline document_boost() {
# 0.5 to 2x score: piecewise sigmoid function stretched out by factor of 3
# meaning requires 3x the number of feedback votes to have default sigmoid effect
expression: if(attribute(boost) < 0, 0.5 + (1 / (1 + exp(-attribute(boost) / 3))), 2 / (1 + exp(-attribute(boost) / 3)))
}
function inline document_age() {
# Time in years (91.3 days ~= 3 Months ~= 1 fiscal quarter if no age found)
expression: max(if(isNan(attribute(doc_updated_at)) == 1, 7890000, now() - attribute(doc_updated_at)) / 31536000, 0)
}
# Document score decays from 1 to 0.75 as age of last updated time increases
function inline recency_bias() {
expression: max(1 / (1 + query(decay_factor) * document_age), 0.75)
}
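# Worked example (editorial, illustrative only): a document last updated one year ago gives a
# document_age of about 1.0; with query(decay_factor) = 0.5 the raw bias is
# 1 / (1 + 0.5 * 1.0) = 0.67, which the max() clamps up to the 0.75 floor.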
match-features: recency_bias
}
rank-profile hybrid_searchVARIABLE_DIM inherits default, default_rank {
inputs {
query(query_embedding) tensor<float>(x[VARIABLE_DIM])
}
function title_vector_score() {
expression {
# If no good matching titles, then it should use the context embeddings rather than having some
# irrelevant title have a vector score of 1. This way at least it will be the doc with the highest
# matching content score getting the full score
max(closeness(field, embeddings), closeness(field, title_embedding))
}
}
# First phase must be vector to allow hits that have no keyword matches
first-phase {
expression: closeness(field, embeddings)
}
# Weighted average between Vector Search and BM-25
global-phase {
expression {
(
# Weighted Vector Similarity Score
(
query(alpha) * (
(query(title_content_ratio) * normalize_linear(title_vector_score))
+
((1 - query(title_content_ratio)) * normalize_linear(closeness(field, embeddings)))
)
)
+
# Weighted Keyword Similarity Score
# Note: for the BM25 Title score, it requires decent stopword removal in the query
# This needs to be the case so there aren't irrelevant titles being normalized to a score of 1
(
(1 - query(alpha)) * (
(query(title_content_ratio) * normalize_linear(bm25(title)))
+
((1 - query(title_content_ratio)) * normalize_linear(bm25(content)))
)
)
)
# Boost based on user feedback
* document_boost
# Decay factor based on time document was last updated
* recency_bias
}
rerank-count: 1000
}
match-features {
bm25(title)
bm25(content)
closeness(field, title_embedding)
closeness(field, embeddings)
document_boost
recency_bias
closest(embeddings)
}
}
# Used when searching from the admin UI for a specific doc to hide / boost
# Very heavily prioritize title
rank-profile admin_search inherits default, default_rank {
first-phase {
expression: bm25(content) + (5 * bm25(title))
}
}
}

View File

@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="utf-8" ?>
<services version="1.0">
<container id="default" version="1.0">
<document-api/>
<search/>
<http>
<server id="default" port="8081"/>
</http>
<nodes>
<node hostalias="danswer-node" />
</nodes>
</container>
<content id="danswer_index" version="1.0">
<redundancy>1</redundancy>
<documents>
<!-- <document type="danswer_chunk" mode="index" /> -->
DOCUMENT_REPLACEMENT
</documents>
<nodes>
<node hostalias="danswer-node" distribution-key="0" />
</nodes>
<tuning>
<resource-limits>
<!-- Default is 75% but this can be increased for Dockerized deployments -->
<!-- https://docs.vespa.ai/en/operations/feed-block.html -->
<disk>0.75</disk>
</resource-limits>
</tuning>
<engine>
<proton>
<tuning>
<searchnode>
<requestthreads>
<persearch>SEARCH_THREAD_NUMBER</persearch>
</requestthreads>
</searchnode>
</tuning>
</proton>
</engine>
<config name="vespa.config.search.summary.juniperrc">
<max_matches>3</max_matches>
<length>750</length>
<surround_max>350</surround_max>
<min_length>300</min_length>
</config>
</content>
</services>

View File

@@ -0,0 +1,8 @@
<validation-overrides>
<allow
until="DATE_REPLACEMENT"
comment="We need to be able to create/delete indices for swapping models">schema-removal</allow>
<allow
until="DATE_REPLACEMENT"
comment="We need to be able to update the schema for updates to the Onyx schema">indexing-change</allow>
</validation-overrides>

View File

@@ -0,0 +1,430 @@
import json
import string
from collections.abc import Callable
from collections.abc import Mapping
from datetime import datetime
from datetime import timezone
from typing import Any
from typing import cast
import httpx
from retry import retry
from onyx.configs.app_configs import LOG_VESPA_TIMING_INFORMATION
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_filters,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_id_based_retrieval_yql,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
from onyx.document_index.vespa_constants import MAX_OR_CONDITIONS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import RECENCY_BIAS
from onyx.document_index.vespa_constants import SEARCH_ENDPOINT
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.utils.logger import setup_logger
from onyx.utils.threadpool_concurrency import run_functions_tuples_in_parallel
logger = setup_logger()
def _process_dynamic_summary(
dynamic_summary: str, max_summary_length: int = 400
) -> list[str]:
if not dynamic_summary:
return []
current_length = 0
processed_summary: list[str] = []
for summary_section in dynamic_summary.split("<sep />"):
# if we're past the desired max length, break at the last word
if current_length + len(summary_section) >= max_summary_length:
summary_section = summary_section[: max_summary_length - current_length]
summary_section = summary_section.lstrip() # remove any leading whitespace
# handle the case where the truncated section is either just a
# single (partial) word or if it's empty
first_space = summary_section.find(" ")
if first_space == -1:
# add ``...`` to previous section
if processed_summary:
processed_summary[-1] += "..."
break
# handle the valid truncated section case
summary_section = summary_section.rsplit(" ", 1)[0]
if summary_section[-1] in string.punctuation:
summary_section = summary_section[:-1]
summary_section += "..."
processed_summary.append(summary_section)
break
processed_summary.append(summary_section)
current_length += len(summary_section)
return processed_summary
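# Editorial trace (not part of the original file): with max_summary_length=10,
# _process_dynamic_summary("hello world<sep />foo bar", 10) truncates the first section at the
# last complete word, appends an ellipsis, and returns ["hello..."].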
def _vespa_hit_to_inference_chunk(
hit: dict[str, Any], null_score: bool = False
) -> InferenceChunkUncleaned:
fields = cast(dict[str, Any], hit["fields"])
# parse fields that are stored as strings, but are really json / datetime
metadata = json.loads(fields[METADATA]) if METADATA in fields else {}
updated_at = (
datetime.fromtimestamp(fields[DOC_UPDATED_AT], tz=timezone.utc)
if DOC_UPDATED_AT in fields
else None
)
match_highlights = _process_dynamic_summary(
# fallback to regular `content` if the `content_summary` field
# isn't present
dynamic_summary=hit["fields"].get(CONTENT_SUMMARY, hit["fields"][CONTENT]),
)
semantic_identifier = fields.get(SEMANTIC_IDENTIFIER, "")
if not semantic_identifier:
logger.error(
f"Chunk with blurb: {fields.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier"
)
source_links = fields.get(SOURCE_LINKS, {})
source_links_dict_unprocessed = (
json.loads(source_links) if isinstance(source_links, str) else source_links
)
source_links_dict = {
int(k): v
for k, v in cast(dict[str, str], source_links_dict_unprocessed).items()
}
return InferenceChunkUncleaned(
chunk_id=fields[CHUNK_ID],
blurb=fields.get(BLURB, ""), # Unused
content=fields[CONTENT], # Includes extra title prefix and metadata suffix
source_links=source_links_dict or {0: ""},
section_continuation=fields[SECTION_CONTINUATION],
document_id=fields[DOCUMENT_ID],
source_type=fields[SOURCE_TYPE],
title=fields.get(TITLE),
semantic_identifier=fields[SEMANTIC_IDENTIFIER],
boost=fields.get(BOOST, 1),
recency_bias=fields.get("matchfeatures", {}).get(RECENCY_BIAS, 1.0),
score=None if null_score else hit.get("relevance", 0),
hidden=fields.get(HIDDEN, False),
primary_owners=fields.get(PRIMARY_OWNERS),
secondary_owners=fields.get(SECONDARY_OWNERS),
large_chunk_reference_ids=fields.get(LARGE_CHUNK_REFERENCE_IDS, []),
metadata=metadata,
metadata_suffix=fields.get(METADATA_SUFFIX),
match_highlights=match_highlights,
updated_at=updated_at,
)
def _get_chunks_via_visit_api(
chunk_request: VespaChunkRequest,
index_name: str,
filters: IndexFilters,
field_names: list[str] | None = None,
get_large_chunks: bool = False,
) -> list[dict]:
# Constructing the URL for the Visit API
# NOTE: visit API uses the same URL as the document API, but with different params
url = DOCUMENT_ID_ENDPOINT.format(index_name=index_name)
# build the list of fields to retrieve
field_set_list = (
None
if not field_names
else [f"{index_name}:{field_name}" for field_name in field_names]
)
acl_fieldset_entry = f"{index_name}:{ACCESS_CONTROL_LIST}"
if (
field_set_list
and filters.access_control_list
and acl_fieldset_entry not in field_set_list
):
field_set_list.append(acl_fieldset_entry)
field_set = ",".join(field_set_list) if field_set_list else None
# build filters
selection = f"{index_name}.document_id=='{chunk_request.document_id}'"
if chunk_request.is_capped:
selection += f" and {index_name}.chunk_id>={chunk_request.min_chunk_ind or 0}"
selection += f" and {index_name}.chunk_id<={chunk_request.max_chunk_ind}"
if not get_large_chunks:
selection += f" and {index_name}.large_chunk_reference_ids == null"
# Setting up the selection criteria in the query parameters
params = {
# NOTE: Document Selector Language doesn't allow `contains`, so we can't check
# for the ACL in the selection. Instead, we have to check as a postfilter
"selection": selection,
"continuation": None,
"wantedDocumentCount": 1_000,
"fieldSet": field_set,
}
document_chunks: list[dict] = []
while True:
try:
filtered_params = {k: v for k, v in params.items() if v is not None}
with get_vespa_http_client() as http_client:
response = http_client.get(url, params=filtered_params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
)
raise httpx.HTTPError(error_base) from e
# Check if the response contains any documents
response_data = response.json()
if "documents" in response_data:
for document in response_data["documents"]:
if filters.access_control_list:
document_acl = document["fields"].get(ACCESS_CONTROL_LIST)
if not document_acl or not any(
user_acl_entry in document_acl
for user_acl_entry in filters.access_control_list
):
continue
document_chunks.append(document)
# Check for continuation token to handle pagination
if "continuation" in response_data and response_data["continuation"]:
params["continuation"] = response_data["continuation"]
else:
break # Exit loop if no continuation token
return document_chunks
@retry(tries=10, delay=1, backoff=2)
def get_all_vespa_ids_for_document_id(
document_id: str,
index_name: str,
filters: IndexFilters | None = None,
get_large_chunks: bool = False,
) -> list[str]:
document_chunks = _get_chunks_via_visit_api(
chunk_request=VespaChunkRequest(document_id=document_id),
index_name=index_name,
filters=filters or IndexFilters(access_control_list=None),
field_names=[DOCUMENT_ID],
get_large_chunks=get_large_chunks,
)
return [chunk["id"].split("::", 1)[-1] for chunk in document_chunks]
def parallel_visit_api_retrieval(
index_name: str,
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
functions_with_args: list[tuple[Callable, tuple]] = [
(
_get_chunks_via_visit_api,
(chunk_request, index_name, filters, get_large_chunks),
)
for chunk_request in chunk_requests
]
parallel_results = run_functions_tuples_in_parallel(
functions_with_args, allow_failures=True
)
# Any failures to retrieve would give a None, drop the Nones and empty lists
vespa_chunk_sets = [res for res in parallel_results if res]
flattened_vespa_chunks = []
for chunk_set in vespa_chunk_sets:
flattened_vespa_chunks.extend(chunk_set)
inference_chunks = [
_vespa_hit_to_inference_chunk(chunk, null_score=True)
for chunk in flattened_vespa_chunks
]
return inference_chunks
@retry(tries=3, delay=1, backoff=2)
def query_vespa(
query_params: Mapping[str, str | int | float]
) -> list[InferenceChunkUncleaned]:
if "query" in query_params and not cast(str, query_params["query"]).strip():
raise ValueError("No/empty query received")
params = dict(
**query_params,
**{
"presentation.timing": True,
}
if LOG_VESPA_TIMING_INFORMATION
else {},
)
try:
with get_vespa_http_client() as http_client:
response = http_client.post(SEARCH_ENDPOINT, json=params)
response.raise_for_status()
except httpx.HTTPError as e:
error_base = "Failed to query Vespa"
logger.error(
f"{error_base}:\n"
f"Request URL: {e.request.url}\n"
f"Request Headers: {e.request.headers}\n"
f"Request Payload: {params}\n"
f"Exception: {str(e)}"
)
raise httpx.HTTPError(error_base) from e
response_json: dict[str, Any] = response.json()
if LOG_VESPA_TIMING_INFORMATION:
logger.debug("Vespa timing info: %s", response_json.get("timing"))
hits = response_json["root"].get("children", [])
if not hits:
logger.warning(
f"No hits found for YQL Query: {query_params.get('yql', 'No YQL Query')}"
)
logger.debug(f"Vespa Response: {response.text}")
for hit in hits:
if hit["fields"].get(CONTENT) is None:
identifier = hit["fields"].get("documentid") or hit["id"]
logger.error(
f"Vespa Index with Vespa ID {identifier} has no contents. "
f"This is invalid because the vector is not meaningful and keywordsearch cannot "
f"fetch this document"
)
filtered_hits = [hit for hit in hits if hit["fields"].get(CONTENT) is not None]
inference_chunks = [_vespa_hit_to_inference_chunk(hit) for hit in filtered_hits]
# Good Debugging Spot
return inference_chunks
def _get_chunks_via_batch_search(
index_name: str,
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
if not chunk_requests:
return []
filters_str = build_vespa_filters(filters=filters, include_hidden=True)
yql = (
YQL_BASE.format(index_name=index_name)
+ filters_str
+ build_vespa_id_based_retrieval_yql(chunk_requests[0])
)
chunk_requests.pop(0)
for request in chunk_requests:
yql += " or " + build_vespa_id_based_retrieval_yql(request)
params: dict[str, str | int | float] = {
"yql": yql,
"hits": MAX_ID_SEARCH_QUERY_SIZE,
}
inference_chunks = query_vespa(params)
if not get_large_chunks:
inference_chunks = [
chunk for chunk in inference_chunks if not chunk.large_chunk_reference_ids
]
inference_chunks.sort(key=lambda chunk: chunk.chunk_id)
return inference_chunks
def batch_search_api_retrieval(
index_name: str,
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
retrieved_chunks: list[InferenceChunkUncleaned] = []
capped_requests: list[VespaChunkRequest] = []
uncapped_requests: list[VespaChunkRequest] = []
chunk_count = 0
for req_ind, request in enumerate(chunk_requests, start=1):
# All requests without a chunk range are uncapped
# Uncapped requests are retrieved using the Visit API
range = request.range
if range is None:
uncapped_requests.append(request)
continue
if (
chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE
or req_ind % MAX_OR_CONDITIONS == 0
):
retrieved_chunks.extend(
_get_chunks_via_batch_search(
index_name=index_name,
chunk_requests=capped_requests,
filters=filters,
get_large_chunks=get_large_chunks,
)
)
capped_requests = []
chunk_count = 0
capped_requests.append(request)
chunk_count += range
if capped_requests:
retrieved_chunks.extend(
_get_chunks_via_batch_search(
index_name=index_name,
chunk_requests=capped_requests,
filters=filters,
get_large_chunks=get_large_chunks,
)
)
if uncapped_requests:
logger.debug(f"Retrieving {len(uncapped_requests)} uncapped requests")
retrieved_chunks.extend(
parallel_visit_api_retrieval(
index_name, uncapped_requests, filters, get_large_chunks
)
)
return retrieved_chunks

View File

@@ -0,0 +1,65 @@
import concurrent.futures
import httpx
from retry import retry
from onyx.document_index.vespa.chunk_retrieval import (
get_all_vespa_ids_for_document_id,
)
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.utils.logger import setup_logger
logger = setup_logger()
CONTENT_SUMMARY = "content_summary"
@retry(tries=3, delay=1, backoff=2)
def _delete_vespa_doc_chunks(
document_id: str, index_name: str, http_client: httpx.Client
) -> None:
doc_chunk_ids = get_all_vespa_ids_for_document_id(
document_id=document_id,
index_name=index_name,
get_large_chunks=True,
)
for chunk_id in doc_chunk_ids:
try:
res = http_client.delete(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{chunk_id}"
)
res.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(f"Failed to delete chunk, details: {e.response.text}")
raise
def delete_vespa_docs(
document_ids: list[str],
index_name: str,
http_client: httpx.Client,
executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> None:
external_executor = True
if not executor:
external_executor = False
executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)
try:
doc_deletion_future = {
executor.submit(
_delete_vespa_doc_chunks, doc_id, index_name, http_client
): doc_id
for doc_id in document_ids
}
for future in concurrent.futures.as_completed(doc_deletion_future):
# Will raise exception if the deletion raised an exception
future.result()
finally:
if not external_executor:
executor.shutdown(wait=True)

View File

@@ -0,0 +1,915 @@
import concurrent.futures
import io
import logging
import os
import re
import time
import urllib
import zipfile
from dataclasses import dataclass
from datetime import datetime
from datetime import timedelta
from typing import BinaryIO
from typing import cast
from typing import List
import httpx # type: ignore
import requests # type: ignore
from onyx.configs.app_configs import DOCUMENT_INDEX_NAME
from onyx.configs.chat_configs import DOC_TIME_DECAY
from onyx.configs.chat_configs import NUM_RETURNED_HITS
from onyx.configs.chat_configs import TITLE_CONTENT_RATIO
from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
from onyx.configs.constants import KV_REINDEX_KEY
from onyx.context.search.models import IndexFilters
from onyx.context.search.models import InferenceChunkUncleaned
from onyx.document_index.interfaces import DocumentIndex
from onyx.document_index.interfaces import DocumentInsertionRecord
from onyx.document_index.interfaces import UpdateRequest
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.interfaces import VespaDocumentFields
from onyx.document_index.vespa.chunk_retrieval import batch_search_api_retrieval
from onyx.document_index.vespa.chunk_retrieval import (
get_all_vespa_ids_for_document_id,
)
from onyx.document_index.vespa.chunk_retrieval import (
parallel_visit_api_retrieval,
)
from onyx.document_index.vespa.chunk_retrieval import query_vespa
from onyx.document_index.vespa.deletion import delete_vespa_docs
from onyx.document_index.vespa.indexing_utils import batch_index_vespa_chunks
from onyx.document_index.vespa.indexing_utils import clean_chunk_id_copy
from onyx.document_index.vespa.indexing_utils import (
get_existing_documents_from_chunks,
)
from onyx.document_index.vespa.shared_utils.utils import get_vespa_http_client
from onyx.document_index.vespa.shared_utils.utils import (
replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa.shared_utils.vespa_request_builders import (
build_vespa_filters,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BATCH_SIZE
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DANSWER_CHUNK_REPLACEMENT_PAT
from onyx.document_index.vespa_constants import DATE_REPLACEMENT
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import DOCUMENT_REPLACEMENT_PAT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT
from onyx.document_index.vespa_constants import TENANT_ID_PAT
from onyx.document_index.vespa_constants import TENANT_ID_REPLACEMENT
from onyx.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT
from onyx.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT
from onyx.document_index.vespa_constants import VESPA_TIMEOUT
from onyx.document_index.vespa_constants import YQL_BASE
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.key_value_store.factory import get_kv_store
from onyx.utils.batching import batch_generator
from onyx.utils.logger import setup_logger
from shared_configs.configs import MULTI_TENANT
from shared_configs.model_server_models import Embedding
logger = setup_logger()
# Set the logging level to WARNING to ignore INFO and DEBUG logs
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)
@dataclass
class _VespaUpdateRequest:
document_id: str
url: str
update_request: dict[str, dict]
def in_memory_zip_from_file_bytes(file_contents: dict[str, bytes]) -> BinaryIO:
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zipf:
for filename, content in file_contents.items():
zipf.writestr(filename, content)
zip_buffer.seek(0)
return zip_buffer
def _create_document_xml_lines(doc_names: list[str | None] | list[str]) -> str:
doc_lines = [
f'<document type="{doc_name}" mode="index" />'
for doc_name in doc_names
if doc_name
]
return "\n".join(doc_lines)
def add_ngrams_to_schema(schema_content: str) -> str:
# Add the match blocks containing gram and gram-size to title and content fields
schema_content = re.sub(
r"(field title type string \{[^}]*indexing: summary \| index \| attribute)",
r"\1\n match {\n gram\n gram-size: 3\n }",
schema_content,
)
schema_content = re.sub(
r"(field content type string \{[^}]*indexing: summary \| index)",
r"\1\n match {\n gram\n gram-size: 3\n }",
schema_content,
)
return schema_content
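# Editorial sketch (not part of the original file) of the transformation above: the regexes
# append a match block to the captured field declarations, e.g. the title field gains
#   match {
#   gram
#   gram-size: 3
#   }
# right after its indexing line, enabling trigram (3-gram) matching for partial keyword hits.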
class VespaIndex(DocumentIndex):
def __init__(
self,
index_name: str,
secondary_index_name: str | None,
multitenant: bool = False,
) -> None:
self.index_name = index_name
self.secondary_index_name = secondary_index_name
self.multitenant = multitenant
self.http_client = get_vespa_http_client()
def ensure_indices_exist(
self,
index_embedding_dim: int,
secondary_index_embedding_dim: int | None,
) -> None:
if MULTI_TENANT:
logger.info(
"Skipping Vespa index seup for multitenant (would wipe all indices)"
)
return None
deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate"
logger.notice(f"Deploying Vespa application package to {deploy_url}")
vespa_schema_path = os.path.join(
os.getcwd(), "onyx", "document_index", "vespa", "app_config"
)
schema_file = os.path.join(vespa_schema_path, "schemas", "danswer_chunk.sd")
services_file = os.path.join(vespa_schema_path, "services.xml")
overrides_file = os.path.join(vespa_schema_path, "validation-overrides.xml")
with open(services_file, "r") as services_f:
services_template = services_f.read()
schema_names = [self.index_name, self.secondary_index_name]
doc_lines = _create_document_xml_lines(schema_names)
services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines)
services = services.replace(
SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
)
kv_store = get_kv_store()
needs_reindexing = False
try:
needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
except Exception:
logger.debug("Could not load the reindexing flag. Using ngrams")
with open(overrides_file, "r") as overrides_f:
overrides_template = overrides_f.read()
# Vespa requires an override to erase data, including the indices we're no longer using
# It also caps the override at 30 days out, so we set it to 7 days dynamically
now = datetime.now()
date_in_7_days = now + timedelta(days=7)
formatted_date = date_in_7_days.strftime("%Y-%m-%d")
overrides = overrides_template.replace(DATE_REPLACEMENT, formatted_date)
zip_dict = {
"services.xml": services.encode("utf-8"),
"validation-overrides.xml": overrides.encode("utf-8"),
}
with open(schema_file, "r") as schema_f:
schema_template = schema_f.read()
schema_template = schema_template.replace(TENANT_ID_PAT, "")
schema = schema_template.replace(
DANSWER_CHUNK_REPLACEMENT_PAT, self.index_name
).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim))
schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
schema = schema.replace(TENANT_ID_PAT, "")
zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8")
if self.secondary_index_name:
upcoming_schema = schema_template.replace(
DANSWER_CHUNK_REPLACEMENT_PAT, self.secondary_index_name
).replace(VESPA_DIM_REPLACEMENT_PAT, str(secondary_index_embedding_dim))
zip_dict[f"schemas/{schema_names[1]}.sd"] = upcoming_schema.encode("utf-8")
zip_file = in_memory_zip_from_file_bytes(zip_dict)
headers = {"Content-Type": "application/zip"}
response = requests.post(deploy_url, headers=headers, data=zip_file)
if response.status_code != 200:
raise RuntimeError(
f"Failed to prepare Vespa Onyx Index. Response: {response.text}"
)
@staticmethod
def register_multitenant_indices(
indices: list[str],
embedding_dims: list[int],
) -> None:
if not MULTI_TENANT:
raise ValueError("Multi-tenant is not enabled")
deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate"
logger.info(f"Deploying Vespa application package to {deploy_url}")
vespa_schema_path = os.path.join(
os.getcwd(), "onyx", "document_index", "vespa", "app_config"
)
schema_file = os.path.join(vespa_schema_path, "schemas", "danswer_chunk.sd")
services_file = os.path.join(vespa_schema_path, "services.xml")
overrides_file = os.path.join(vespa_schema_path, "validation-overrides.xml")
with open(services_file, "r") as services_f:
services_template = services_f.read()
# Generate schema names from index settings
schema_names = [index_name for index_name in indices]
full_schemas = schema_names
doc_lines = _create_document_xml_lines(full_schemas)
services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines)
services = services.replace(
SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS)
)
kv_store = get_kv_store()
needs_reindexing = False
try:
needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY))
except Exception:
logger.debug("Could not load the reindexing flag. Using ngrams")
with open(overrides_file, "r") as overrides_f:
overrides_template = overrides_f.read()
# Vespa requires an override to erase data, including the indices we're no longer using
# It also caps the override at 30 days out, so we set it to 7 days dynamically
now = datetime.now()
date_in_7_days = now + timedelta(days=7)
formatted_date = date_in_7_days.strftime("%Y-%m-%d")
overrides = overrides_template.replace(DATE_REPLACEMENT, formatted_date)
zip_dict = {
"services.xml": services.encode("utf-8"),
"validation-overrides.xml": overrides.encode("utf-8"),
}
with open(schema_file, "r") as schema_f:
schema_template = schema_f.read()
for i, index_name in enumerate(indices):
embedding_dim = embedding_dims[i]
logger.info(
f"Creating index: {index_name} with embedding dimension: {embedding_dim}"
)
schema = schema_template.replace(
DANSWER_CHUNK_REPLACEMENT_PAT, index_name
).replace(VESPA_DIM_REPLACEMENT_PAT, str(embedding_dim))
schema = schema.replace(
TENANT_ID_PAT, TENANT_ID_REPLACEMENT if MULTI_TENANT else ""
)
schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
zip_dict[f"schemas/{index_name}.sd"] = schema.encode("utf-8")
zip_file = in_memory_zip_from_file_bytes(zip_dict)
headers = {"Content-Type": "application/zip"}
response = requests.post(deploy_url, headers=headers, data=zip_file)
if response.status_code != 200:
raise RuntimeError(
f"Failed to prepare Vespa Onyx Indexes. Response: {response.text}"
)
def index(
self,
chunks: list[DocMetadataAwareIndexChunk],
fresh_index: bool = False,
) -> set[DocumentInsertionRecord]:
"""Receive a list of chunks from a batch of documents and index the chunks into Vespa along
with updating the associated permissions. Assumes that a document will not be split into
multiple chunk batches calling this function multiple times, otherwise only the last set of
chunks will be kept"""
# IMPORTANT: This must be done one index at a time, do not use secondary index here
cleaned_chunks = [clean_chunk_id_copy(chunk) for chunk in chunks]
existing_docs: set[str] = set()
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
with (
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
get_vespa_http_client() as http_client,
):
if not fresh_index:
# Check for existing documents; existing documents need to have all of their chunks deleted
# prior to indexing as the document size (num chunks) may have shrunk
first_chunks = [
chunk for chunk in cleaned_chunks if chunk.chunk_id == 0
]
for chunk_batch in batch_generator(first_chunks, BATCH_SIZE):
existing_docs.update(
get_existing_documents_from_chunks(
chunks=chunk_batch,
index_name=self.index_name,
http_client=http_client,
executor=executor,
)
)
for doc_id_batch in batch_generator(existing_docs, BATCH_SIZE):
delete_vespa_docs(
document_ids=doc_id_batch,
index_name=self.index_name,
http_client=http_client,
executor=executor,
)
for chunk_batch in batch_generator(cleaned_chunks, BATCH_SIZE):
batch_index_vespa_chunks(
chunks=chunk_batch,
index_name=self.index_name,
http_client=http_client,
multitenant=self.multitenant,
executor=executor,
)
all_doc_ids = {chunk.source_document.id for chunk in cleaned_chunks}
return {
DocumentInsertionRecord(
document_id=doc_id,
already_existed=doc_id in existing_docs,
)
for doc_id in all_doc_ids
}
@staticmethod
def _apply_updates_batched(
updates: list[_VespaUpdateRequest],
batch_size: int = BATCH_SIZE,
) -> None:
"""Runs a batch of updates in parallel via the ThreadPoolExecutor."""
def _update_chunk(
update: _VespaUpdateRequest, http_client: httpx.Client
) -> httpx.Response:
logger.debug(
f"Updating with request to {update.url} with body {update.update_request}"
)
return http_client.put(
update.url,
headers={"Content-Type": "application/json"},
json=update.update_request,
)
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
with (
concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
get_vespa_http_client() as http_client,
):
for update_batch in batch_generator(updates, batch_size):
future_to_document_id = {
executor.submit(
_update_chunk,
update,
http_client,
): update.document_id
for update in update_batch
}
for future in concurrent.futures.as_completed(future_to_document_id):
res = future.result()
try:
res.raise_for_status()
except requests.HTTPError as e:
failure_msg = f"Failed to update document: {future_to_document_id[future]}"
raise requests.HTTPError(failure_msg) from e
def update(self, update_requests: list[UpdateRequest]) -> None:
logger.debug(f"Updating {len(update_requests)} documents in Vespa")
# Handle Vespa character limitations
# Mutating update_requests, but they're not used later anyway
for update_request in update_requests:
update_request.document_ids = [
replace_invalid_doc_id_characters(doc_id)
for doc_id in update_request.document_ids
]
update_start = time.monotonic()
processed_updates_requests: list[_VespaUpdateRequest] = []
all_doc_chunk_ids: dict[str, list[str]] = {}
# Fetch all chunks for each document ahead of time
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
chunk_id_start_time = time.monotonic()
with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
future_to_doc_chunk_ids = {
executor.submit(
get_all_vespa_ids_for_document_id,
document_id=document_id,
index_name=index_name,
filters=None,
get_large_chunks=True,
): (document_id, index_name)
for index_name in index_names
for update_request in update_requests
for document_id in update_request.document_ids
}
for future in concurrent.futures.as_completed(future_to_doc_chunk_ids):
document_id, index_name = future_to_doc_chunk_ids[future]
try:
doc_chunk_ids = future.result()
if document_id not in all_doc_chunk_ids:
all_doc_chunk_ids[document_id] = []
all_doc_chunk_ids[document_id].extend(doc_chunk_ids)
except Exception as e:
logger.error(
f"Error retrieving chunk IDs for document {document_id} in index {index_name}: {e}"
)
logger.debug(
f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs"
)
# Build the _VespaUpdateRequest objects
for update_request in update_requests:
update_dict: dict[str, dict] = {"fields": {}}
if update_request.boost is not None:
update_dict["fields"][BOOST] = {"assign": update_request.boost}
if update_request.document_sets is not None:
update_dict["fields"][DOCUMENT_SETS] = {
"assign": {
document_set: 1 for document_set in update_request.document_sets
}
}
if update_request.access is not None:
update_dict["fields"][ACCESS_CONTROL_LIST] = {
"assign": {
acl_entry: 1 for acl_entry in update_request.access.to_acl()
}
}
if update_request.hidden is not None:
update_dict["fields"][HIDDEN] = {"assign": update_request.hidden}
if not update_dict["fields"]:
logger.error("Update request received but nothing to update")
continue
for document_id in update_request.document_ids:
for doc_chunk_id in all_doc_chunk_ids[document_id]:
processed_updates_requests.append(
_VespaUpdateRequest(
document_id=document_id,
url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}",
update_request=update_dict,
)
)
self._apply_updates_batched(processed_updates_requests)
logger.debug(
"Finished updating Vespa documents in %.2f seconds",
time.monotonic() - update_start,
)
def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int:
"""Note: if the document id does not exist, the update will be a no-op and the
function will complete with no errors or exceptions.
Handle other exceptions if you wish to implement retry behavior
"""
total_chunks_updated = 0
# Handle Vespa character limitations
# The original doc_id is not mutated; a normalized copy is used below
normalized_doc_id = replace_invalid_doc_id_characters(doc_id)
# Build the _VespaUpdateRequest objects
update_dict: dict[str, dict] = {"fields": {}}
if fields.boost is not None:
update_dict["fields"][BOOST] = {"assign": fields.boost}
if fields.document_sets is not None:
update_dict["fields"][DOCUMENT_SETS] = {
"assign": {document_set: 1 for document_set in fields.document_sets}
}
if fields.access is not None:
update_dict["fields"][ACCESS_CONTROL_LIST] = {
"assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()}
}
if fields.hidden is not None:
update_dict["fields"][HIDDEN] = {"assign": fields.hidden}
if not update_dict["fields"]:
logger.error("Update request received but nothing to update")
return 0
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with get_vespa_http_client() as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
"selection": f"{index_name}.document_id=='{normalized_doc_id}'",
"cluster": DOCUMENT_INDEX_NAME,
}
)
while True:
try:
resp = http_client.put(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}",
params=params,
headers={"Content-Type": "application/json"},
json=update_dict,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(
f"Failed to update chunks, details: {e.response.text}"
)
raise
resp_data = resp.json()
if "documentCount" in resp_data:
chunks_updated = resp_data["documentCount"]
total_chunks_updated += chunks_updated
# Check for continuation token to handle pagination
if "continuation" not in resp_data:
break # Exit loop if no continuation token
if not resp_data["continuation"]:
break # Exit loop if continuation token is empty
params = params.set("continuation", resp_data["continuation"])
logger.debug(
f"VespaIndex.update_single: "
f"index={index_name} "
f"doc={normalized_doc_id} "
f"chunks_updated={total_chunks_updated}"
)
return total_chunks_updated
def delete(self, doc_ids: list[str]) -> None:
logger.info(f"Deleting {len(doc_ids)} documents from Vespa")
doc_ids = [replace_invalid_doc_id_characters(doc_id) for doc_id in doc_ids]
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
with get_vespa_http_client() as http_client:
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
for index_name in index_names:
delete_vespa_docs(
document_ids=doc_ids, index_name=index_name, http_client=http_client
)
return
def delete_single(self, doc_id: str) -> int:
"""Possibly faster overall than the delete method due to using a single
delete call with a selection query."""
total_chunks_deleted = 0
# Vespa deletion is poorly documented ... luckily we found this
# https://docs.vespa.ai/en/operations/batch-delete.html#example
doc_id = replace_invalid_doc_id_characters(doc_id)
# NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
# indexing / updates / deletes since we have to make a large volume of requests.
index_names = [self.index_name]
if self.secondary_index_name:
index_names.append(self.secondary_index_name)
with get_vespa_http_client() as http_client:
for index_name in index_names:
params = httpx.QueryParams(
{
"selection": f"{index_name}.document_id=='{doc_id}'",
"cluster": DOCUMENT_INDEX_NAME,
}
)
while True:
try:
resp = http_client.delete(
f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}",
params=params,
)
resp.raise_for_status()
except httpx.HTTPStatusError as e:
logger.error(
f"Failed to delete chunk, details: {e.response.text}"
)
raise
resp_data = resp.json()
if "documentCount" in resp_data:
chunks_deleted = resp_data["documentCount"]
total_chunks_deleted += chunks_deleted
# Check for continuation token to handle pagination
if "continuation" not in resp_data:
break # Exit loop if no continuation token
if not resp_data["continuation"]:
break # Exit loop if continuation token is empty
params = params.set("continuation", resp_data["continuation"])
logger.debug(
f"VespaIndex.delete_single: "
f"index={index_name} "
f"doc={doc_id} "
f"chunks_deleted={total_chunks_deleted}"
)
return total_chunks_deleted
def id_based_retrieval(
self,
chunk_requests: list[VespaChunkRequest],
filters: IndexFilters,
batch_retrieval: bool = False,
get_large_chunks: bool = False,
) -> list[InferenceChunkUncleaned]:
if batch_retrieval:
return batch_search_api_retrieval(
index_name=self.index_name,
chunk_requests=chunk_requests,
filters=filters,
get_large_chunks=get_large_chunks,
)
return parallel_visit_api_retrieval(
index_name=self.index_name,
chunk_requests=chunk_requests,
filters=filters,
get_large_chunks=get_large_chunks,
)
def hybrid_retrieval(
self,
query: str,
query_embedding: Embedding,
final_keywords: list[str] | None,
filters: IndexFilters,
hybrid_alpha: float,
time_decay_multiplier: float,
num_to_retrieve: int,
offset: int = 0,
title_content_ratio: float | None = TITLE_CONTENT_RATIO,
) -> list[InferenceChunkUncleaned]:
vespa_where_clauses = build_vespa_filters(filters)
# Needs to be at least as much as the value set in Vespa schema config
target_hits = max(10 * num_to_retrieve, 1000)
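# The YQL below combines two approximate nearest neighbor clauses (content and title
# embeddings) with lexical matching: a weakAnd over the default fields plus a match on
# the content summary field used for highlighting.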
yql = (
YQL_BASE.format(index_name=self.index_name)
+ vespa_where_clauses
+ f"(({{targetHits: {target_hits}}}nearestNeighbor(embeddings, query_embedding)) "
+ f"or ({{targetHits: {target_hits}}}nearestNeighbor(title_embedding, query_embedding)) "
+ 'or ({grammar: "weakAnd"}userInput(@query)) '
+ f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
)
final_query = " ".join(final_keywords) if final_keywords else query
logger.debug(f"Query YQL: {yql}")
params: dict[str, str | int | float] = {
"yql": yql,
"query": final_query,
"input.query(query_embedding)": str(query_embedding),
"input.query(decay_factor)": str(DOC_TIME_DECAY * time_decay_multiplier),
"input.query(alpha)": hybrid_alpha,
"input.query(title_content_ratio)": title_content_ratio
if title_content_ratio is not None
else TITLE_CONTENT_RATIO,
"hits": num_to_retrieve,
"offset": offset,
"ranking.profile": f"hybrid_search{len(query_embedding)}",
"timeout": VESPA_TIMEOUT,
}
return query_vespa(params)
def admin_retrieval(
self,
query: str,
filters: IndexFilters,
num_to_retrieve: int = NUM_RETURNED_HITS,
offset: int = 0,
) -> list[InferenceChunkUncleaned]:
vespa_where_clauses = build_vespa_filters(filters, include_hidden=True)
yql = (
YQL_BASE.format(index_name=self.index_name)
+ vespa_where_clauses
+ '({grammar: "weakAnd"}userInput(@query) '
# `({defaultIndex: "content_summary"}userInput(@query))` section is
# needed for highlighting while the N-gram highlighting is broken /
# not working as desired
+ f'or ({{defaultIndex: "{CONTENT_SUMMARY}"}}userInput(@query)))'
)
params: dict[str, str | int] = {
"yql": yql,
"query": query,
"hits": num_to_retrieve,
"offset": 0,
"ranking.profile": "admin_search",
"timeout": VESPA_TIMEOUT,
}
return query_vespa(params)
@classmethod
def delete_entries_by_tenant_id(cls, tenant_id: str, index_name: str) -> None:
"""
Deletes all entries in the specified index with the given tenant_id.
Parameters:
tenant_id (str): The tenant ID whose documents are to be deleted.
index_name (str): The name of the index from which to delete documents.
"""
logger.info(
f"Deleting entries with tenant_id: {tenant_id} from index: {index_name}"
)
# Step 1: Retrieve all document IDs with the given tenant_id
document_ids = cls._get_all_document_ids_by_tenant_id(tenant_id, index_name)
if not document_ids:
logger.info(
f"No documents found with tenant_id: {tenant_id} in index: {index_name}"
)
return
# Step 2: Delete documents in batches
delete_requests = [
_VespaDeleteRequest(document_id=doc_id, index_name=index_name)
for doc_id in document_ids
]
cls._apply_deletes_batched(delete_requests)
@classmethod
def _get_all_document_ids_by_tenant_id(
cls, tenant_id: str, index_name: str
) -> list[str]:
"""
Retrieves all document IDs with the specified tenant_id, handling pagination.
Parameters:
tenant_id (str): The tenant ID to search for.
index_name (str): The name of the index to search in.
Returns:
List[str]: A list of document IDs matching the tenant_id.
"""
offset = 0
limit = 1000 # Vespa's maximum hits per query
document_ids = []
logger.debug(
f"Starting document ID retrieval for tenant_id: {tenant_id} in index: {index_name}"
)
while True:
# Construct the query to fetch document IDs
query_params = {
"yql": f'select id from sources * where tenant_id contains "{tenant_id}";',
"offset": str(offset),
"hits": str(limit),
"timeout": "10s",
"format": "json",
"summary": "id",
}
url = f"{VESPA_APPLICATION_ENDPOINT}/search/"
logger.debug(
f"Querying for document IDs with tenant_id: {tenant_id}, offset: {offset}"
)
with get_vespa_http_client(no_timeout=True) as http_client:
response = http_client.get(url, params=query_params)
response.raise_for_status()
search_result = response.json()
hits = search_result.get("root", {}).get("children", [])
if not hits:
break
for hit in hits:
doc_id = hit.get("id")
if doc_id:
document_ids.append(doc_id)
offset += limit # Move to the next page
logger.debug(
f"Retrieved {len(document_ids)} document IDs for tenant_id: {tenant_id}"
)
return document_ids
@classmethod
def _apply_deletes_batched(
cls,
delete_requests: List["_VespaDeleteRequest"],
batch_size: int = BATCH_SIZE,
) -> None:
"""
Deletes documents in batches using multiple threads.
Parameters:
delete_requests (List[_VespaDeleteRequest]): The list of delete requests.
batch_size (int): The number of documents to delete in each batch.
"""
def _delete_document(
delete_request: "_VespaDeleteRequest", http_client: httpx.Client
) -> None:
logger.debug(f"Deleting document with ID {delete_request.document_id}")
response = http_client.delete(
delete_request.url,
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
logger.debug(f"Starting batch deletion for {len(delete_requests)} documents")
with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
with get_vespa_http_client(no_timeout=True) as http_client:
for batch_start in range(0, len(delete_requests), batch_size):
batch = delete_requests[batch_start : batch_start + batch_size]
future_to_document_id = {
executor.submit(
_delete_document,
delete_request,
http_client,
): delete_request.document_id
for delete_request in batch
}
for future in concurrent.futures.as_completed(
future_to_document_id
):
doc_id = future_to_document_id[future]
try:
future.result()
logger.debug(f"Successfully deleted document: {doc_id}")
except httpx.HTTPError as e:
logger.error(f"Failed to delete document {doc_id}: {e}")
# Optionally, implement retry logic or error handling here
logger.info("Batch deletion completed")
class _VespaDeleteRequest:
def __init__(self, document_id: str, index_name: str) -> None:
self.document_id = document_id
# Encode the document ID to ensure it's safe for use in the URL
encoded_doc_id = urllib.parse.quote_plus(self.document_id)
self.url = (
f"{VESPA_APPLICATION_ENDPOINT}/document/v1/"
f"{index_name}/{index_name}/docid/{encoded_doc_id}"
)
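# Illustrative example: a document id of "https://example.com/page" is quoted to
# "https%3A%2F%2Fexample.com%2Fpage" before being placed in the docid URL.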

View File

@ -0,0 +1,250 @@
import concurrent.futures
import json
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
import httpx
from retry import retry
from onyx.connectors.cross_connector_utils.miscellaneous_utils import (
get_experts_stores_representations,
)
from onyx.document_index.document_index_utils import get_uuid_from_chunk
from onyx.document_index.vespa.shared_utils.utils import remove_invalid_unicode_chars
from onyx.document_index.vespa.shared_utils.utils import (
replace_invalid_doc_id_characters,
)
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import BLURB
from onyx.document_index.vespa_constants import BOOST
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import CONTENT
from onyx.document_index.vespa_constants import CONTENT_SUMMARY
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import EMBEDDINGS
from onyx.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from onyx.document_index.vespa_constants import METADATA
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import METADATA_SUFFIX
from onyx.document_index.vespa_constants import NUM_THREADS
from onyx.document_index.vespa_constants import PRIMARY_OWNERS
from onyx.document_index.vespa_constants import SECONDARY_OWNERS
from onyx.document_index.vespa_constants import SECTION_CONTINUATION
from onyx.document_index.vespa_constants import SEMANTIC_IDENTIFIER
from onyx.document_index.vespa_constants import SKIP_TITLE_EMBEDDING
from onyx.document_index.vespa_constants import SOURCE_LINKS
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.document_index.vespa_constants import TITLE
from onyx.document_index.vespa_constants import TITLE_EMBEDDING
from onyx.indexing.models import DocMetadataAwareIndexChunk
from onyx.utils.logger import setup_logger
logger = setup_logger()
@retry(tries=3, delay=1, backoff=2)
def _does_document_exist(
doc_chunk_id: str,
index_name: str,
http_client: httpx.Client,
) -> bool:
"""Returns whether the document already exists and the users/group whitelists
Specifically in this case, document refers to a vespa document which is equivalent to a Onyx
chunk. This checks for whether the chunk exists already in the index"""
doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}"
doc_fetch_response = http_client.get(doc_url)
if doc_fetch_response.status_code == 404:
return False
if doc_fetch_response.status_code != 200:
logger.debug(f"Failed to check for document with URL {doc_url}")
raise RuntimeError(
f"Unexpected fetch document by ID value from Vespa "
f"with error {doc_fetch_response.status_code}"
f"Index name: {index_name}"
f"Doc chunk id: {doc_chunk_id}"
)
return True
def _vespa_get_updated_at_attribute(t: datetime | None) -> int | None:
if not t:
return None
if t.tzinfo != timezone.utc:
raise ValueError("Connectors must provide document update time in UTC")
return int(t.timestamp())
def get_existing_documents_from_chunks(
chunks: list[DocMetadataAwareIndexChunk],
index_name: str,
http_client: httpx.Client,
executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> set[str]:
external_executor = True
if not executor:
external_executor = False
executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)
document_ids: set[str] = set()
try:
chunk_existence_future = {
executor.submit(
_does_document_exist,
str(get_uuid_from_chunk(chunk)),
index_name,
http_client,
): chunk
for chunk in chunks
}
for future in concurrent.futures.as_completed(chunk_existence_future):
chunk = chunk_existence_future[future]
chunk_already_existed = future.result()
if chunk_already_existed:
document_ids.add(chunk.source_document.id)
finally:
if not external_executor:
executor.shutdown(wait=True)
return document_ids
@retry(tries=5, delay=1, backoff=2)
def _index_vespa_chunk(
chunk: DocMetadataAwareIndexChunk,
index_name: str,
http_client: httpx.Client,
multitenant: bool,
) -> None:
json_header = {
"Content-Type": "application/json",
}
document = chunk.source_document
# No minichunk documents in vespa, minichunk vectors are stored in the chunk itself
vespa_chunk_id = str(get_uuid_from_chunk(chunk))
embeddings = chunk.embeddings
embeddings_name_vector_map = {"full_chunk": embeddings.full_embedding}
if embeddings.mini_chunk_embeddings:
for ind, m_c_embed in enumerate(embeddings.mini_chunk_embeddings):
embeddings_name_vector_map[f"mini_chunk_{ind}"] = m_c_embed
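# Resulting shape, e.g.: {"full_chunk": [...], "mini_chunk_0": [...], "mini_chunk_1": [...]}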
title = document.get_title_for_document_index()
vespa_document_fields = {
DOCUMENT_ID: document.id,
CHUNK_ID: chunk.chunk_id,
BLURB: remove_invalid_unicode_chars(chunk.blurb),
TITLE: remove_invalid_unicode_chars(title) if title else None,
SKIP_TITLE_EMBEDDING: not title,
# For the BM25 index, the keyword suffix is used; the vector is already generated from the more
# natural-language representation of the metadata section
CONTENT: remove_invalid_unicode_chars(
f"{chunk.title_prefix}{chunk.content}{chunk.metadata_suffix_keyword}"
),
# This duplication of `content` is needed for keyword highlighting
# Note that it's not exactly the same as the actual content
# which contains the title prefix and metadata suffix
CONTENT_SUMMARY: remove_invalid_unicode_chars(chunk.content),
SOURCE_TYPE: str(document.source.value),
SOURCE_LINKS: json.dumps(chunk.source_links),
SEMANTIC_IDENTIFIER: remove_invalid_unicode_chars(document.semantic_identifier),
SECTION_CONTINUATION: chunk.section_continuation,
LARGE_CHUNK_REFERENCE_IDS: chunk.large_chunk_reference_ids,
METADATA: json.dumps(document.metadata),
# Save as a list for efficient extraction as an Attribute
METADATA_LIST: chunk.source_document.get_metadata_str_attributes(),
METADATA_SUFFIX: chunk.metadata_suffix_keyword,
EMBEDDINGS: embeddings_name_vector_map,
TITLE_EMBEDDING: chunk.title_embedding,
DOC_UPDATED_AT: _vespa_get_updated_at_attribute(document.doc_updated_at),
PRIMARY_OWNERS: get_experts_stores_representations(document.primary_owners),
SECONDARY_OWNERS: get_experts_stores_representations(document.secondary_owners),
# the only `set` vespa has is `weightedset`, so we have to give each
# element an arbitrary weight
# rkuo: acl, docset and boost metadata are also updated through the metadata sync queue
# which only calls VespaIndex.update
ACCESS_CONTROL_LIST: {acl_entry: 1 for acl_entry in chunk.access.to_acl()},
DOCUMENT_SETS: {document_set: 1 for document_set in chunk.document_sets},
BOOST: chunk.boost,
}
if multitenant:
if chunk.tenant_id:
vespa_document_fields[TENANT_ID] = chunk.tenant_id
vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}"
logger.debug(f'Indexing to URL "{vespa_url}"')
res = http_client.post(
vespa_url, headers=json_header, json={"fields": vespa_document_fields}
)
try:
res.raise_for_status()
except Exception as e:
logger.exception(
f"Failed to index document: '{document.id}'. Got response: '{res.text}'"
)
if isinstance(e, httpx.HTTPStatusError):
if e.response.status_code == HTTPStatus.INSUFFICIENT_STORAGE:
logger.error(
"NOTE: HTTP Status 507 Insufficient Storage usually means "
"you need to allocate more memory or disk space to the "
"Vespa/index container."
)
raise e
def batch_index_vespa_chunks(
chunks: list[DocMetadataAwareIndexChunk],
index_name: str,
http_client: httpx.Client,
multitenant: bool,
executor: concurrent.futures.ThreadPoolExecutor | None = None,
) -> None:
external_executor = True
if not executor:
external_executor = False
executor = concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS)
try:
chunk_index_future = {
executor.submit(
_index_vespa_chunk, chunk, index_name, http_client, multitenant
): chunk
for chunk in chunks
}
for future in concurrent.futures.as_completed(chunk_index_future):
# Will raise exception if any indexing raised an exception
future.result()
finally:
if not external_executor:
executor.shutdown(wait=True)
def clean_chunk_id_copy(
chunk: DocMetadataAwareIndexChunk,
) -> DocMetadataAwareIndexChunk:
clean_chunk = chunk.copy(
update={
"source_document": chunk.source_document.copy(
update={
"id": replace_invalid_doc_id_characters(chunk.source_document.id)
}
)
}
)
return clean_chunk

View File

@ -0,0 +1,71 @@
import re
from typing import cast
import httpx
from onyx.configs.app_configs import MANAGED_VESPA
from onyx.configs.app_configs import VESPA_CLOUD_CERT_PATH
from onyx.configs.app_configs import VESPA_CLOUD_KEY_PATH
from onyx.configs.app_configs import VESPA_REQUEST_TIMEOUT
# NOTE: This does not seem to be used in reality despite the Vespa Docs pointing to this code
# See here for reference: https://docs.vespa.ai/en/documents.html
# https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java
# Define allowed ASCII characters
ALLOWED_ASCII_CHARS: list[bool] = [False] * 0x80
ALLOWED_ASCII_CHARS[0x9] = True # tab
ALLOWED_ASCII_CHARS[0xA] = True # newline
ALLOWED_ASCII_CHARS[0xD] = True # carriage return
for i in range(0x20, 0x7F):
ALLOWED_ASCII_CHARS[i] = True # printable ASCII chars
ALLOWED_ASCII_CHARS[0x7F] = True # del - discouraged, but allowed
def is_text_character(codepoint: int) -> bool:
"""Returns whether the given codepoint is a valid text character."""
if codepoint < 0x80:
return ALLOWED_ASCII_CHARS[codepoint]
if codepoint < 0xD800:
return True
if codepoint <= 0xDFFF:
return False
if codepoint < 0xFDD0:
return True
if codepoint <= 0xFDEF:
return False
if codepoint >= 0x10FFFE:
return False
return (codepoint & 0xFFFF) < 0xFFFE
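# e.g. is_text_character(0x41) is True (printable ASCII) while is_text_character(0xFFFE)
# is False (a Unicode non-character)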
def replace_invalid_doc_id_characters(text: str) -> str:
"""Replaces invalid document ID characters in text."""
# There may be a more complete set of replacements that need to be made but Vespa docs are unclear
# and users only seem to be running into this error with single quotes
return text.replace("'", "_")
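# e.g. replace_invalid_doc_id_characters("it's a doc") -> "it_s a doc"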
def remove_invalid_unicode_chars(text: str) -> str:
"""Vespa does not take in unicode chars that aren't valid for XML.
This removes them."""
_illegal_xml_chars_RE: re.Pattern = re.compile(
"[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]"
)
return _illegal_xml_chars_RE.sub("", text)
def get_vespa_http_client(no_timeout: bool = False) -> httpx.Client:
"""
Configure and return an HTTP client for communicating with Vespa,
including authentication if needed.
"""
return httpx.Client(
cert=cast(tuple[str, str], (VESPA_CLOUD_CERT_PATH, VESPA_CLOUD_KEY_PATH))
if MANAGED_VESPA
else None,
verify=False if not MANAGED_VESPA else True,
timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT,
http2=True,
)

View File

@ -0,0 +1,100 @@
from datetime import datetime
from datetime import timedelta
from datetime import timezone
from onyx.configs.constants import INDEX_SEPARATOR
from onyx.context.search.models import IndexFilters
from onyx.document_index.interfaces import VespaChunkRequest
from onyx.document_index.vespa_constants import ACCESS_CONTROL_LIST
from onyx.document_index.vespa_constants import CHUNK_ID
from onyx.document_index.vespa_constants import DOC_UPDATED_AT
from onyx.document_index.vespa_constants import DOCUMENT_ID
from onyx.document_index.vespa_constants import DOCUMENT_SETS
from onyx.document_index.vespa_constants import HIDDEN
from onyx.document_index.vespa_constants import METADATA_LIST
from onyx.document_index.vespa_constants import SOURCE_TYPE
from onyx.document_index.vespa_constants import TENANT_ID
from onyx.utils.logger import setup_logger
logger = setup_logger()
def build_vespa_filters(filters: IndexFilters, include_hidden: bool = False) -> str:
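"""Builds the YQL `where` clause prefix for the given filters.
Every emitted clause ends with a trailing " and " so callers can append their own
conditions, e.g. a non-hidden, web-source query renders roughly as:
!(hidden=true) and (source_type contains "web") and
"""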
def _build_or_filters(key: str, vals: list[str] | None) -> str:
if vals is None:
return ""
valid_vals = [val for val in vals if val]
if not key or not valid_vals:
return ""
eq_elems = [f'{key} contains "{elem}"' for elem in valid_vals]
or_clause = " or ".join(eq_elems)
return f"({or_clause}) and "
def _build_time_filter(
cutoff: datetime | None,
# Slightly over 3 Months, approximately 1 fiscal quarter
untimed_doc_cutoff: timedelta = timedelta(days=92),
) -> str:
if not cutoff:
return ""
# Documents without an updated-at time are filtered out for queries asking for very
# recent documents (less than ~3 months old by default). For time decay purposes,
# such documents are treated as roughly 3 months old.
include_untimed = datetime.now(timezone.utc) - untimed_doc_cutoff > cutoff
cutoff_secs = int(cutoff.timestamp())
if include_untimed:
# Documents without updated_at are assigned -1 as their date
return f"!({DOC_UPDATED_AT} < {cutoff_secs}) and "
return f"({DOC_UPDATED_AT} >= {cutoff_secs}) and "
filter_str = f"!({HIDDEN}=true) and " if not include_hidden else ""
if filters.tenant_id:
filter_str += f'({TENANT_ID} contains "{filters.tenant_id}") and '
# CAREFUL touching this one, currently there is no second ACL double-check post retrieval
if filters.access_control_list is not None:
filter_str += _build_or_filters(
ACCESS_CONTROL_LIST, filters.access_control_list
)
source_strs = (
[s.value for s in filters.source_type] if filters.source_type else None
)
filter_str += _build_or_filters(SOURCE_TYPE, source_strs)
tag_attributes = None
tags = filters.tags
if tags:
tag_attributes = [tag.tag_key + INDEX_SEPARATOR + tag.tag_value for tag in tags]
filter_str += _build_or_filters(METADATA_LIST, tag_attributes)
filter_str += _build_or_filters(DOCUMENT_SETS, filters.document_set)
filter_str += _build_time_filter(filters.time_cutoff)
return filter_str
def build_vespa_id_based_retrieval_yql(
chunk_request: VespaChunkRequest,
) -> str:
id_based_retrieval_yql_section = (
f'({DOCUMENT_ID} contains "{chunk_request.document_id}"'
)
if chunk_request.is_capped:
id_based_retrieval_yql_section += (
f" and {CHUNK_ID} >= {chunk_request.min_chunk_ind or 0}"
)
id_based_retrieval_yql_section += (
f" and {CHUNK_ID} <= {chunk_request.max_chunk_ind}"
)
id_based_retrieval_yql_section += ")"
return id_based_retrieval_yql_section
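# For a capped request this renders, e.g., as:
# (document_id contains "some-doc-id" and chunk_id >= 0 and chunk_id <= 5)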

View File

@ -0,0 +1,104 @@
from onyx.configs.app_configs import VESPA_CLOUD_URL
from onyx.configs.app_configs import VESPA_CONFIG_SERVER_HOST
from onyx.configs.app_configs import VESPA_HOST
from onyx.configs.app_configs import VESPA_PORT
from onyx.configs.app_configs import VESPA_TENANT_PORT
from onyx.configs.constants import SOURCE_TYPE
VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM"
DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME"
DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT"
SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER"
DATE_REPLACEMENT = "DATE_REPLACEMENT"
SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER"
TENANT_ID_PAT = "TENANT_ID_REPLACEMENT"
TENANT_ID_REPLACEMENT = """field tenant_id type string {
indexing: summary | attribute
rank: filter
attribute: fast-search
}"""
# config server
VESPA_CONFIG_SERVER_URL = (
VESPA_CLOUD_URL or f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}"
)
VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2"
# main search application
VESPA_APP_CONTAINER_URL = VESPA_CLOUD_URL or f"http://{VESPA_HOST}:{VESPA_PORT}"
# danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd
DOCUMENT_ID_ENDPOINT = (
f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid"
)
SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/"
NUM_THREADS = (
32 # since Vespa doesn't allow batching of inserts / updates, we use threads
)
MAX_ID_SEARCH_QUERY_SIZE = 400
# Suspect that adding too many "or" conditions will cause Vespa to timeout and return
# an empty list of hits (with no error status and coverage: 0 and degraded)
MAX_OR_CONDITIONS = 10
# up from 500ms for now, since we've seen quite a few timeouts
# in the long term, we are looking to improve the performance of Vespa
# so that we can bring this back to default
VESPA_TIMEOUT = "3s"
BATCH_SIZE = 128 # Specific to Vespa
TENANT_ID = "tenant_id"
DOCUMENT_ID = "document_id"
CHUNK_ID = "chunk_id"
BLURB = "blurb"
CONTENT = "content"
SOURCE_LINKS = "source_links"
SEMANTIC_IDENTIFIER = "semantic_identifier"
TITLE = "title"
SKIP_TITLE_EMBEDDING = "skip_title"
SECTION_CONTINUATION = "section_continuation"
EMBEDDINGS = "embeddings"
TITLE_EMBEDDING = "title_embedding"
ACCESS_CONTROL_LIST = "access_control_list"
DOCUMENT_SETS = "document_sets"
LARGE_CHUNK_REFERENCE_IDS = "large_chunk_reference_ids"
METADATA = "metadata"
METADATA_LIST = "metadata_list"
METADATA_SUFFIX = "metadata_suffix"
BOOST = "boost"
DOC_UPDATED_AT = "doc_updated_at" # Indexed as seconds since epoch
PRIMARY_OWNERS = "primary_owners"
SECONDARY_OWNERS = "secondary_owners"
RECENCY_BIAS = "recency_bias"
HIDDEN = "hidden"
# Specific to Vespa, needed for highlighting matching keywords / section
CONTENT_SUMMARY = "content_summary"
YQL_BASE = (
f"select "
f"documentid, "
f"{DOCUMENT_ID}, "
f"{CHUNK_ID}, "
f"{BLURB}, "
f"{CONTENT}, "
f"{SOURCE_TYPE}, "
f"{SOURCE_LINKS}, "
f"{SEMANTIC_IDENTIFIER}, "
f"{TITLE}, "
f"{SECTION_CONTINUATION}, "
f"{BOOST}, "
f"{HIDDEN}, "
f"{DOC_UPDATED_AT}, "
f"{PRIMARY_OWNERS}, "
f"{SECONDARY_OWNERS}, "
f"{LARGE_CHUNK_REFERENCE_IDS}, "
f"{METADATA}, "
f"{METADATA_SUFFIX}, "
f"{CONTENT_SUMMARY} "
f"from {{index_name}} where "
)
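# NOTE: {index_name} is left as a literal placeholder here and is filled in later via
# YQL_BASE.format(index_name=...) by the query-building methods.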