Add option to not re-index (#4157)

* Add option to not re-index * Add quantization / dimensionality override support * Fix build / ut
2025-07-03 11:11:45 +02:00 · 2025-03-03 10:54:11 -08:00
parent 39fd6919ad
commit f25e1e80f6
40 changed files with 1020 additions and 358 deletions
--- a/backend/onyx/document_index/interfaces.py
+++ b/backend/onyx/document_index/interfaces.py
@ -6,6 +6,7 @@ from typing import Any
 from onyx.access.models import DocumentAccess
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
+from onyx.db.enums import EmbeddingPrecision
 from onyx.indexing.models import DocMetadataAwareIndexChunk
 from shared_configs.model_server_models import Embedding

@ -145,17 +146,21 @@ class Verifiable(abc.ABC):
    @abc.abstractmethod
    def ensure_indices_exist(
        self,
-        index_embedding_dim: int,
+        primary_embedding_dim: int,
+        primary_embedding_precision: EmbeddingPrecision,
        secondary_index_embedding_dim: int | None,
+        secondary_index_embedding_precision: EmbeddingPrecision | None,
    ) -> None:
        """
        Verify that the document index exists and is consistent with the expectations in the code.

        Parameters:
-        - index_embedding_dim: Vector dimensionality for the vector similarity part of the search
+        - primary_embedding_dim: Vector dimensionality for the vector similarity part of the search
+        - primary_embedding_precision: Precision of the vector similarity part of the search
        - secondary_index_embedding_dim: Vector dimensionality of the secondary index being built
                behind the scenes. The secondary index should only be built when switching
                embedding models therefore this dim should be different from the primary index.
+        - secondary_index_embedding_precision: Precision of the vector similarity part of the secondary index
        """
        raise NotImplementedError

@ -164,6 +169,7 @@ class Verifiable(abc.ABC):
    def register_multitenant_indices(
        indices: list[str],
        embedding_dims: list[int],
+        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
        """
        Register multitenant indices with the document index.
--- a/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
+++ b/backend/onyx/document_index/vespa/app_config/schemas/danswer_chunk.sd
@ -37,7 +37,7 @@ schema DANSWER_CHUNK_NAME {
            summary: dynamic
        }
        # Title embedding (x1)
-        field title_embedding type tensor<float>(x[VARIABLE_DIM]) {
+        field title_embedding type tensor<EMBEDDING_PRECISION>(x[VARIABLE_DIM]) {
            indexing: attribute | index
            attribute {
                distance-metric: angular
@ -45,7 +45,7 @@ schema DANSWER_CHUNK_NAME {
        }
        # Content embeddings (chunk + optional mini chunks embeddings)
        # "t" and "x" are arbitrary names, not special keywords
-        field embeddings type tensor<float>(t{},x[VARIABLE_DIM]) {
+        field embeddings type tensor<EMBEDDING_PRECISION>(t{},x[VARIABLE_DIM]) {
            indexing: attribute | index
            attribute {
                distance-metric: angular
--- a/backend/onyx/document_index/vespa/app_config/validation-overrides.xml
+++ b/backend/onyx/document_index/vespa/app_config/validation-overrides.xml
@ -5,4 +5,7 @@
    <allow
        until="DATE_REPLACEMENT"
        comment="We need to be able to update the schema for updates to the Onyx schema">indexing-change</allow>
+    <allow 
+        until='DATE_REPLACEMENT'
+        comment="Prevents old alt indices from interfering with changes">field-type-change</allow>
 </validation-overrides>
--- a/backend/onyx/document_index/vespa/chunk_retrieval.py
+++ b/backend/onyx/document_index/vespa/chunk_retrieval.py
@ -310,6 +310,11 @@ def query_vespa(
            f"Request Headers: {e.request.headers}\n"
            f"Request Payload: {params}\n"
            f"Exception: {str(e)}"
+            + (
+                f"\nResponse: {e.response.text}"
+                if isinstance(e, httpx.HTTPStatusError)
+                else ""
+            )
        )
        raise httpx.HTTPError(error_base) from e

--- a/backend/onyx/document_index/vespa/index.py
+++ b/backend/onyx/document_index/vespa/index.py
@ -26,6 +26,7 @@ from onyx.configs.chat_configs import VESPA_SEARCHER_THREADS
 from onyx.configs.constants import KV_REINDEX_KEY
 from onyx.context.search.models import IndexFilters
 from onyx.context.search.models import InferenceChunkUncleaned
+from onyx.db.enums import EmbeddingPrecision
 from onyx.document_index.document_index_utils import get_document_chunk_ids
 from onyx.document_index.interfaces import DocumentIndex
 from onyx.document_index.interfaces import DocumentInsertionRecord
@ -63,6 +64,7 @@ from onyx.document_index.vespa_constants import DATE_REPLACEMENT
 from onyx.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from onyx.document_index.vespa_constants import DOCUMENT_REPLACEMENT_PAT
 from onyx.document_index.vespa_constants import DOCUMENT_SETS
+from onyx.document_index.vespa_constants import EMBEDDING_PRECISION_REPLACEMENT_PAT
 from onyx.document_index.vespa_constants import HIDDEN
 from onyx.document_index.vespa_constants import NUM_THREADS
 from onyx.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT
@ -112,6 +114,21 @@ def _create_document_xml_lines(doc_names: list[str | None] | list[str]) -> str:
    return "\n".join(doc_lines)


+def _replace_template_values_in_schema(
+    schema_template: str,
+    index_name: str,
+    embedding_dim: int,
+    embedding_precision: EmbeddingPrecision,
+) -> str:
+    return (
+        schema_template.replace(
+            EMBEDDING_PRECISION_REPLACEMENT_PAT, embedding_precision.value
+        )
+        .replace(DANSWER_CHUNK_REPLACEMENT_PAT, index_name)
+        .replace(VESPA_DIM_REPLACEMENT_PAT, str(embedding_dim))
+    )
+
+
 def add_ngrams_to_schema(schema_content: str) -> str:
    # Add the match blocks containing gram and gram-size to title and content fields
    schema_content = re.sub(
@ -163,8 +180,10 @@ class VespaIndex(DocumentIndex):

    def ensure_indices_exist(
        self,
-        index_embedding_dim: int,
+        primary_embedding_dim: int,
+        primary_embedding_precision: EmbeddingPrecision,
        secondary_index_embedding_dim: int | None,
+        secondary_index_embedding_precision: EmbeddingPrecision | None,
    ) -> None:
        if MULTI_TENANT:
            logger.info(
@ -221,18 +240,29 @@ class VespaIndex(DocumentIndex):
            schema_template = schema_f.read()
        schema_template = schema_template.replace(TENANT_ID_PAT, "")

-        schema = schema_template.replace(
-            DANSWER_CHUNK_REPLACEMENT_PAT, self.index_name
-        ).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim))
+        schema = _replace_template_values_in_schema(
+            schema_template,
+            self.index_name,
+            primary_embedding_dim,
+            primary_embedding_precision,
+        )

        schema = add_ngrams_to_schema(schema) if needs_reindexing else schema
        schema = schema.replace(TENANT_ID_PAT, "")
        zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8")

        if self.secondary_index_name:
-            upcoming_schema = schema_template.replace(
-                DANSWER_CHUNK_REPLACEMENT_PAT, self.secondary_index_name
-            ).replace(VESPA_DIM_REPLACEMENT_PAT, str(secondary_index_embedding_dim))
+            if secondary_index_embedding_dim is None:
+                raise ValueError("Secondary index embedding dimension is required")
+            if secondary_index_embedding_precision is None:
+                raise ValueError("Secondary index embedding precision is required")
+
+            upcoming_schema = _replace_template_values_in_schema(
+                schema_template,
+                self.secondary_index_name,
+                secondary_index_embedding_dim,
+                secondary_index_embedding_precision,
+            )
            zip_dict[f"schemas/{schema_names[1]}.sd"] = upcoming_schema.encode("utf-8")

        zip_file = in_memory_zip_from_file_bytes(zip_dict)
@ -251,6 +281,7 @@ class VespaIndex(DocumentIndex):
    def register_multitenant_indices(
        indices: list[str],
        embedding_dims: list[int],
+        embedding_precisions: list[EmbeddingPrecision],
    ) -> None:
        if not MULTI_TENANT:
            raise ValueError("Multi-tenant is not enabled")
@ -309,13 +340,14 @@ class VespaIndex(DocumentIndex):

        for i, index_name in enumerate(indices):
            embedding_dim = embedding_dims[i]
+            embedding_precision = embedding_precisions[i]
            logger.info(
                f"Creating index: {index_name} with embedding dimension: {embedding_dim}"
            )

-            schema = schema_template.replace(
-                DANSWER_CHUNK_REPLACEMENT_PAT, index_name
-            ).replace(VESPA_DIM_REPLACEMENT_PAT, str(embedding_dim))
+            schema = _replace_template_values_in_schema(
+                schema_template, index_name, embedding_dim, embedding_precision
+            )
            schema = schema.replace(
                TENANT_ID_PAT, TENANT_ID_REPLACEMENT if MULTI_TENANT else ""
            )
--- a/backend/onyx/document_index/vespa_constants.py
+++ b/backend/onyx/document_index/vespa_constants.py
@ -6,6 +6,7 @@ from onyx.configs.app_configs import VESPA_TENANT_PORT
 from onyx.configs.constants import SOURCE_TYPE

 VESPA_DIM_REPLACEMENT_PAT = "VARIABLE_DIM"
+EMBEDDING_PRECISION_REPLACEMENT_PAT = "EMBEDDING_PRECISION"
 DANSWER_CHUNK_REPLACEMENT_PAT = "DANSWER_CHUNK_NAME"
 DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT"
 SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER"