Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-05-04 00:40:44 +02:00)
Vespa Degraded Handling (#2304)
Commit 812ca69949 (parent abe01144ca)
@@ -12,8 +12,8 @@ from sqlalchemy.sql import table, column

revision = "a3795dce87be"
down_revision = "1f60f60c3401"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None


def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]:
@@ -12,8 +12,8 @@ import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "bceb1e139447"
down_revision = "a3795dce87be"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None


def upgrade() -> None:
@@ -30,6 +30,7 @@ from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
from danswer.document_index.vespa_constants import HIDDEN
from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
from danswer.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
+from danswer.document_index.vespa_constants import MAX_OR_CONDITIONS
from danswer.document_index.vespa_constants import METADATA
from danswer.document_index.vespa_constants import METADATA_SUFFIX
from danswer.document_index.vespa_constants import PRIMARY_OWNERS
@@ -384,7 +385,7 @@ def batch_search_api_retrieval(
    capped_requests: list[VespaChunkRequest] = []
    uncapped_requests: list[VespaChunkRequest] = []
    chunk_count = 0
-    for request in chunk_requests:
+    for req_ind, request in enumerate(chunk_requests, start=1):
        # All requests without a chunk range are uncapped
        # Uncapped requests are retrieved using the Visit API
        range = request.range
@@ -392,9 +393,10 @@ def batch_search_api_retrieval(
            uncapped_requests.append(request)
            continue

        # If adding the range to the chunk count is greater than the
        # max query size, we need to perform a retrieval to avoid hitting the limit
-        if chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE:
+        if (
+            chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE
+            or req_ind % MAX_OR_CONDITIONS == 0
+        ):
            retrieved_chunks.extend(
                _get_chunks_via_batch_search(
                    index_name=index_name,
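For context, the change above flushes a Search API batch not only when the accumulated chunk count would exceed MAX_ID_SEARCH_QUERY_SIZE, but also every MAX_OR_CONDITIONS requests, so Vespa is never handed a query that ORs together too many document conditions. Below is a minimal, self-contained sketch of that batching pattern; ChunkRequest and batch_requests are illustrative stand-ins, not the repo's VespaChunkRequest or its helpers, and the grouping counts per batch rather than using the running request index as the diff does.

from dataclasses import dataclass

# Values copied from the constants added in this commit.
MAX_ID_SEARCH_QUERY_SIZE = 400
MAX_OR_CONDITIONS = 10


@dataclass
class ChunkRequest:
    # Illustrative stand-in for danswer's VespaChunkRequest.
    document_id: str
    range: int | None  # number of chunk ids requested; None means "fetch all chunks"


def batch_requests(requests: list[ChunkRequest]) -> list[list[ChunkRequest]]:
    """Group capped requests so each batch stays under both limits."""
    batches: list[list[ChunkRequest]] = []
    current: list[ChunkRequest] = []
    chunk_count = 0
    for request in requests:
        if request.range is None:
            # Uncapped requests are handled separately (via the Visit API in the diff).
            continue
        # Flush when this request would push the batch past the id limit,
        # or when the batch already carries MAX_OR_CONDITIONS document conditions.
        if current and (
            chunk_count + request.range > MAX_ID_SEARCH_QUERY_SIZE
            or len(current) >= MAX_OR_CONDITIONS
        ):
            batches.append(current)
            current = []
            chunk_count = 0
        current.append(request)
        chunk_count += request.range
    if current:
        batches.append(current)
    return batches

Each returned batch can then be issued as one Search API query whose filter ORs together at most MAX_OR_CONDITIONS document ids.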
@@ -25,6 +25,9 @@ NUM_THREADS = (
    32  # since Vespa doesn't allow batching of inserts / updates, we use threads
)
MAX_ID_SEARCH_QUERY_SIZE = 400
+# Suspect that adding too many "or" conditions will cause Vespa to timeout and return
+# an empty list of hits (with no error status and coverage: 0 and degraded)
+MAX_OR_CONDITIONS = 10
# up from 500ms for now, since we've seen quite a few timeouts
# in the long term, we are looking to improve the performance of Vespa
# so that we can bring this back to default
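The new comment describes a failure mode where Vespa answers without any error status, yet with an empty hit list, coverage 0, and a degraded result, so the response looks successful unless the coverage block is inspected. A small sketch of such a check follows; this helper is not part of the commit, and it assumes the standard Vespa Search API result layout where coverage information is reported under root.coverage.

def is_degraded_result(result_json: dict) -> bool:
    """Heuristic check for a silently degraded Vespa response.

    A query hitting the failure mode described above comes back with HTTP 200
    and zero hits, but its coverage block reports coverage below 100 (and may
    include a "degraded" section explaining why).
    """
    coverage = result_json.get("root", {}).get("coverage", {})
    return coverage.get("coverage", 100) < 100 or "degraded" in coverage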
@@ -272,6 +272,11 @@ class SearchPipeline:
            (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks
        }

+        # In case of failed parallel calls to Vespa, at least we should have the initial retrieved chunks
+        doc_chunk_ind_to_chunk.update(
+            {(chunk.document_id, chunk.chunk_id): chunk for chunk in retrieved_chunks}
+        )
+
        # Build the surroundings for all of the initial retrieved chunks
        for chunk in retrieved_chunks:
            start_ind = max(0, chunk.chunk_id - above)
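The inserted lines guarantee that the originally retrieved chunks always end up in doc_chunk_ind_to_chunk, so a failed or degraded parallel call for surrounding context cannot silently drop a hit. A minimal sketch of that keyed-merge fallback, using a hypothetical Chunk type rather than danswer's inference chunk model:

from dataclasses import dataclass


@dataclass
class Chunk:
    document_id: str
    chunk_id: int
    content: str


def merge_with_fallback(expanded: list[Chunk], initial: list[Chunk]) -> list[Chunk]:
    """Key chunks by (document_id, chunk_id); the initial hits always survive,
    even if the parallel expansion calls came back empty or partially failed."""
    by_key = {(c.document_id, c.chunk_id): c for c in expanded}
    # update() lets the initial hits overwrite (or fill in) any missing keys.
    by_key.update({(c.document_id, c.chunk_id): c for c in initial})
    return list(by_key.values())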