diff --git a/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py b/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py
index ad23892a4..20e33d0e2 100644
--- a/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py
+++ b/backend/alembic/versions/a3795dce87be_migration_confluence_to_be_explicit.py
@@ -12,8 +12,8 @@ from sqlalchemy.sql import table, column
 
 revision = "a3795dce87be"
 down_revision = "1f60f60c3401"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 
 
 def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]:
diff --git a/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py b/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py
index 38bf07be5..968500e6a 100644
--- a/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py
+++ b/backend/alembic/versions/bceb1e139447_add_base_url_to_cloudembeddingprovider.py
@@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "bceb1e139447"
 down_revision = "a3795dce87be"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 
 
 def upgrade() -> None:
diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py
index 753db1f5e..e4b2ad83c 100644
--- a/backend/danswer/document_index/vespa/chunk_retrieval.py
+++ b/backend/danswer/document_index/vespa/chunk_retrieval.py
@@ -30,6 +30,7 @@ from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from danswer.document_index.vespa_constants import HIDDEN
 from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
 from danswer.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
+from danswer.document_index.vespa_constants import MAX_OR_CONDITIONS
 from danswer.document_index.vespa_constants import METADATA
 from danswer.document_index.vespa_constants import METADATA_SUFFIX
 from danswer.document_index.vespa_constants import PRIMARY_OWNERS
@@ -384,7 +385,7 @@ def batch_search_api_retrieval(
     capped_requests: list[VespaChunkRequest] = []
     uncapped_requests: list[VespaChunkRequest] = []
     chunk_count = 0
-    for request in chunk_requests:
+    for req_ind, request in enumerate(chunk_requests, start=1):
         # All requests without a chunk range are uncapped
         # Uncapped requests are retrieved using the Visit API
         range = request.range
@@ -392,9 +393,10 @@
             uncapped_requests.append(request)
             continue
 
-        # If adding the range to the chunk count is greater than the
-        # max query size, we need to perform a retrieval to avoid hitting the limit
-        if chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE:
+        if (
+            chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE
+            or req_ind % MAX_OR_CONDITIONS == 0
+        ):
             retrieved_chunks.extend(
                 _get_chunks_via_batch_search(
                     index_name=index_name,
diff --git a/backend/danswer/document_index/vespa_constants.py b/backend/danswer/document_index/vespa_constants.py
index 0b8949b42..07d2f3f74 100644
--- a/backend/danswer/document_index/vespa_constants.py
+++ b/backend/danswer/document_index/vespa_constants.py
@@ -25,6 +25,9 @@ NUM_THREADS = (
     32  # since Vespa doesn't allow batching of inserts / updates, we use threads
 )
 MAX_ID_SEARCH_QUERY_SIZE = 400
+# Suspect that adding too many "or" conditions will cause Vespa to timeout and return
+# an empty list of hits (with no error status and coverage: 0 and degraded)
+MAX_OR_CONDITIONS = 10
 # up from 500ms for now, since we've seen quite a few timeouts
 # in the long term, we are looking to improve the performance of Vespa
 # so that we can bring this back to default
diff --git a/backend/danswer/search/pipeline.py b/backend/danswer/search/pipeline.py
index 2b2ce0c92..183c8729d 100644
--- a/backend/danswer/search/pipeline.py
+++ b/backend/danswer/search/pipeline.py
@@ -272,6 +272,11 @@ class SearchPipeline:
             (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks
         }
 
+        # In case of failed parallel calls to Vespa, at least we should have the initial retrieved chunks
+        doc_chunk_ind_to_chunk.update(
+            {(chunk.document_id, chunk.chunk_id): chunk for chunk in retrieved_chunks}
+        )
+
         # Build the surroundings for all of the initial retrieved chunks
         for chunk in retrieved_chunks:
             start_ind = max(0, chunk.chunk_id - above)
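Why the `req_ind % MAX_OR_CONDITIONS == 0` flush matters: each capped request becomes one "or" clause in the batched Vespa query, and per the new comment in `vespa_constants.py`, too many clauses are suspected to make Vespa return zero hits with degraded coverage rather than an error. Below is a minimal standalone sketch of the batching rule the diff introduces; the `ChunkRequest` dataclass and `batch_requests` helper are illustrative names, not from the codebase.

```python
from dataclasses import dataclass

MAX_ID_SEARCH_QUERY_SIZE = 400
MAX_OR_CONDITIONS = 10


@dataclass
class ChunkRequest:
    document_id: str
    range: int  # number of chunks requested for this document


def batch_requests(requests: list[ChunkRequest]) -> list[list[ChunkRequest]]:
    """Split requests so no single query exceeds either cap."""
    batches: list[list[ChunkRequest]] = []
    pending: list[ChunkRequest] = []
    chunk_count = 0
    for req_ind, request in enumerate(requests, start=1):
        # Flush the pending batch when the total chunk count would blow past
        # the query-size cap, or on every MAX_OR_CONDITIONS-th request so a
        # single query never accumulates too many "or" clauses.
        if (
            chunk_count + request.range > MAX_ID_SEARCH_QUERY_SIZE
            or req_ind % MAX_OR_CONDITIONS == 0
        ):
            if pending:
                batches.append(pending)
            pending = []
            chunk_count = 0
        pending.append(request)
        chunk_count += request.range
    if pending:
        batches.append(pending)
    return batches


if __name__ == "__main__":
    reqs = [ChunkRequest(f"doc_{i}", 50) for i in range(25)]
    for batch in batch_requests(reqs):
        # Each batch corresponds to one query with len(batch) "or" conditions
        print(len(batch), sum(r.range for r in batch))
```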
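The `pipeline.py` change is a fallback: the chunk map built from the parallel Vespa calls is overlaid with the initially retrieved chunks, so an empty or failed parallel call cannot drop chunks the first retrieval already returned. A hedged sketch of that merge, using hypothetical `Chunk` and `merge_chunks` names:

```python
from dataclasses import dataclass


@dataclass
class Chunk:
    document_id: str
    chunk_id: int
    content: str


def merge_chunks(
    inference_chunks: list[Chunk], retrieved_chunks: list[Chunk]
) -> dict[tuple[str, int], Chunk]:
    # Map built from the parallel expansion calls (may be empty on failure)
    doc_chunk_ind_to_chunk = {
        (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks
    }
    # Fallback: guarantee the initially retrieved chunks are always present,
    # overwriting any duplicate keys with the original versions
    doc_chunk_ind_to_chunk.update(
        {(chunk.document_id, chunk.chunk_id): chunk for chunk in retrieved_chunks}
    )
    return doc_chunk_ind_to_chunk


# Example: the parallel expansion returned nothing, yet the original chunk survives
merged = merge_chunks([], [Chunk("doc_1", 0, "hello")])
assert merged[("doc_1", 0)].content == "hello"
```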