Vespa Degraded Handling (#2304)

This commit is contained in:
Yuhong Sun 2024-09-02 15:53:37 -07:00 committed by GitHub
parent abe01144ca
commit 812ca69949
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 8 deletions

View File

@@ -12,8 +12,8 @@ from sqlalchemy.sql import table, column
 revision = "a3795dce87be"
 down_revision = "1f60f60c3401"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 def extract_confluence_keys_from_url(wiki_url: str) -> tuple[str, str, str, bool]:

View File

@@ -12,8 +12,8 @@ import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "bceb1e139447"
 down_revision = "a3795dce87be"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 def upgrade() -> None:

View File

@@ -30,6 +30,7 @@ from danswer.document_index.vespa_constants import DOCUMENT_ID_ENDPOINT
 from danswer.document_index.vespa_constants import HIDDEN
 from danswer.document_index.vespa_constants import LARGE_CHUNK_REFERENCE_IDS
 from danswer.document_index.vespa_constants import MAX_ID_SEARCH_QUERY_SIZE
+from danswer.document_index.vespa_constants import MAX_OR_CONDITIONS
 from danswer.document_index.vespa_constants import METADATA
 from danswer.document_index.vespa_constants import METADATA_SUFFIX
 from danswer.document_index.vespa_constants import PRIMARY_OWNERS
@@ -384,7 +385,7 @@ def batch_search_api_retrieval(
     capped_requests: list[VespaChunkRequest] = []
     uncapped_requests: list[VespaChunkRequest] = []
     chunk_count = 0
-    for request in chunk_requests:
+    for req_ind, request in enumerate(chunk_requests, start=1):
         # All requests without a chunk range are uncapped
         # Uncapped requests are retrieved using the Visit API
         range = request.range
@@ -392,9 +393,10 @@ def batch_search_api_retrieval(
             uncapped_requests.append(request)
             continue
-        # If adding the range to the chunk count is greater than the
-        # max query size, we need to perform a retrieval to avoid hitting the limit
-        if chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE:
+        if (
+            chunk_count + range > MAX_ID_SEARCH_QUERY_SIZE
+            or req_ind % MAX_OR_CONDITIONS == 0
+        ):
             retrieved_chunks.extend(
                 _get_chunks_via_batch_search(
                     index_name=index_name,

View File

@@ -25,6 +25,9 @@ NUM_THREADS = (
     32  # since Vespa doesn't allow batching of inserts / updates, we use threads
 )
 MAX_ID_SEARCH_QUERY_SIZE = 400
+# Suspect that adding too many "or" conditions will cause Vespa to timeout and return
+# an empty list of hits (with no error status and coverage: 0 and degraded)
+MAX_OR_CONDITIONS = 10
 # up from 500ms for now, since we've seen quite a few timeouts
 # in the long term, we are looking to improve the performance of Vespa
 # so that we can bring this back to default

View File

@@ -272,6 +272,11 @@ class SearchPipeline:
             (chunk.document_id, chunk.chunk_id): chunk for chunk in inference_chunks
         }
+        # In case of failed parallel calls to Vespa, at least we should have the initial retrieved chunks
+        doc_chunk_ind_to_chunk.update(
+            {(chunk.document_id, chunk.chunk_id): chunk for chunk in retrieved_chunks}
+        )
         # Build the surroundings for all of the initial retrieved chunks
         for chunk in retrieved_chunks:
             start_ind = max(0, chunk.chunk_id - above)