add vespa + embedding timeout env variables (#2689)

* add vespa + embedding timeout env variables

* nit: integration test

* add dangerous override

* k

* add additional clarity

* nit

* nit
Author: pablodanswer (committed by GitHub)
Date: 2024-10-08 20:20:28 -07:00
Parent: 10f221cd37
Commit: d5b9a6e552
8 changed files with 27 additions and 6 deletions

File 1 of 8

@@ -401,6 +401,9 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
     os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
 )
+VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
+SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
+
 #####
 # Enterprise Edition Configs
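
Note: both new settings use `os.environ.get(...) or "<default>"` rather than a get() default. A minimal sketch, not part of the diff, of why that matters given that the compose files later in this commit export these variables as empty strings when unset:

import os

# Compose entries like `VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}` set the
# variable to an empty string when the host leaves it undefined.
os.environ["VESPA_REQUEST_TIMEOUT"] = ""

# `or "5"` treats "" as missing, so the fallback still applies:
print(int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5"))  # -> 5

# A plain get() default would return "" here, and int("") raises ValueError:
# int(os.environ.get("VESPA_REQUEST_TIMEOUT", "5"))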

File 2 of 8

@@ -15,6 +15,7 @@ import httpx
 import requests
 from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
+from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT
 from danswer.configs.chat_configs import DOC_TIME_DECAY
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -211,7 +212,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             # Check for existing documents, existing documents need to have all of their chunks deleted
             # prior to indexing as the document size (num chunks) may have shrunk
@@ -275,7 +276,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             for update_batch in batch_generator(updates, batch_size):
                 future_to_document_id = {
@@ -419,7 +420,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
@@ -475,7 +476,7 @@ class VespaIndex(DocumentIndex):
         # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
         # indexing / updates / deletes since we have to make a large volume of requests.
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             index_names = [self.index_name]
             if self.secondary_index_name:
                 index_names.append(self.secondary_index_name)
@@ -503,7 +504,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
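
All five call sites above pass the same scalar timeout. In httpx, a scalar caps each phase of the request (connect, read, write, pool acquisition) rather than the request as a whole. A minimal sketch under that reading (the 5 mirrors the config default; `http2=True` requires the `httpx[http2]` extra to be installed):

import httpx

# Each phase of every request is capped at 5 seconds.
with httpx.Client(http2=True, timeout=5) as http_client:
    ...

# Granular equivalent if, say, only slow Vespa reads need more headroom:
with httpx.Client(http2=True, timeout=httpx.Timeout(5.0, read=30.0)) as http_client:
    ...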

File 3 of 8

@@ -27,6 +27,7 @@ CHUNK_OVERLAP = 0
 MAX_METADATA_PERCENTAGE = 0.25
 CHUNK_MIN_CONTENT = 256
 logger = setup_logger()

File 4 of 8

@@ -1,3 +1,4 @@
+import sys
 import traceback
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
@@ -32,6 +33,7 @@ from danswer.configs.app_configs import OAUTH_CLIENT_ID
 from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
+from danswer.configs.app_configs import SYSTEM_RECURSION_LIMIT
 from danswer.configs.app_configs import USER_AUTH_SECRET
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import AuthType
@@ -140,6 +142,11 @@ def include_router_with_global_prefix_prepended(
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
+    # Set recursion limit
+    if SYSTEM_RECURSION_LIMIT is not None:
+        sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
+        logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}")
+
     SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)
     SqlEngine.init_engine(
         pool_size=POSTGRES_API_SERVER_POOL_SIZE,
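
One caveat on the new startup hook, sketched below (not from the diff): `sys.setrecursionlimit` only moves the ceiling at which CPython raises RecursionError; it does not grow the underlying C stack, so values far above the default trade a clean exception for a possible hard crash.

import sys

print(sys.getrecursionlimit())  # CPython defaults to 1000, same as SYSTEM_RECURSION_LIMIT
sys.setrecursionlimit(1000)     # adjusts only the RecursionError threshold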

File 5 of 8

@@ -25,6 +25,7 @@ from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
 from model_server.utils import simple_log_function_time
 from shared_configs.configs import INDEXING_ONLY
+from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -56,7 +57,7 @@ def _initialize_client(
     api_key: str, provider: EmbeddingProvider, model: str | None = None
 ) -> Any:
     if provider == EmbeddingProvider.OPENAI:
-        return openai.OpenAI(api_key=api_key)
+        return openai.OpenAI(api_key=api_key, timeout=OPENAI_EMBEDDING_TIMEOUT)
     elif provider == EmbeddingProvider.COHERE:
         return CohereClient(api_key=api_key)
     elif provider == EmbeddingProvider.VOYAGE:
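
The `timeout` keyword on the OpenAI client is applied per request, in seconds (an httpx.Timeout is also accepted), so the 600-second default defined below gives long embedding batches ten minutes before the client gives up. A minimal sketch with a placeholder key:

import openai

# Every request made through this client times out after 600s; the key is hypothetical.
client = openai.OpenAI(api_key="sk-placeholder", timeout=600)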

File 6 of 8

@@ -60,6 +60,9 @@ DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true
 # notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice")
+# Only used for OpenAI
+OPENAI_EMBEDDING_TIMEOUT = int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))
+
 # Fields which should only be set on new search setting
 PRESERVED_SEARCH_FIELDS = [
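
Unlike the app_configs entries earlier in this commit, this setting uses a plain get() default, so it tolerates an unset variable but not an empty-string override. A quick sketch, not part of the diff:

import os

os.environ.pop("OPENAI_EMBEDDING_TIMEOUT", None)
print(int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600")))  # -> 600 when unset

os.environ["OPENAI_EMBEDDING_TIMEOUT"] = ""
# int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))  # ValueError: "" is returned as-is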

File 7 of 8

@@ -281,6 +281,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

File 8 of 8

@@ -70,6 +70,9 @@ services:
       - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}
+      # We do not recommend changing this value
+      - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-}
       # Leave this on pretty please? Nothing sensitive is collected!
       # https://docs.danswer.dev/more/telemetry
       - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
@@ -252,6 +255,7 @@ services:
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - model_cache_huggingface:/root/.cache/huggingface/
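
Because `${VAR:-}` interpolation exports an empty string rather than leaving the variable undefined, every container sees these variables as defined. A quick check from a Python shell inside either model server container (sketch, assuming the compose defaults above):

import os

# '' when the host does not set it; the host's value when it does.
print(repr(os.environ.get("CLIENT_EMBEDDING_TIMEOUT")))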