add vespa + embedding timeout env variables (#2689)

* add vespa + embedding timeout env variables

* nit: integration test

* add dangerous override

* k

* add additional clarity

* nit

* nit
Author: pablodanswer (committed by GitHub)
Date: 2024-10-08 20:20:28 -07:00
Parent: 10f221cd37
Commit: d5b9a6e552
8 changed files with 27 additions and 6 deletions

File 1 of 8

@@ -401,6 +401,9 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
     os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
 )
+VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
+SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
+
 #####
 # Enterprise Edition Configs
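
Note: both new settings use `os.environ.get(...) or "<default>"` rather than a get() default. A minimal sketch, not part of the diff, of why that matters given that the compose files later in this commit export these variables as empty strings when unset:

import os

# Compose entries like `VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}` set the
# variable to an empty string when the host leaves it undefined.
os.environ["VESPA_REQUEST_TIMEOUT"] = ""

# `or "5"` treats "" as missing, so the fallback still applies:
print(int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5"))  # -> 5

# A plain get() default would return "" here, and int("") raises ValueError:
# int(os.environ.get("VESPA_REQUEST_TIMEOUT", "5"))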

File 2 of 8

@@ -15,6 +15,7 @@ import httpx
 import requests
 from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
+from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT
 from danswer.configs.chat_configs import DOC_TIME_DECAY
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -211,7 +212,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             # Check for existing documents, existing documents need to have all of their chunks deleted
             # prior to indexing as the document size (num chunks) may have shrunk
@@ -275,7 +276,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             for update_batch in batch_generator(updates, batch_size):
                 future_to_document_id = {
@@ -419,7 +420,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
@@ -475,7 +476,7 @@ class VespaIndex(DocumentIndex):
         # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
         # indexing / updates / deletes since we have to make a large volume of requests.
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             index_names = [self.index_name]
             if self.secondary_index_name:
                 index_names.append(self.secondary_index_name)
@@ -503,7 +504,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
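
All five call sites above pass the same scalar timeout. In httpx, a scalar caps each phase of the request (connect, read, write, pool acquisition) rather than the request as a whole. A minimal sketch under that reading (the 5 mirrors the config default; `http2=True` requires the `httpx[http2]` extra to be installed):

import httpx

# Each phase of every request is capped at 5 seconds.
with httpx.Client(http2=True, timeout=5) as http_client:
    ...

# Granular equivalent if, say, only slow Vespa reads need more headroom:
with httpx.Client(http2=True, timeout=httpx.Timeout(5.0, read=30.0)) as http_client:
    ...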

File 3 of 8

@@ -27,6 +27,7 @@ CHUNK_OVERLAP = 0
 MAX_METADATA_PERCENTAGE = 0.25
 CHUNK_MIN_CONTENT = 256
 logger = setup_logger()

File 4 of 8

@@ -1,3 +1,4 @@
+import sys
 import traceback
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
@@ -32,6 +33,7 @@ from danswer.configs.app_configs import OAUTH_CLIENT_ID
 from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
+from danswer.configs.app_configs import SYSTEM_RECURSION_LIMIT
 from danswer.configs.app_configs import USER_AUTH_SECRET
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import AuthType
@@ -140,6 +142,11 @@ def include_router_with_global_prefix_prepended(
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
+    # Set recursion limit
+    if SYSTEM_RECURSION_LIMIT is not None:
+        sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
+        logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}")
+
     SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)
     SqlEngine.init_engine(
         pool_size=POSTGRES_API_SERVER_POOL_SIZE,
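
One caveat on the new startup hook, sketched below (not from the diff): `sys.setrecursionlimit` only moves the ceiling at which CPython raises RecursionError; it does not grow the underlying C stack, so values far above the default trade a clean exception for a possible hard crash.

import sys

print(sys.getrecursionlimit())  # CPython defaults to 1000, same as SYSTEM_RECURSION_LIMIT
sys.setrecursionlimit(1000)     # adjusts only the RecursionError threshold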

File 5 of 8

@@ -25,6 +25,7 @@ from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
 from model_server.utils import simple_log_function_time
 from shared_configs.configs import INDEXING_ONLY
+from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -56,7 +57,7 @@ def _initialize_client(
     api_key: str, provider: EmbeddingProvider, model: str | None = None
 ) -> Any:
     if provider == EmbeddingProvider.OPENAI:
-        return openai.OpenAI(api_key=api_key)
+        return openai.OpenAI(api_key=api_key, timeout=OPENAI_EMBEDDING_TIMEOUT)
     elif provider == EmbeddingProvider.COHERE:
         return CohereClient(api_key=api_key)
     elif provider == EmbeddingProvider.VOYAGE:
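
The `timeout` keyword on the OpenAI client is applied per request, in seconds (an httpx.Timeout is also accepted), so the 600-second default defined below gives long embedding batches ten minutes before the client gives up. A minimal sketch with a placeholder key:

import openai

# Every request made through this client times out after 600s; the key is hypothetical.
client = openai.OpenAI(api_key="sk-placeholder", timeout=600)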

File 6 of 8

@@ -60,6 +60,9 @@ DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true
 # notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice")
+# Only used for OpenAI
+OPENAI_EMBEDDING_TIMEOUT = int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))
+
 # Fields which should only be set on new search setting
 PRESERVED_SEARCH_FIELDS = [
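
Unlike the app_configs entries earlier in this commit, this setting uses a plain get() default, so it tolerates an unset variable but not an empty-string override. A quick sketch, not part of the diff:

import os

os.environ.pop("OPENAI_EMBEDDING_TIMEOUT", None)
print(int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600")))  # -> 600 when unset

os.environ["OPENAI_EMBEDDING_TIMEOUT"] = ""
# int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))  # ValueError: "" is returned as-is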

File 7 of 8

@@ -281,6 +281,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

File 8 of 8

@@ -70,6 +70,9 @@ services:
       - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}
+      # We do not recommend changing this value
+      - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-}
       # Leave this on pretty please? Nothing sensitive is collected!
       # https://docs.danswer.dev/more/telemetry
       - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
@@ -252,6 +255,7 @@ services:
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - model_cache_huggingface:/root/.cache/huggingface/
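
Because `${VAR:-}` interpolation exports an empty string rather than leaving the variable undefined, every container sees these variables as defined. A quick check from a Python shell inside either model server container (sketch, assuming the compose defaults above):

import os

# '' when the host does not set it; the host's value when it does.
print(repr(os.environ.get("CLIENT_EMBEDDING_TIMEOUT")))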