Mirror of https://github.com/danswer-ai/danswer.git, synced 2025-07-28 13:53:28 +02:00
add vespa + embedding timeout env variables (#2689)
* add vespa + embedding timeout env variables
* nit: integration test
* add dangerous override
* k
* add additional clarity
* nit
* nit

@@ -401,6 +401,9 @@ CUSTOM_ANSWER_VALIDITY_CONDITIONS = json.loads(
     os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]")
 )
 
+VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")
+
+SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000")
 
 #####
 # Enterprise Edition Configs

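A note on the parsing idiom introduced above: `int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "5")` falls back to the default both when the variable is unset and when it is set to an empty string, which is exactly what the compose files further down pass via `${VESPA_REQUEST_TIMEOUT:-}`. A minimal sketch of that behavior, standard library only (the `read_timeout` helper is hypothetical):

    import os

    def read_timeout(var: str, default: str) -> int:
        # Hypothetical helper mirroring the pattern above: `or` treats an unset
        # variable (None) and an empty string the same way, so both fall back.
        return int(os.environ.get(var) or default)

    os.environ.pop("VESPA_REQUEST_TIMEOUT", None)
    assert read_timeout("VESPA_REQUEST_TIMEOUT", "5") == 5   # unset  -> default
    os.environ["VESPA_REQUEST_TIMEOUT"] = ""
    assert read_timeout("VESPA_REQUEST_TIMEOUT", "5") == 5   # empty  -> default
    os.environ["VESPA_REQUEST_TIMEOUT"] = "30"
    assert read_timeout("VESPA_REQUEST_TIMEOUT", "5") == 30  # explicit value wins
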
@@ -15,6 +15,7 @@ import httpx
 import requests
 
 from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
+from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT
 from danswer.configs.chat_configs import DOC_TIME_DECAY
 from danswer.configs.chat_configs import NUM_RETURNED_HITS
 from danswer.configs.chat_configs import TITLE_CONTENT_RATIO
@@ -211,7 +212,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             # Check for existing documents, existing documents need to have all of their chunks deleted
             # prior to indexing as the document size (num chunks) may have shrunk
@@ -275,7 +276,7 @@ class VespaIndex(DocumentIndex):
         # indexing / updates / deletes since we have to make a large volume of requests.
         with (
             concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor,
-            httpx.Client(http2=True) as http_client,
+            httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client,
         ):
             for update_batch in batch_generator(updates, batch_size):
                 future_to_document_id = {
@@ -419,7 +420,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
 
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {
@@ -475,7 +476,7 @@ class VespaIndex(DocumentIndex):
 
         # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for
         # indexing / updates / deletes since we have to make a large volume of requests.
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             index_names = [self.index_name]
             if self.secondary_index_name:
                 index_names.append(self.secondary_index_name)
@@ -503,7 +504,7 @@ class VespaIndex(DocumentIndex):
         if self.secondary_index_name:
             index_names.append(self.secondary_index_name)
 
-        with httpx.Client(http2=True) as http_client:
+        with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as http_client:
             for index_name in index_names:
                 params = httpx.QueryParams(
                     {

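For context on the repeated `timeout=VESPA_REQUEST_TIMEOUT` change: handing a plain number to `httpx.Client(timeout=...)` sets the default connect/read/write/pool deadline for every request issued through that client. A rough sketch of the idea, assuming the `httpx[http2]` extra is installed for `http2=True` and a Vespa-like endpoint is reachable locally (the URL and port are placeholders, not taken from this commit):

    import httpx

    VESPA_REQUEST_TIMEOUT = 5  # seconds; mirrors the new config default

    # A single number applies to connect, read, write and pool acquisition alike.
    with httpx.Client(http2=True, timeout=VESPA_REQUEST_TIMEOUT) as client:
        try:
            resp = client.get("http://localhost:8081/state/v1/health")  # placeholder URL
            print(resp.status_code)
        except httpx.TimeoutException:
            print(f"Vespa did not answer within {VESPA_REQUEST_TIMEOUT}s")
        except httpx.TransportError as exc:
            print(f"request failed before any timeout applied: {exc}")

    # When connect and read deadlines need to differ, httpx also accepts a Timeout object,
    # though this commit keeps a single value:
    granular = httpx.Timeout(VESPA_REQUEST_TIMEOUT, connect=2.0)
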
@@ -27,6 +27,7 @@ CHUNK_OVERLAP = 0
 MAX_METADATA_PERCENTAGE = 0.25
 CHUNK_MIN_CONTENT = 256
 
+
 logger = setup_logger()
 
 

@@ -1,3 +1,4 @@
+import sys
 import traceback
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
@@ -32,6 +33,7 @@ from danswer.configs.app_configs import OAUTH_CLIENT_ID
 from danswer.configs.app_configs import OAUTH_CLIENT_SECRET
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW
 from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE
+from danswer.configs.app_configs import SYSTEM_RECURSION_LIMIT
 from danswer.configs.app_configs import USER_AUTH_SECRET
 from danswer.configs.app_configs import WEB_DOMAIN
 from danswer.configs.constants import AuthType
@@ -140,6 +142,11 @@ def include_router_with_global_prefix_prepended(
 
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
+    # Set recursion limit
+    if SYSTEM_RECURSION_LIMIT is not None:
+        sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT)
+        logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}")
+
     SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME)
     SqlEngine.init_engine(
         pool_size=POSTGRES_API_SERVER_POOL_SIZE,

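On the recursion-limit hook added to `lifespan`: CPython refuses to recurse past `sys.getrecursionlimit()` frames (usually 1000) and raises `RecursionError` beyond that; `sys.setrecursionlimit` simply moves the guard. A small standard-library sketch of the effect (the `depth` function is illustrative only):

    import sys

    def depth(n: int) -> int:
        # Illustrative only: recurses n times, raising RecursionError at the limit.
        return 1 if n <= 1 else 1 + depth(n - 1)

    print(sys.getrecursionlimit())   # typically 1000 out of the box

    sys.setrecursionlimit(3000)      # the same call lifespan() now makes with SYSTEM_RECURSION_LIMIT
    try:
        print(depth(2500))           # fits comfortably under the raised limit
    except RecursionError:
        print("still too deep for the configured limit")

Raising the limit only moves the Python-level check; a value far beyond the platform's stack size can still crash the interpreter, which is presumably why the compose file below advises against changing SYSTEM_RECURSION_LIMIT.
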
@@ -25,6 +25,7 @@ from model_server.constants import EmbeddingModelTextType
 from model_server.constants import EmbeddingProvider
 from model_server.utils import simple_log_function_time
 from shared_configs.configs import INDEXING_ONLY
+from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT
 from shared_configs.enums import EmbedTextType
 from shared_configs.enums import RerankerProvider
 from shared_configs.model_server_models import Embedding
@@ -56,7 +57,7 @@ def _initialize_client(
     api_key: str, provider: EmbeddingProvider, model: str | None = None
 ) -> Any:
     if provider == EmbeddingProvider.OPENAI:
-        return openai.OpenAI(api_key=api_key)
+        return openai.OpenAI(api_key=api_key, timeout=OPENAI_EMBEDDING_TIMEOUT)
     elif provider == EmbeddingProvider.COHERE:
         return CohereClient(api_key=api_key)
     elif provider == EmbeddingProvider.VOYAGE:

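Regarding the embedding-side change: in the OpenAI Python SDK (v1+), a `timeout` passed to the client constructor becomes the default deadline for every request made through that client, so the 600-second `OPENAI_EMBEDDING_TIMEOUT` default covers each embeddings call. A hedged sketch of the usage (the API key and model name are placeholders, not values from this commit):

    import openai

    OPENAI_EMBEDDING_TIMEOUT = 600  # seconds; mirrors the new shared config default

    client = openai.OpenAI(api_key="sk-placeholder", timeout=OPENAI_EMBEDDING_TIMEOUT)

    # Every request made through this client now waits up to 600s before the SDK
    # raises openai.APITimeoutError.
    resp = client.embeddings.create(
        model="text-embedding-3-small",  # placeholder model name
        input=["some text to embed"],
    )
    print(len(resp.data[0].embedding))

    # A per-call override is possible without rebuilding the client:
    quick_client = client.with_options(timeout=30.0)
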
@@ -60,6 +60,9 @@ DEV_LOGGING_ENABLED = os.environ.get("DEV_LOGGING_ENABLED", "").lower() == "true
 # notset, debug, info, notice, warning, error, or critical
 LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice")
 
+# Only used for OpenAI
+OPENAI_EMBEDDING_TIMEOUT = int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))
+
 
 # Fields which should only be set on new search setting
 PRESERVED_SEARCH_FIELDS = [

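Worth noting the difference between the two env-parsing idioms this commit uses: `os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600")` only falls back when the variable is missing, so an explicitly empty value would make `int("")` raise, while the `or`-based form used for `VESPA_REQUEST_TIMEOUT` treats empty and unset alike. A tiny illustration:

    import os

    os.environ["OPENAI_EMBEDDING_TIMEOUT"] = ""  # simulate an explicitly empty variable

    # Two-argument get() only falls back when the key is missing; the empty string
    # comes back as-is, so int("") raises ValueError.
    try:
        int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT", "600"))
    except ValueError:
        print("empty string is not a valid int")

    # The `or` idiom used for VESPA_REQUEST_TIMEOUT treats empty and unset alike.
    print(int(os.environ.get("OPENAI_EMBEDDING_TIMEOUT") or "600"))  # -> 600
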
@@ -281,6 +281,7 @@ services:
       - INDEXING_ONLY=True
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - indexing_huggingface_model_cache:/root/.cache/huggingface/

@@ -70,6 +70,9 @@ services:
       - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-}
       - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
       - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
+      - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-}
+      # We do not recommend changing this value
+      - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-}
       # Leave this on pretty please? Nothing sensitive is collected!
       # https://docs.danswer.dev/more/telemetry
       - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-}
@@ -252,6 +255,7 @@ services:
       - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
      - LOG_LEVEL=${LOG_LEVEL:-info}
+      - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-}
     volumes:
       # Not necessary, this is just to reduce download time during startup
       - model_cache_huggingface:/root/.cache/huggingface/