From 0f9842064f9e843e6d20a6c2cca2121a175abe69 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Wed, 8 Jan 2025 14:29:15 -0800 Subject: [PATCH] Added env var to skip warm up (#3633) --- .vscode/env_template.txt | 2 ++ backend/onyx/configs/app_configs.py | 1 + backend/onyx/natural_language_processing/search_nlp_models.py | 4 ++++ 3 files changed, 7 insertions(+) diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt index 89faca0abf02..98de4064f537 100644 --- a/.vscode/env_template.txt +++ b/.vscode/env_template.txt @@ -5,6 +5,8 @@ # For local dev, often user Authentication is not needed AUTH_TYPE=disabled +# Skip warm up for dev +SKIP_WARM_UP=True # Always keep these on for Dev # Logs all model prompts to stdout diff --git a/backend/onyx/configs/app_configs.py b/backend/onyx/configs/app_configs.py index c37384217b45..0d5c3f3b09db 100644 --- a/backend/onyx/configs/app_configs.py +++ b/backend/onyx/configs/app_configs.py @@ -17,6 +17,7 @@ APP_PORT = 8080 # prefix from requests directed towards the API server. In these cases, set this to `/api` APP_API_PREFIX = os.environ.get("API_PREFIX", "") +SKIP_WARM_UP = os.environ.get("SKIP_WARM_UP", "").lower() == "true" ##### # User Facing Features Configs diff --git a/backend/onyx/natural_language_processing/search_nlp_models.py b/backend/onyx/natural_language_processing/search_nlp_models.py index 4a40c6174c72..b7e54e81aff2 100644 --- a/backend/onyx/natural_language_processing/search_nlp_models.py +++ b/backend/onyx/natural_language_processing/search_nlp_models.py @@ -12,6 +12,7 @@ from requests import Response from retry import retry from onyx.configs.app_configs import LARGE_CHUNK_RATIO +from onyx.configs.app_configs import SKIP_WARM_UP from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS from onyx.configs.model_configs import ( BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES, @@ -384,6 +385,9 @@ def warm_up_bi_encoder( embedding_model: EmbeddingModel, non_blocking: bool = False, ) -> None: + if SKIP_WARM_UP: + return + warm_up_str = " ".join(WARM_UP_STRINGS) logger.debug(f"Warming up encoder model: {embedding_model.model_name}")