Added env var to skip warm up (#3633)

2025-09-22 17:16:20 +02:00 · 2025-01-08 14:29:15 -08:00
parent d7bc32c0ec
commit 0f9842064f
3 changed files with 7 additions and 0 deletions
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -5,6 +5,8 @@
 # For local dev, often user Authentication is not needed
 AUTH_TYPE=disabled

+# Skip the encoder model warm up for dev (enabled only when set to "true", case-insensitive)
+SKIP_WARM_UP=True

 # Always keep these on for Dev
 # Logs all model prompts to stdout
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -17,6 +17,7 @@ APP_PORT = 8080
 # prefix from requests directed towards the API server. In these cases, set this to `/api`
 APP_API_PREFIX = os.environ.get("API_PREFIX", "")

+SKIP_WARM_UP = os.environ.get("SKIP_WARM_UP", "").lower() == "true"

 #####
 # User Facing Features Configs
--- a/backend/onyx/natural_language_processing/search_nlp_models.py
+++ b/backend/onyx/natural_language_processing/search_nlp_models.py
@@ -12,6 +12,7 @@ from requests import Response
 from retry import retry

 from onyx.configs.app_configs import LARGE_CHUNK_RATIO
+from onyx.configs.app_configs import SKIP_WARM_UP
 from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
 from onyx.configs.model_configs import (
    BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
@@ -384,6 +385,9 @@ def warm_up_bi_encoder(
    embedding_model: EmbeddingModel,
    non_blocking: bool = False,
 ) -> None:
+    if SKIP_WARM_UP:
+        return
+
    warm_up_str = " ".join(WARM_UP_STRINGS)

    logger.debug(f"Warming up encoder model: {embedding_model.model_name}")