From 0f9842064f9e843e6d20a6c2cca2121a175abe69 Mon Sep 17 00:00:00 2001
From: hagen-danswer <hagen@danswer.ai>
Date: Wed, 8 Jan 2025 14:29:15 -0800
Subject: [PATCH] Added env var to skip warm up (#3633)

---
 .vscode/env_template.txt                                      | 2 ++
 backend/onyx/configs/app_configs.py                           | 1 +
 backend/onyx/natural_language_processing/search_nlp_models.py | 4 ++++
 3 files changed, 7 insertions(+)

diff --git a/.vscode/env_template.txt b/.vscode/env_template.txt
index 89faca0abf02..98de4064f537 100644
--- a/.vscode/env_template.txt
+++ b/.vscode/env_template.txt
@@ -5,6 +5,8 @@
 # For local dev, often user Authentication is not needed
 AUTH_TYPE=disabled
 
+# Skip encoder model warm up for local dev
+SKIP_WARM_UP=True
 
 # Always keep these on for Dev
 # Logs all model prompts to stdout
diff --git a/backend/onyx/configs/app_configs.py b/backend/onyx/configs/app_configs.py
index c37384217b45..0d5c3f3b09db 100644
--- a/backend/onyx/configs/app_configs.py
+++ b/backend/onyx/configs/app_configs.py
@@ -17,6 +17,7 @@ APP_PORT = 8080
 # prefix from requests directed towards the API server. In these cases, set this to `/api`
 APP_API_PREFIX = os.environ.get("API_PREFIX", "")
 
+SKIP_WARM_UP = os.environ.get("SKIP_WARM_UP", "").lower() == "true"
 
 #####
 # User Facing Features Configs
diff --git a/backend/onyx/natural_language_processing/search_nlp_models.py b/backend/onyx/natural_language_processing/search_nlp_models.py
index 4a40c6174c72..b7e54e81aff2 100644
--- a/backend/onyx/natural_language_processing/search_nlp_models.py
+++ b/backend/onyx/natural_language_processing/search_nlp_models.py
@@ -12,6 +12,7 @@ from requests import Response
 from retry import retry
 
 from onyx.configs.app_configs import LARGE_CHUNK_RATIO
+from onyx.configs.app_configs import SKIP_WARM_UP
 from onyx.configs.model_configs import BATCH_SIZE_ENCODE_CHUNKS
 from onyx.configs.model_configs import (
     BATCH_SIZE_ENCODE_CHUNKS_FOR_API_EMBEDDING_SERVICES,
@@ -384,6 +385,9 @@ def warm_up_bi_encoder(
     embedding_model: EmbeddingModel,
     non_blocking: bool = False,
 ) -> None:
+    if SKIP_WARM_UP:
+        return
+
     warm_up_str = " ".join(WARM_UP_STRINGS)
 
     logger.debug(f"Warming up encoder model: {embedding_model.model_name}")