diff --git a/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py b/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py
index 27fab1188..29316adb1 100644
--- a/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py
+++ b/backend/alembic/versions/4b08d97e175a_change_default_prune_freq.py
@@ -10,8 +10,8 @@ from alembic import op
 # revision identifiers, used by Alembic.
 revision = "4b08d97e175a"
 down_revision = "d9ec13955951"
-branch_labels = None
-depends_on = None
+branch_labels: None = None
+depends_on: None = None
 
 
 def upgrade() -> None:
diff --git a/backend/danswer/indexing/models.py b/backend/danswer/indexing/models.py
index 6357056ea..e60cb588b 100644
--- a/backend/danswer/indexing/models.py
+++ b/backend/danswer/indexing/models.py
@@ -109,8 +109,6 @@ class EmbeddingModelDetail(BaseModel):
         embedding_model: "EmbeddingModel",
     ) -> "EmbeddingModelDetail":
         return cls(
-            # When constructing EmbeddingModel Detail for user-facing flows, strip the
-            # unneeded additional data after the `_`s
             model_name=embedding_model.model_name,
             model_dim=embedding_model.model_dim,
             normalize=embedding_model.normalize,
diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py
index a5ae704dd..a9a445e34 100644
--- a/backend/danswer/natural_language_processing/search_nlp_models.py
+++ b/backend/danswer/natural_language_processing/search_nlp_models.py
@@ -1,4 +1,5 @@
 import re
+import threading
 import time
 from collections.abc import Callable
 from functools import wraps
@@ -304,6 +305,7 @@ def warm_up_bi_encoder(
     embedding_model: DBEmbeddingModel,
     model_server_host: str = MODEL_SERVER_HOST,
     model_server_port: int = MODEL_SERVER_PORT,
+    non_blocking: bool = False,
 ) -> None:
     model_name = embedding_model.model_name
     normalize = embedding_model.normalize
@@ -327,12 +329,26 @@ def warm_up_bi_encoder(
         api_key=None,
     )
 
-    retry_encode = warm_up_retry(embed_model.encode)
-    retry_encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)
+    def _warm_up() -> None:
+        try:
+            embed_model.encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)
+            logger.debug(f"Warm-up complete for encoder model: {model_name}")
+        except Exception as e:
+            logger.warning(
+                f"Warm-up request failed for encoder model {model_name}: {e}"
+            )
+
+    if non_blocking:
+        threading.Thread(target=_warm_up, daemon=True).start()
+        logger.debug(f"Started non-blocking warm-up for encoder model: {model_name}")
+    else:
+        retry_encode = warm_up_retry(embed_model.encode)
+        retry_encode(texts=[warm_up_str], text_type=EmbedTextType.QUERY)
 
 
 def warm_up_cross_encoder(
     rerank_model_name: str,
+    non_blocking: bool = False,
 ) -> None:
     logger.debug(f"Warming up reranking model: {rerank_model_name}")
 
@@ -342,5 +358,20 @@ def warm_up_cross_encoder(
         api_key=None,
     )
 
-    retry_rerank = warm_up_retry(reranking_model.predict)
-    retry_rerank(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])
+    def _warm_up() -> None:
+        try:
+            reranking_model.predict(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])
+            logger.debug(f"Warm-up complete for reranking model: {rerank_model_name}")
+        except Exception as e:
+            logger.warning(
+                f"Warm-up request failed for reranking model {rerank_model_name}: {e}"
+            )
+
+    if non_blocking:
+        threading.Thread(target=_warm_up, daemon=True).start()
+        logger.debug(
+            f"Started non-blocking warm-up for reranking model: {rerank_model_name}"
+        )
+    else:
+        retry_rerank = warm_up_retry(reranking_model.predict)
+        retry_rerank(WARM_UP_STRINGS[0], WARM_UP_STRINGS[1:])
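Reviewer note: a minimal usage sketch of the new `non_blocking` flag in `search_nlp_models.py`, assuming a caller that already holds the active `DBEmbeddingModel` row and an optional reranking model name. The startup hook below and its arguments are illustrative only; `warm_up_bi_encoder` and `warm_up_cross_encoder` are the functions changed in this diff.

```python
# Hedged sketch: warm_up_models_on_startup is a hypothetical caller, not part of
# this diff. It only illustrates the fire-and-forget path added above.
from danswer.natural_language_processing.search_nlp_models import (
    warm_up_bi_encoder,
    warm_up_cross_encoder,
)


def warm_up_models_on_startup(db_embedding_model, rerank_model_name: str | None) -> None:
    # db_embedding_model is the active DBEmbeddingModel row (lookup not shown).
    # With non_blocking=True each call spawns a daemon thread and returns
    # immediately, so startup is not blocked while the model server loads weights.
    warm_up_bi_encoder(embedding_model=db_embedding_model, non_blocking=True)
    if rerank_model_name:
        warm_up_cross_encoder(rerank_model_name=rerank_model_name, non_blocking=True)
```

Note that the `non_blocking` path skips the `warm_up_retry` wrapper: a failure in the background thread is only logged as a warning, while the blocking path keeps the existing retry behaviour.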
diff --git a/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx
index c156dad5c..ff3f9c7b9 100644
--- a/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx
+++ b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx
@@ -311,11 +311,11 @@ export default function EmbeddingForm() {
         >
           <>