Fix Model Server (#2191)

2025-10-10 21:26:01 +02:00 · 2024-08-20 17:57:09 -07:00
parent 53387ab3eb
commit 3b035d791e
2 changed files with 12 additions and 6 deletions
--- a/backend/Dockerfile.model_server
+++ b/backend/Dockerfile.model_server
@@ -22,14 +22,18 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \
 # Download model weights
 # Run Nomic to pull in the custom architecture and have it cached locally
 RUN python -c "from transformers import AutoTokenizer; \
-AutoTokenizer.from_pretrained('distilbert-base-uncased', cache_folder='/root/.cache/temp_huggingface/hub/'); \
+AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
-AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_folder='/root/.cache/temp_huggingface/hub/'); \
+AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
 from huggingface_hub import snapshot_download; \
-snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3', cache_dir='/root/.cache/temp_huggingface/hub/'); \
+snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
-snapshot_download('nomic-ai/nomic-embed-text-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
+snapshot_download('nomic-ai/nomic-embed-text-v1'); \
-snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
+snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
 from sentence_transformers import SentenceTransformer; \
-SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True, cache_folder='/root/.cache/temp_huggingface/hub/');"
+SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
 # In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
 # running Danswer, don't overwrite it with the built in cache folder
 RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
 WORKDIR /app
--- a/backend/model_server/custom_models.py
+++ b/backend/model_server/custom_models.py
@@ -40,10 +40,12 @@ def get_local_intent_model(
    if _INTENT_MODEL is None:
        try:
            # Calculate where the cache should be, then load from local if available
            logger.notice(f"Loading model from local cache: {model_name_or_path}")
            local_path = snapshot_download(
                repo_id=model_name_or_path, revision=tag, local_files_only=True
            )
            _INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
            logger.notice(f"Loaded model from local cache: {local_path}")
        except Exception as e:
            logger.warning(f"Failed to load model directly: {e}")
            try: