Fix Model Server (#2191)

This commit is contained in:
Yuhong Sun 2024-08-20 17:57:09 -07:00 committed by GitHub
parent 53387ab3eb
commit 3b035d791e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 12 additions and 6 deletions

View File

@ -22,14 +22,18 @@ RUN apt-get remove -y --allow-remove-essential perl-base && \
# Download model weights
# Run Nomic to pull in the custom architecture and have it cached locally
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('distilbert-base-uncased', cache_folder='/root/.cache/temp_huggingface/hub/'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_folder='/root/.cache/temp_huggingface/hub/'); \
AutoTokenizer.from_pretrained('distilbert-base-uncased'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from huggingface_hub import snapshot_download; \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download('nomic-ai/nomic-embed-text-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1', cache_dir='/root/.cache/temp_huggingface/hub/'); \
snapshot_download(repo_id='danswer/hybrid-intent-token-classifier', revision='v1.0.3'); \
snapshot_download('nomic-ai/nomic-embed-text-v1'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
from sentence_transformers import SentenceTransformer; \
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True, cache_folder='/root/.cache/temp_huggingface/hub/');"
SentenceTransformer(model_name_or_path='nomic-ai/nomic-embed-text-v1', trust_remote_code=True);"
# In case the user has volumes mounted to /root/.cache/huggingface that they've downloaded while
# running Danswer, don't overwrite it with the built in cache folder
RUN mv /root/.cache/huggingface /root/.cache/temp_huggingface
WORKDIR /app

View File

@ -40,10 +40,12 @@ def get_local_intent_model(
if _INTENT_MODEL is None:
try:
# Calculate where the cache should be, then load from local if available
logger.notice(f"Loading model from local cache: {model_name_or_path}")
local_path = snapshot_download(
repo_id=model_name_or_path, revision=tag, local_files_only=True
)
_INTENT_MODEL = HybridClassifier.from_pretrained(local_path)
logger.notice(f"Loaded model from local cache: {local_path}")
except Exception as e:
logger.warning(f"Failed to load model directly: {e}")
try: