Pre download models (#1354)

This commit is contained in:
Yuhong Sun 2024-04-19 21:52:53 -07:00 committed by GitHub
parent 87f304dfd0
commit 58545ccf3a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 0 deletions

View File

@ -42,6 +42,15 @@ RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cma
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('intfloat/e5-base-v2')"
# Pre-downloading NLTK for setups with limited egress
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('wordnet', quiet=True); \
nltk.download('punkt', quiet=True);"
# Set up application files
WORKDIR /app
COPY ./danswer /app/danswer

View File

@ -17,6 +17,16 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
RUN apt-get remove -y --allow-remove-essential perl-base && \
apt-get autoremove -y
# Pre-downloading models for setups with limited egress
RUN python -c "from transformers import AutoModel, AutoTokenizer, TFDistilBertForSequenceClassification; \
from huggingface_hub import snapshot_download; \
AutoTokenizer.from_pretrained('danswer/intent-model'); \
AutoTokenizer.from_pretrained('intfloat/e5-base-v2'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
snapshot_download('danswer/intent-model'); \
snapshot_download('intfloat/e5-base-v2'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')"
WORKDIR /app
# Utils used by model server