Pre download models (#1354)

This commit is contained in:
Yuhong Sun
2024-04-19 21:52:53 -07:00
committed by GitHub
parent 87f304dfd0
commit 58545ccf3a
2 changed files with 19 additions and 0 deletions

View File

@@ -42,6 +42,15 @@ RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cma
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Fetch the intfloat/e5-base-v2 tokenizer while the image is being built,
# for setups with limited egress.
RUN python -c "from transformers import AutoTokenizer; \
AutoTokenizer.from_pretrained('intfloat/e5-base-v2')"
# Pre-download the NLTK data packages (stopwords, wordnet, punkt) at build
# time for setups with limited egress.
# By default nltk.download() reports a failed fetch by returning False instead
# of raising, which would let this step exit 0 and produce an image without
# the data. raise_on_error=True turns a failed download into an exception so
# the build stops here instead.
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True, raise_on_error=True); \
nltk.download('wordnet', quiet=True, raise_on_error=True); \
nltk.download('punkt', quiet=True, raise_on_error=True)"
# Application files: make /app the working directory, then copy the danswer
# package from the build context into it (destination resolves to /app/danswer).
WORKDIR /app
COPY ./danswer ./danswer