# danswer/backend/Dockerfile

# Base image: Python 3.11.7 on Debian bookworm (slim variant).
FROM python:3.11.7-slim-bookworm

# Image metadata.
# The description is continued at column 0 on purpose: leading whitespace on a
# continued line inside the quotes would become part of the label value.
LABEL com.danswer.maintainer="founders@danswer.ai"
LABEL com.danswer.description="This image is the backend container of Danswer which \
contains code for both the Community and Enterprise editions of Danswer. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer"
# Default DANSWER_VERSION, typically overridden during builds by GitHub Actions.
ARG DANSWER_VERSION=0.3-dev
# Carry the build argument into the runtime environment and mark the process
# as running inside Docker.
ENV DANSWER_VERSION=${DANSWER_VERSION} \
    DANSWER_RUNNING_IN_DOCKER="true"
# Print the resolved version into the build log.
RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}"
# Install system dependencies
# cmake needed for psycopg (postgres)
# libpq-dev needed for psycopg (postgres)
# NOTE(review): libpq-dev is not in the install list below -- confirm whether
# it is still required or whether this comment is stale.
# curl included just for users' convenience
# zip for Vespa step further down
# ca-certificates for HTTPS
# libgnutls30, libblkid1, libmount1, libsmartcols1 and libuuid1 are pinned to
# exact Debian package versions (presumably for security fixes -- TODO confirm).
# cmake, libxmlsec1-dev, pkg-config and gcc are removed again by the cleanup
# step later in this file.
RUN apt-get update && \
    apt-get install -y \
        cmake \
        curl \
        zip \
        ca-certificates \
        libgnutls30=3.7.9-2+deb12u3 \
        libblkid1=2.38.1-5+deb12u1 \
        libmount1=2.38.1-5+deb12u1 \
        libsmartcols1=2.38.1-5+deb12u1 \
        libuuid1=2.38.1-5+deb12u1 \
        libxmlsec1-dev \
        pkg-config \
        gcc && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean
# Install Python dependencies
# Remove py which is pulled in by retry, py is not needed and is a CVE
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt
# Single layer: install both requirement sets, uninstall `py`, install
# Playwright's Chromium and its OS dependencies, and symlink supervisord into
# /usr/bin.
RUN pip install --no-cache-dir --upgrade \
        --retries 5 \
        --timeout 30 \
        -r /tmp/requirements.txt \
        -r /tmp/ee-requirements.txt && \
    pip uninstall -y py && \
    playwright install chromium && \
    playwright install-deps chromium && \
    ln -s /usr/local/bin/supervisord /usr/bin/supervisord
# Cleanup for CVEs and size reduction
# https://github.com/tornadoweb/tornado/issues/3107
# xserver-common and xvfb included by playwright installation but not needed after
# perl-base is part of the base Python Debian image but not needed for Danswer functionality
# perl-base could only be removed with --allow-remove-essential
# libxmlsec1-openssl is installed explicitly right after libxmlsec1-dev is
# removed (presumably so the runtime library survives autoremove -- TODO confirm).
# NOTE(review): this runs in its own layer, so files removed here still exist
# in the earlier layers; the final filesystem is cleaned but the total image
# size may not shrink.
RUN apt-get update && \
    apt-get remove -y --allow-remove-essential \
        perl-base \
        xserver-common \
        xvfb \
        cmake \
        libldap-2.5-0 \
        libxmlsec1-dev \
        pkg-config \
        gcc && \
    apt-get install -y libxmlsec1-openssl && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/* && \
    rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Pre-downloading models for setups with limited egress
# The quoted Python snippet is continued at column 0 so the code passed to
# `python -c` is unchanged.
RUN python -c "from tokenizers import Tokenizer; \
Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
# Pre-downloading NLTK for setups with limited egress
# Downloads the 'stopwords' and 'punkt' NLTK resources at build time.
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True); \
nltk.download('punkt', quiet=True);"
# nltk.download('wordnet', quiet=True); introduce this back if lemmatization is needed
# Set up application files
# All following relative paths and the default command run from /app.
WORKDIR /app

# Enterprise Version Files
COPY ./ee /app/ee
# supervisord configuration, placed in the conf.d directory
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
# Set up application files
COPY ./danswer /app/danswer
COPY ./shared_configs /app/shared_configs
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
# The same supervisord.conf is also copied to /etc/supervisor/conf.d/ earlier
# in this file; this second location is kept as-is.
COPY supervisord.conf /usr/etc/supervisord.conf
# Escape hatch
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py
# Put logo in assets
COPY ./assets /app/assets
# Make packages under /app importable. Written in key=value form; the
# space-separated `ENV key value` form is legacy syntax.
ENV PYTHONPATH=/app
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
# Exec (JSON-array) form: `tail` is started directly, without a wrapping shell.
CMD ["tail", "-f", "/dev/null"]