danswer/backend/Dockerfile

118 lines
3.8 KiB
Docker
Raw Permalink Normal View History

# Base image: Python 3.11.7 on Debian bookworm (slim variant).
# A later cleanup step hard-codes /usr/local/lib/python3.11/site-packages, so
# keep that path in sync when bumping the Python minor version here.
FROM python:3.11.7-slim-bookworm
2023-04-28 22:40:46 -07:00
2024-12-13 09:48:43 -08:00
# Image metadata.
# The description previously said "web/frontend container", but this Dockerfile
# builds the backend image (used by the api server and background containers).
# Continuation lines inside the quoted value intentionally start at column 0 so
# that no indentation whitespace ends up inside the label text.
LABEL com.danswer.maintainer="founders@onyx.app"
LABEL com.danswer.description="This image is the backend container of Onyx which \
contains code for both the Community and Enterprise editions of Onyx. If you do not \
have a contract or agreement with DanswerAI, you are not permitted to use the Enterprise \
Edition features outside of personal development or testing purposes. Please reach out to \
founders@onyx.app for more information. Please visit https://github.com/onyx-dot-app/onyx"
2024-04-13 12:10:46 -07:00
2024-12-13 11:49:27 -08:00
# Build-time version stamp. The default is only a placeholder; it is typically
# overridden during builds by GitHub Actions.
ARG ONYX_VERSION=0.8-dev

# Environment baked into the image:
#   ONYX_VERSION              - the build arg above, persisted for the running container
#   DANSWER_RUNNING_IN_DOCKER - always "true" for images built from this file
#   DO_NOT_TRACK              - used to disable telemetry for Unstructured
ENV ONYX_VERSION=${ONYX_VERSION} \
    DANSWER_RUNNING_IN_DOCKER="true" \
    DO_NOT_TRACK="true"

# Print the resolved version into the build log.
RUN echo "ONYX_VERSION: ${ONYX_VERSION}"
# Install system dependencies (package list kept in alphabetical order)
#   ca-certificates - for HTTPS
#   cmake           - needed for psycopg (postgres)
#   curl            - included just for users' convenience
#   zip             - for the Vespa step further down
# cmake, gcc, libxmlsec1-dev and pkg-config are removed again by the cleanup
# step later in this file, so presumably they are only needed while the Python
# packages are being built.
# NOTE(review): an earlier comment also listed libpq-dev for psycopg, but it is
# not part of the install list - confirm that it is really not needed.
# NOTE(review): libblkid1/libgnutls30/libmount1/libsmartcols1/libuuid1 look like
# explicit upgrades of libraries already in the base image - confirm the intent.
RUN apt-get update && \
    apt-get install -y \
        ca-certificates \
        cmake \
        curl \
        gcc \
        libblkid1 \
        libgnutls30 \
        libmount1 \
        libsmartcols1 \
        libuuid1 \
        libxmlsec1-dev \
        nano \
        pkg-config \
        vim \
        zip && \
    rm -rf /var/lib/apt/lists/* && \
    apt-get clean
2024-10-15 12:23:04 -07:00
# Install Python dependencies
# Both requirement sets (default + enterprise) are copied on their own so this
# layer is only rebuilt when a requirements file changes.
COPY ./requirements/default.txt /tmp/requirements.txt
COPY ./requirements/ee.txt /tmp/ee-requirements.txt

# Steps, in order:
#   1. install both requirement sets without keeping pip's download cache
#   2. remove py, which is pulled in by retry; py is not needed and is a CVE
#   3. download Chromium for playwright and the system libraries it needs
#   4. drop the apt package lists refreshed by `playwright install-deps`; they
#      must be deleted in this same RUN, a later layer cannot shrink this one
#   5. expose supervisord under /usr/bin as well
RUN pip install --no-cache-dir --upgrade \
        --retries 5 \
        --timeout 30 \
        -r /tmp/requirements.txt \
        -r /tmp/ee-requirements.txt && \
    pip uninstall -y py && \
    playwright install chromium && \
    playwright install-deps chromium && \
    rm -rf /var/lib/apt/lists/* && \
    ln -s /usr/local/bin/supervisord /usr/bin/supervisord
# Cleanup for CVEs and size reduction
#   xserver-common, xvfb - included by the playwright installation but not needed after
#   perl-base            - part of the base Python Debian image but not needed for Onyx
#                          functionality; it could only be removed with
#                          --allow-remove-essential
#   cmake, gcc, libxmlsec1-dev, pkg-config - installed by the system dependency
#                          step earlier in this file
#   tornado test.key     - presumably the file discussed in
#                          https://github.com/tornadoweb/tornado/issues/3107
# NOTE(review): libxmlsec1-openssl is installed right after the removals,
# presumably as the runtime counterpart of the removed libxmlsec1-dev - confirm.
RUN apt-get update && \
    apt-get remove -y --allow-remove-essential \
        cmake \
        gcc \
        libldap-2.5-0 \
        libxmlsec1-dev \
        perl-base \
        pkg-config \
        xserver-common \
        xvfb && \
    apt-get install -y libxmlsec1-openssl && \
    apt-get autoremove -y && \
    rm -rf /var/lib/apt/lists/* && \
    rm -f /usr/local/lib/python3.11/site-packages/tornado/test/test.key
2023-12-30 13:34:47 -08:00
2024-04-19 21:52:53 -07:00
# Pre-downloading models for setups with limited egress:
# load the nomic-ai/nomic-embed-text-v1 tokenizer once while the image is built.
RUN python -c "from tokenizers import Tokenizer; Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')"
2024-04-19 21:52:53 -07:00
# Pre-downloading NLTK for setups with limited egress.
# Datasets are fetched in the order listed: stopwords, then punkt.
# Add 'wordnet' to the tuple to introduce it back if lemmatization is needed.
# NOTE(review): nltk.download() appears to signal failure through its return
# value rather than an exception - confirm whether a failed download should
# fail the build.
RUN python -c "import nltk; \
    [nltk.download(name, quiet=True) for name in ('stopwords', 'punkt')]"
2024-04-19 21:52:53 -07:00
# Everything below is laid out under /app
WORKDIR /app

# Enterprise Version Files
COPY ./ee /app/ee

# supervisord configuration, first location
COPY ./supervisord.conf /etc/supervisor/conf.d/supervisord.conf

# Set up application files
COPY ./onyx /app/onyx
COPY ./shared_configs /app/shared_configs
COPY ./alembic /app/alembic
COPY ./alembic_tenants /app/alembic_tenants
COPY ./alembic.ini /app/alembic.ini

# supervisord configuration, second location (same source file as above)
# NOTE(review): confirm which of the two paths supervisord actually reads.
COPY ./supervisord.conf /usr/etc/supervisord.conf

# Escape hatch scripts
COPY ./scripts/debugging /app/scripts/debugging
COPY ./scripts/force_delete_connector_by_id.py /app/scripts/force_delete_connector_by_id.py

# Put logo in assets
COPY ./assets /app/assets

# Make the code copied into /app importable
ENV PYTHONPATH=/app
2023-07-16 15:31:52 -07:00
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
2023-07-16 15:31:52 -07:00
# Exec (JSON-array) form: tail is started directly, without a /bin/sh -c wrapper.
# Following /dev/null never produces output, so the container simply stays idle.
CMD ["tail", "-f", "/dev/null"]