mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-03 09:28:25 +02:00
Dockerfile to build smaller Images (#567)
This commit is contained in:
parent
41964031bf
commit
dbf59d2acc
@ -1,53 +1,51 @@
|
||||
FROM python:3.11.4-slim-bookworm
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
|
||||
# Install system dependencies
|
||||
RUN apt-get update && \
|
||||
apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
|
||||
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
|
||||
supervisor zip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./requirements/default.txt /tmp/requirements.txt
|
||||
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
|
||||
supervisor zip ca-certificates gnupg && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
apt-get clean
|
||||
|
||||
# Install Python dependencies
|
||||
# Remove py which is pulled in by retry, py is not needed and is a CVE
|
||||
RUN pip uninstall -y py
|
||||
COPY ./requirements/default.txt /tmp/requirements.txt
|
||||
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
|
||||
pip uninstall -y py && \
|
||||
playwright install chromium && \
|
||||
playwright install-deps chromium
|
||||
|
||||
RUN playwright install chromium
|
||||
RUN playwright install-deps chromium
|
||||
|
||||
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
|
||||
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
|
||||
# based on the instructions found here:
|
||||
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
|
||||
# this is temporarily needed until playwright updates their packaged node version to
|
||||
# 20.5.1+
|
||||
RUN apt-get update
|
||||
RUN apt-get install -y ca-certificates curl gnupg
|
||||
RUN mkdir -p /etc/apt/keyrings
|
||||
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
||||
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
|
||||
RUN apt-get update
|
||||
RUN apt-get install nodejs -y
|
||||
# replace nodejs packaged with playwright (18.17.0) with the one installed above
|
||||
RUN cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node
|
||||
# remove nodejs (except for the binary we moved into playwright)
|
||||
RUN apt-get remove -y nodejs
|
||||
RUN mkdir -p /etc/apt/keyrings && \
|
||||
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
|
||||
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y nodejs && \
|
||||
cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
|
||||
apt-get remove -y nodejs
|
||||
|
||||
# Cleanup for CVEs and size reduction
|
||||
RUN apt-get remove -y linux-libc-dev \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Remove tornado test key to placate vulnerability scanners
|
||||
# More details can be found here:
|
||||
# https://github.com/tornadoweb/tornado/issues/3107
|
||||
RUN rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
||||
RUN apt-get remove -y linux-libc-dev && \
|
||||
apt-get autoremove -y && \
|
||||
rm -rf /var/lib/apt/lists/* && \
|
||||
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
||||
|
||||
# Set up application files
|
||||
WORKDIR /app
|
||||
COPY ./danswer /app/danswer
|
||||
COPY ./alembic /app/alembic
|
||||
COPY ./alembic.ini /app/alembic.ini
|
||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||
|
||||
# Create Vespa app zip
|
||||
WORKDIR /app/danswer/datastores/vespa/app_config
|
||||
RUN zip -r /app/danswer/vespa-app.zip .
|
||||
WORKDIR /app
|
||||
@ -57,5 +55,6 @@ COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py
|
||||
|
||||
ENV PYTHONPATH /app
|
||||
|
||||
# By default this container does nothing, it is used by api server and background which specify their own CMD
|
||||
# Default command which does nothing
|
||||
# This container is used by api server and background which specify their own CMD
|
||||
CMD ["tail", "-f", "/dev/null"]
|
||||
|
@ -37,7 +37,6 @@ from danswer.db.models import Connector
|
||||
from danswer.db.models import IndexAttempt
|
||||
from danswer.db.models import IndexingStatus
|
||||
from danswer.search.search_utils import warm_up_models
|
||||
from danswer.utils.acl import set_acl_for_vespa_nonblocking
|
||||
from danswer.utils.logger import IndexAttemptSingleton
|
||||
from danswer.utils.logger import setup_logger
|
||||
|
||||
@ -449,12 +448,6 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
|
||||
# This ensures that bad states get cleaned up
|
||||
mark_all_in_progress_cc_pairs_failed(db_session)
|
||||
|
||||
# TODO: remove this once everyone is migrated to ACL
|
||||
# does nothing if this has been successfully run before
|
||||
# NOTE: is done in another thread, to not block indexing runs from
|
||||
# getting kicked off
|
||||
set_acl_for_vespa_nonblocking(should_check_if_already_done=True)
|
||||
|
||||
while True:
|
||||
start = time.time()
|
||||
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
Loading…
x
Reference in New Issue
Block a user