mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-10-02 17:38:04 +02:00
Dockerfile to build smaller Images (#567)
This commit is contained in:
@@ -1,53 +1,51 @@
|
|||||||
FROM python:3.11.4-slim-bookworm
|
FROM python:3.11.4-slim-bookworm
|
||||||
|
|
||||||
RUN apt-get update \
|
# Install system dependencies
|
||||||
&& apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
|
RUN apt-get update && \
|
||||||
|
apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \
|
||||||
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
|
libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \
|
||||||
supervisor zip \
|
supervisor zip ca-certificates gnupg && \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
apt-get clean
|
||||||
COPY ./requirements/default.txt /tmp/requirements.txt
|
|
||||||
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
|
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
# Remove py which is pulled in by retry, py is not needed and is a CVE
|
# Remove py which is pulled in by retry, py is not needed and is a CVE
|
||||||
RUN pip uninstall -y py
|
COPY ./requirements/default.txt /tmp/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
|
||||||
|
pip uninstall -y py && \
|
||||||
|
playwright install chromium && \
|
||||||
|
playwright install-deps chromium
|
||||||
|
|
||||||
RUN playwright install chromium
|
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
|
||||||
RUN playwright install-deps chromium
|
|
||||||
|
|
||||||
# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below
|
|
||||||
# based on the instructions found here:
|
# based on the instructions found here:
|
||||||
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
|
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
|
||||||
# this is temporarily needed until playwright updates their packaged node version to
|
# this is temporarily needed until playwright updates their packaged node version to
|
||||||
# 20.5.1+
|
# 20.5.1+
|
||||||
RUN apt-get update
|
RUN mkdir -p /etc/apt/keyrings && \
|
||||||
RUN apt-get install -y ca-certificates curl gnupg
|
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
|
||||||
RUN mkdir -p /etc/apt/keyrings
|
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
|
||||||
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg
|
apt-get update && \
|
||||||
RUN echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list
|
apt-get install -y nodejs && \
|
||||||
RUN apt-get update
|
cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
|
||||||
RUN apt-get install nodejs -y
|
apt-get remove -y nodejs
|
||||||
# replace nodejs packaged with playwright (18.17.0) with the one installed above
|
|
||||||
RUN cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node
|
|
||||||
# remove nodejs (except for the binary we moved into playwright)
|
|
||||||
RUN apt-get remove -y nodejs
|
|
||||||
|
|
||||||
# Cleanup for CVEs and size reduction
|
# Cleanup for CVEs and size reduction
|
||||||
RUN apt-get remove -y linux-libc-dev \
|
|
||||||
&& apt-get autoremove -y \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Remove tornado test key to placate vulnerability scanners
|
# Remove tornado test key to placate vulnerability scanners
|
||||||
# More details can be found here:
|
# More details can be found here:
|
||||||
# https://github.com/tornadoweb/tornado/issues/3107
|
# https://github.com/tornadoweb/tornado/issues/3107
|
||||||
RUN rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
RUN apt-get remove -y linux-libc-dev && \
|
||||||
|
apt-get autoremove -y && \
|
||||||
|
rm -rf /var/lib/apt/lists/* && \
|
||||||
|
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
|
||||||
|
|
||||||
|
# Set up application files
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
COPY ./danswer /app/danswer
|
COPY ./danswer /app/danswer
|
||||||
COPY ./alembic /app/alembic
|
COPY ./alembic /app/alembic
|
||||||
COPY ./alembic.ini /app/alembic.ini
|
COPY ./alembic.ini /app/alembic.ini
|
||||||
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
|
||||||
|
# Create Vespa app zip
|
||||||
WORKDIR /app/danswer/datastores/vespa/app_config
|
WORKDIR /app/danswer/datastores/vespa/app_config
|
||||||
RUN zip -r /app/danswer/vespa-app.zip .
|
RUN zip -r /app/danswer/vespa-app.zip .
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -57,5 +55,6 @@ COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py
|
|||||||
|
|
||||||
ENV PYTHONPATH /app
|
ENV PYTHONPATH /app
|
||||||
|
|
||||||
# By default this container does nothing, it is used by api server and background which specify their own CMD
|
# Default command which does nothing
|
||||||
|
# This container is used by api server and background which specify their own CMD
|
||||||
CMD ["tail", "-f", "/dev/null"]
|
CMD ["tail", "-f", "/dev/null"]
|
||||||
|
@@ -37,7 +37,6 @@ from danswer.db.models import Connector
|
|||||||
from danswer.db.models import IndexAttempt
|
from danswer.db.models import IndexAttempt
|
||||||
from danswer.db.models import IndexingStatus
|
from danswer.db.models import IndexingStatus
|
||||||
from danswer.search.search_utils import warm_up_models
|
from danswer.search.search_utils import warm_up_models
|
||||||
from danswer.utils.acl import set_acl_for_vespa_nonblocking
|
|
||||||
from danswer.utils.logger import IndexAttemptSingleton
|
from danswer.utils.logger import IndexAttemptSingleton
|
||||||
from danswer.utils.logger import setup_logger
|
from danswer.utils.logger import setup_logger
|
||||||
|
|
||||||
@@ -449,12 +448,6 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non
|
|||||||
# This ensures that bad states get cleaned up
|
# This ensures that bad states get cleaned up
|
||||||
mark_all_in_progress_cc_pairs_failed(db_session)
|
mark_all_in_progress_cc_pairs_failed(db_session)
|
||||||
|
|
||||||
# TODO: remove this once everyone is migrated to ACL
|
|
||||||
# does nothing if this has been successfully run before
|
|
||||||
# NOTE: is done in another thread, to not block indexing runs from
|
|
||||||
# getting kicked off
|
|
||||||
set_acl_for_vespa_nonblocking(should_check_if_already_done=True)
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
|
start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
Reference in New Issue
Block a user