This commit is contained in:
Weves 2023-10-22 00:05:41 -07:00
parent 80eedebe86
commit cffcceac72
4 changed files with 38 additions and 14 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.11.4-slim-bookworm
FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
# Install system dependencies
RUN apt-get update && \
@ -8,6 +8,18 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/* && \
apt-get clean
# RUN apt-get update && \
# apt-get install -y build-essential checkinstall \
# libreadline-gplv2-dev libncursesw5-dev libssl-dev \
# libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev
# Install tzdata with DEBIAN_FRONTEND=noninteractive set inline (not via ENV),
# so the package is configured without waiting on an interactive prompt and
# the setting does not persist into the runtime environment.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
# Install Python 3.11 (headers + distutils) from the deadsnakes PPA and
# bootstrap pip for that interpreter.
#  - `add-apt-repository -y` never waits for confirmation during the build.
#  - `apt-get update` is re-run explicitly after the PPA is added so the
#    python3.11 packages are guaranteed to be resolvable.
#  - get-pip.py is downloaded to a file (with `-f` so HTTP errors fail the
#    step) instead of being piped into python: under the default /bin/sh a
#    failed curl on the left side of a pipe would be masked by python exiting 0
#    on empty input, silently producing an image without pip.
#  - The apt package lists are removed in the same layer that created them.
# Assumes curl is already installed by an earlier step, as the original did.
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y python3.11-dev python3.11-distutils && \
    curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
    python3.11 /tmp/get-pip.py && \
    rm -f /tmp/get-pip.py && \
    rm -rf /var/lib/apt/lists/*
# Install Python dependencies
# Remove py, which is pulled in by retry; py is not needed and has a known CVE
COPY ./requirements/default.txt /tmp/requirements.txt
@ -21,22 +33,22 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
# https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
# this is temporarily needed until playwright updates their packaged node version to
# 20.5.1+
# Steps, in order:
#  1. create /etc/apt/keyrings and store the de-armored NodeSource signing key there
#  2. register the NodeSource node_20.x apt repository, signed by that key
#  3. refresh the package lists and install nodejs
#  4. copy the installed node binary over the one inside playwright's driver directory
#  5. remove the nodejs package again (the copied binary stays in place)
RUN mkdir -p /etc/apt/keyrings && \
curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
apt-get update && \
apt-get install -y nodejs && \
cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
apt-get remove -y nodejs
# RUN mkdir -p /etc/apt/keyrings && \
# curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
# echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
# apt-get update && \
# apt-get install -y nodejs && \
# cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
# apt-get remove -y nodejs
# Cleanup for CVEs and size reduction
# Remove tornado test key to placate vulnerability scanners
# More details can be found here:
# https://github.com/tornadoweb/tornado/issues/3107
# Steps, in order: remove linux-libc-dev, autoremove packages that are no
# longer required, delete the apt package lists, and delete tornado's bundled
# test key. The final `rm` has no -f, so the step fails if the key file is
# not at this python3.11 site-packages path.
RUN apt-get remove -y linux-libc-dev && \
apt-get autoremove -y && \
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# RUN apt-get remove -y linux-libc-dev && \
# apt-get autoremove -y && \
# rm -rf /var/lib/apt/lists/* && \
# rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
# Set up application files
WORKDIR /app
@ -55,6 +67,9 @@ COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py
ENV PYTHONPATH /app
# Expose the 3.11 interpreter and its pip under the unversioned names
# `python` and `pip`.
# The commands are chained with `&&`: a single `&` would run the first `ln`
# in the background, so its failure would be ignored and the build would
# still succeed. `-f` replaces a pre-existing link instead of failing on it.
# NOTE(review): confirm /usr/bin/pip3.11 actually exists in this image —
# get-pip.py may install pip under a different prefix (e.g. /usr/local/bin),
# in which case this link would dangle.
RUN ln -sf /usr/bin/python3.11 /usr/bin/python && \
    ln -sf /usr/bin/pip3.11 /usr/bin/pip
# Default command which does nothing
# This container is used by api server and background which specify their own CMD
CMD ["tail", "-f", "/dev/null"]

View File

@ -93,7 +93,7 @@ VESPA_DEPLOYMENT_ZIP = (
os.environ.get("VESPA_DEPLOYMENT_ZIP") or "/app/danswer/vespa-app.zip"
)
# Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder)
INDEX_BATCH_SIZE = 16
INDEX_BATCH_SIZE = 128
# Below are intended to match the env variables names used by the official postgres docker image
# https://hub.docker.com/_/postgres

View File

@ -18,7 +18,7 @@ server {
listen 80;
server_name ${DOMAIN};
client_max_body_size 500M; # Maximum upload size
client_max_body_size 0; # Maximum upload size
location ~ ^/api(.*)$ {
rewrite ^/api(/.*)$ $1 break;

View File

@ -99,6 +99,15 @@ services:
- model_cache_torch:/root/.cache/torch/
- model_cache_nltk:/root/nltk_data/
- model_cache_huggingface:/root/.cache/huggingface/
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities:
- gpu
- utility # nvidia-smi
- compute # CUDA
web_server:
image: danswer/danswer-web-server:latest
build: