From cffcceac720ca5511c66ac2801b79ca75194fbe9 Mon Sep 17 00:00:00 2001
From: Weves
Date: Sun, 22 Oct 2023 00:05:41 -0700
Subject: [PATCH] Testing

---
Review notes (text between the "---" marker and the diffstat is not part of
the commit message and is skipped when the patch is applied):

* backend/Dockerfile: moves the base image from python:3.11.4-slim-bookworm
  to nvidia/cuda:12.2.2-devel-ubuntu22.04, installs Python 3.11 from the
  deadsnakes PPA, and comments out the nodejs/playwright and CVE-cleanup
  steps that reference /usr/local/lib/python3.11/site-packages.
* backend/Dockerfile: the python/pip symlink step joins its two `ln -s`
  commands with `&&` (not a single `&`, which would background the first
  command and discard its exit status), so a failure of either command
  fails the build step.
* backend/danswer/configs/app_configs.py: INDEX_BATCH_SIZE 16 -> 128.
* deployment/data/nginx/app.conf.template.dev: client_max_body_size
  500M -> 0.
* deployment/docker_compose/docker-compose.dev.yml: adds an nvidia device
  reservation (gpu, utility, compute capabilities) to the service defined
  immediately before web_server.

 backend/Dockerfile                            | 39 +++++++++++++------
 backend/danswer/configs/app_configs.py        |  2 +-
 deployment/data/nginx/app.conf.template.dev   |  2 +-
 .../docker_compose/docker-compose.dev.yml     |  9 +++++
 4 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/backend/Dockerfile b/backend/Dockerfile
index 397bd7213..2cb854cde 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11.4-slim-bookworm
+FROM nvidia/cuda:12.2.2-devel-ubuntu22.04
 
 # Install system dependencies
 RUN apt-get update && \
@@ -8,6 +8,18 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* && \
     apt-get clean
 
+# RUN apt-get update && \
+#     apt-get install -y build-essential checkinstall \
+#     libreadline-gplv2-dev libncursesw5-dev libssl-dev \
+#     libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
+RUN apt-get update && \
+    apt-get install software-properties-common -y && \
+    add-apt-repository ppa:deadsnakes/ppa && \
+    apt-get install python3.11-dev python3.11-distutils -y && \
+    curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
+
+
 # Install Python dependencies
 # Remove py which is pulled in by retry, py is not needed and is a CVE
 COPY ./requirements/default.txt /tmp/requirements.txt
@@ -21,22 +33,22 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \
 # https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions
 # this is temporarily needed until playwright updates their packaged node version to
 # 20.5.1+
-RUN mkdir -p /etc/apt/keyrings && \
-    curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
-    echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
-    apt-get update && \
-    apt-get install -y nodejs && \
-    cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
-    apt-get remove -y nodejs
+# RUN mkdir -p /etc/apt/keyrings && \
+#     curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \
+#     echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \
+#     apt-get update && \
+#     apt-get install -y nodejs && \
+#     cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \
+#     apt-get remove -y nodejs
 
 # Cleanup for CVEs and size reduction
 # Remove tornado test key to placate vulnerability scanners
 # More details can be found here:
 # https://github.com/tornadoweb/tornado/issues/3107
-RUN apt-get remove -y linux-libc-dev && \
-    apt-get autoremove -y && \
-    rm -rf /var/lib/apt/lists/* && \
-    rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
+# RUN apt-get remove -y linux-libc-dev && \
+#     apt-get autoremove -y && \
+#     rm -rf /var/lib/apt/lists/* && \
+#     rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key
 
 # Set up application files
 WORKDIR /app
@@ -55,6 +67,9 @@ COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py
 
 ENV PYTHONPATH /app
 
+RUN ln -s /usr/bin/python3.11 /usr/bin/python && \
+    ln -s /usr/bin/pip3.11 /usr/bin/pip
+
 # Default command which does nothing
 # This container is used by api server and background which specify their own CMD
 CMD ["tail", "-f", "/dev/null"]
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index 538a4766b..bdc8e1e64 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -93,7 +93,7 @@ VESPA_DEPLOYMENT_ZIP = (
     os.environ.get("VESPA_DEPLOYMENT_ZIP") or "/app/danswer/vespa-app.zip"
 )
 # Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder)
-INDEX_BATCH_SIZE = 16
+INDEX_BATCH_SIZE = 128
 
 # Below are intended to match the env variables names used by the official postgres docker image
 # https://hub.docker.com/_/postgres
diff --git a/deployment/data/nginx/app.conf.template.dev b/deployment/data/nginx/app.conf.template.dev
index 72787434a..26d6983cd 100644
--- a/deployment/data/nginx/app.conf.template.dev
+++ b/deployment/data/nginx/app.conf.template.dev
@@ -18,7 +18,7 @@ server {
     listen 80;
     server_name ${DOMAIN};
 
-    client_max_body_size 500M; # Maximum upload size
+    client_max_body_size 0; # Maximum upload size
 
     location ~ ^/api(.*)$ {
         rewrite ^/api(/.*)$ $1 break;
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 9c4aa3d84..a784f5fe6 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -99,6 +99,15 @@ services:
       - model_cache_torch:/root/.cache/torch/
       - model_cache_nltk:/root/nltk_data/
       - model_cache_huggingface:/root/.cache/huggingface/
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              capabilities:
+                - gpu
+                - utility # nvidia-smi
+                - compute # CUDA
   web_server:
     image: danswer/danswer-web-server:latest
     build: