From aa67768c79d22a8260f89faa8238061d93ee77b1 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Fri, 29 Dec 2023 20:42:16 -0800 Subject: [PATCH] CVEs continued (#889) --- backend/.dockerignore | 17 ++++++++++++++ backend/Dockerfile | 38 +++++++++++--------------------- backend/requirements/default.txt | 7 +++--- 3 files changed, 34 insertions(+), 28 deletions(-) create mode 100644 backend/.dockerignore diff --git a/backend/.dockerignore b/backend/.dockerignore new file mode 100644 index 000000000..248a36792 --- /dev/null +++ b/backend/.dockerignore @@ -0,0 +1,17 @@ +**/__pycache__ +venv/ +env/ +*.egg-info +.cache +.git/ +.svn/ +.vscode/ +.idea/ +*.log +log/ +.env +secrets.yaml +build/ +dist/ +.coverage +htmlcov/ diff --git a/backend/Dockerfile b/backend/Dockerfile index f4e12f23a..045c8566b 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -6,10 +6,13 @@ ENV DANSWER_VERSION=${DANSWER_VERSION} RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" # Install system dependencies +# cmake needed for psycopg (postgres) +# libpq-dev needed for psycopg (postgres) +# curl included just for users' convenience +# zip for Vespa step further down +# ca-certificates for HTTPS RUN apt-get update && \ - apt-get install -y git cmake pkg-config libprotobuf-c-dev protobuf-compiler \ - libprotobuf-dev libgoogle-perftools-dev libpq-dev build-essential cron curl \ - supervisor zip ca-certificates gnupg && \ + apt-get install -y cmake libpq-dev curl zip ca-certificates && \ rm -rf /var/lib/apt/lists/* && \ apt-get clean @@ -18,27 +21,15 @@ RUN apt-get update && \ COPY ./requirements/default.txt /tmp/requirements.txt RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt && \ pip uninstall -y py && \ - playwright install chromium && \ - playwright install-deps chromium - -# install nodejs and replace nodejs packaged with playwright (18.17.0) with the one installed below -# based on the instructions found here: -# 
https://nodejs.org/en/download/package-manager#debian-and-ubuntu-based-linux-distributions -# this is temporarily needed until playwright updates their packaged node version to -# 20.5.1+ -RUN mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ - echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ - apt-get update && \ - apt-get install -y nodejs && \ - cp /usr/bin/node /usr/local/lib/python3.11/site-packages/playwright/driver/node && \ - apt-get remove -y nodejs + playwright install chromium && playwright install-deps chromium && \ + ln -s /usr/local/bin/supervisord /usr/bin/supervisord # Cleanup for CVEs and size reduction -# Remove tornado test key to placate vulnerability scanners -# More details can be found here: # https://github.com/tornadoweb/tornado/issues/3107 -RUN apt-get remove -y linux-libc-dev git && \ +# xserver-common and xvfb included by playwright installation but not needed after +# perl-base is part of the base Python Debian image but not needed for Danswer functionality +# perl-base could only be removed with --allow-remove-essential +RUN apt-get remove -y --allow-remove-essential cmake perl-base xserver-common xvfb && \ apt-get autoremove -y && \ rm -rf /var/lib/apt/lists/* && \ rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key @@ -49,16 +40,13 @@ COPY ./danswer /app/danswer COPY ./shared_models /app/shared_models COPY ./alembic /app/alembic COPY ./alembic.ini /app/alembic.ini -COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf +COPY supervisord.conf /usr/etc/supervisord.conf # Create Vespa app zip WORKDIR /app/danswer/document_index/vespa/app_config RUN zip -r /app/danswer/vespa-app.zip . 
WORKDIR /app -# TODO: remove this once all users have migrated -COPY ./scripts/migrate_vespa_to_acl.py /app/migrate_vespa_to_acl.py - ENV PYTHONPATH /app # Default command which does nothing diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 8ff20f031..06ace7ace 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -30,10 +30,10 @@ nltk==3.8.1 docx2txt==0.8 openai==1.3.5 oauthlib==3.2.2 -playwright==1.37.0 +playwright==1.40.0 psutil==5.9.5 -psycopg2==2.9.6 -psycopg2-binary==2.9.6 +psycopg2==2.9.9 +psycopg2-binary==2.9.9 pycryptodome==3.19.0 pydantic==1.10.7 PyGithub==1.58.2 @@ -52,6 +52,7 @@ safetensors==0.3.1 sentence-transformers==2.2.2 slack-sdk==3.20.2 SQLAlchemy[mypy]==2.0.15 +supervisor==4.2.5 tensorflow==2.14.0 tiktoken==0.4.0 timeago==1.0.16