Standardize connectors + permissioning + new frontend for admin pages + small fixes / improvements (#75)

Introduce permissioning, standardize onboarding for connectors, rework the data model for connectors / credentials / index-attempts, make all environment variables optional, and add a bunch of small fixes + improvements. Co-authored-by: Weves <chrisweaver101@gmail.com>
2025-09-26 11:58:28 +02:00 · 2023-05-30 19:59:57 -07:00
parent b05bf963bf
commit 6891e4f198
76 changed files with 6760 additions and 1468 deletions
--- a/deployment/.env
+++ b/deployment/.env
@@ -0,0 +1,5 @@
+# For a local deployment, no additional setup is needed
+# Refer to env.dev.template and env.prod.template for additional options
+
+# Disabling auth for local setup convenience, to avoid having to set up a Google OAuth app in GCP.
+DISABLE_AUTH=True
--- a/deployment/README.md
+++ b/deployment/README.md
@@ -1,33 +1,32 @@
 This serves as an example for how to deploy everything on a single machine. This is
 not optimal, but can get you started easily and cheaply. To run:

-1. Set up a `.env` file in this directory with relevant environment variables.
-   - Use the `env.template` as a reference.

-
-2. SKIP this step if running locally. If you are running this for production and need https do the following:
-   - Set up a `.env.nginx` file in this directory based on `env.nginx.template`.
-   - `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to set up https certificate.
-
-
-3. Run one of the docker compose commands below depending on your environment:
+1. Run one of the docker compose commands below depending on your environment:
   - For Local:
     - `docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build`
     - This will start Web/API servers, Postgres (backend DB), Qdrant (vector DB), and the background indexing job.
-   - For Prod:
-     - `docker compose -f docker-compose.prod.yml -p danswer-stack up -d --build`
-     - This will additionally run certbot and start Nginx.
+     - Downloading packages/requirements may take 20+ minutes depending on your internet connection.


-4. To shut down the deployment run:
+2. To shut down the deployment run (use `stop` to stop the containers, or `down` to remove them):
   - For Local:
     - `docker compose -f docker-compose.dev.yml -p danswer-stack stop`
-   - For Prod:
-     - `docker compose -f docker-compose.prod.yml -p danswer-stack stop`


-5. To completely remove Danswer (**WARNING, this will also erase your indexed data and all users**) run:
+3. To completely remove Danswer (**WARNING, this will also erase your indexed data and all users**) run:
   - For Local:
-     - `docker compose -f docker-compose.dev.yml -p danswer-stack down`
-   - For Prod:
-     - `docker compose -f docker-compose.prod.yml -p danswer-stack down`
+     - `docker compose -f docker-compose.dev.yml -p danswer-stack down -v`
+
+
+Additional steps for setting up for Prod:
+
+1. Set up a `.env` file in this directory with relevant environment variables.
+   - Refer to env.dev.template and env.prod.template
+
+
+2. Set up https:
+   - Set up a `.env.nginx` file in this directory based on `env.nginx.template`.
+   - `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to set up https certificate.
+
+3. Follow the steps above, replacing `dev` with `prod` in the docker compose commands.
--- a/deployment/data/nginx/app.conf.template.dev
+++ b/deployment/data/nginx/app.conf.template.dev
@@ -0,0 +1,56 @@
+upstream app_server {
+    # fail_timeout=0 means we always retry an upstream even if it failed
+    # to return a good HTTP response
+
+    # for UNIX domain socket setups
+    #server unix:/tmp/gunicorn.sock fail_timeout=0;
+
+    # for a TCP configuration
+    # TODO: use gunicorn to manage multiple processes
+    server api_server:8080 fail_timeout=0;
+}
+
+upstream web_server {
+    server web_server:3000 fail_timeout=0;
+}
+
+server {
+    listen 80;
+    server_name ${DOMAIN};
+
+    location ~ ^/api(.*)$ {
+        rewrite ^/api(/.*)$ $1 break;
+
+        # misc headers
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_set_header X-Forwarded-Host $host; 
+        proxy_set_header Host $host;
+
+        # need to use 1.1 to support chunked transfers
+        proxy_http_version 1.1;
+        proxy_buffering off;
+
+        # we don't want nginx trying to do something clever with
+        # redirects, we set the Host: header above already.
+        proxy_redirect off;
+        proxy_pass http://app_server;
+    }
+
+    location / {
+        # misc headers
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_set_header X-Forwarded-Host $host; 
+        proxy_set_header Host $host;
+
+        proxy_http_version 1.1;
+
+        # we don't want nginx trying to do something clever with
+        # redirects, we set the Host: header above already.
+        proxy_redirect off;
+        proxy_pass http://web_server;
+    }
+}
--- a/deployment/docker-compose.dev.yml
+++ b/deployment/docker-compose.dev.yml
@@ -35,7 +35,7 @@ services:
  web_server:
    build:
      context: ../web
-      dockerfile: Dockerfile.dev
+      dockerfile: Dockerfile
    depends_on:
      - api_server
    restart: always
@@ -43,12 +43,12 @@ services:
      - .env
    environment:
      - INTERNAL_URL=http://api_server:8080
-    ports:
-      - "3000:3000"
  relational_db:
    image: postgres:15.2-alpine
    restart: always
-    # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-postgres}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-password}
    env_file:
      - .env
    ports:
@@ -62,6 +62,22 @@ services:
      - "6333:6333"
    volumes:
      - qdrant_volume:/qdrant/storage
+  nginx:
+    image: nginx:1.23.4-alpine
+    restart: always
+    depends_on:
+      - api_server
+      - web_server
+    environment:
+      - DOMAIN=localhost
+    ports:
+      - "80:80"
+      - "3000:80"  # allow for localhost:3000 usage, since that is the norm
+    volumes:
+      - ./data/nginx:/etc/nginx/conf.d
+    command: > 
+      /bin/sh -c "envsubst '$$\{DOMAIN\}' < /etc/nginx/conf.d/app.conf.template.dev > /etc/nginx/conf.d/app.conf 
+      && while :; do sleep 6h & wait $${!}; nginx -s reload; done & nginx -g \"daemon off;\""
 volumes:
  local_dynamic_storage:
  db_volume:
--- a/deployment/docker-compose.prod.yml
+++ b/deployment/docker-compose.prod.yml
@@ -33,7 +33,7 @@ services:
  web_server:
    build:
      context: ../web
-      dockerfile: Dockerfile.prod
+      dockerfile: Dockerfile
    depends_on:
      - api_server
    restart: always
--- a/deployment/env.dev.template
+++ b/deployment/env.dev.template
@@ -1,20 +1,12 @@
-# Fill in the values and copy the contents of this file to .env in the deployment directory
-# Some valid default values are provided where applicable, delete the variables which you don't set values for
+# Very basic .env file with options that are easy to change. Allows you to deploy everything on a single machine.
+# We don't suggest using these settings for production.


-# Insert your OpenAI API key here, currently the only Generative AI endpoint for QA that we support is OpenAI
-OPENAI_API_KEY=
-
 # Choose between "openai-chat-completion" and "openai-completion"
 INTERNAL_MODEL_VERSION=openai-chat-completion

 # Use a valid model for the choice above, consult https://platform.openai.com/docs/models/model-endpoint-compatibility
-OPENAI_MODEL_VERSION=gpt-3.5-turbo
-
-
-# Can leave these as defaults
-POSTGRES_USER=postgres
-POSTGRES_PASSWORD=password
+OPENAI_MODEL_VERSION=gpt-3.5-turbo


 # Auth not necessary for local
--- a/deployment/env.prod.template
+++ b/deployment/env.prod.template
@@ -2,8 +2,8 @@
 # Some valid default values are provided where applicable, delete the variables which you don't set values for


-# THE SECTION BELOW INCLUDE MUST HAVE CONFIGS
 # Insert your OpenAI API key here, currently the only Generative AI endpoint for QA that we support is OpenAI
+# If not provided here, the UI will prompt for it during setup
 OPENAI_API_KEY=
 # Choose between "openai-chat-completion" and "openai-completion"
 INTERNAL_MODEL_VERSION=openai-chat-completion
@@ -14,13 +14,6 @@ OPENAI_MODEL_VERSION=gpt-4
 WEB_DOMAIN=http://localhost:3000


-# CONNECTOR CONFIGS (set for the ones you are using, delete the others)
-GITHUB_ACCESS_TOKEN=
-
-GOOGLE_DRIVE_CREDENTIAL_JSON=
-GOOGLE_DRIVE_TOKENS_JSON=
-
-
 # BACKEND DB can leave these as defaults
 POSTGRES_USER=postgres
 POSTGRES_PASSWORD=password
@@ -28,7 +21,6 @@ POSTGRES_PASSWORD=password

 # AUTH CONFIGS
 DISABLE_AUTH=False
-# Feel free remove everything after if DISABLE_AUTH=True

 # Currently frontend page doesn't have basic auth, use OAuth if user auth is enabled.
 ENABLE_OAUTH=True
@@ -42,7 +34,7 @@ SECRET=
 # How long before user needs to reauthenticate, default to 1 day. (cookie expiration time)
 SESSION_EXPIRE_TIME_SECONDS=86400

-# Only relevant if using basic auth
+# Only relevant if using basic auth (not supported on frontend yet)
 REQUIRE_EMAIL_VERIFICATION=True
 # The five settings below are only required if REQUIRE_EMAIL_VERIFICATION is True
 VALID_EMAIL_DOMAIN=