DAN-80 Example Env files (#48)

Also added alembic migrations running automatically
This commit is contained in:
Yuhong Sun
2023-05-16 01:18:08 -07:00
committed by GitHub
parent d76dbce09b
commit 0d9595733b
13 changed files with 227 additions and 53 deletions

View File

@@ -9,6 +9,8 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
WORKDIR /app
COPY ./danswer /app/danswer
COPY ./alembic /app/alembic
COPY ./alembic.ini /app/alembic.ini
ENV PYTHONPATH .
CMD ["uvicorn", "danswer.main:app", "--host", "0.0.0.0", "--port", "8080"]
ENV PYTHONPATH /app
CMD ["/bin/sh", "-c", "alembic upgrade head && uvicorn danswer.main:app --host 0.0.0.0 --port 8080"]

View File

@@ -12,5 +12,5 @@ RUN playwright install-deps
WORKDIR /app
COPY ./danswer /app/danswer
ENV PYTHONPATH .
ENV PYTHONPATH /app
CMD ["python3", "danswer/background/update.py"]

View File

@@ -36,7 +36,9 @@ SMTP_USER = os.environ.get("SMTP_USER", "your-email@gmail.com")
SMTP_PASS = os.environ.get("SMTP_PASS", "your-gmail-password")
SECRET = os.environ.get("SECRET", "")
SESSION_EXPIRE_TIME_SECONDS = int(os.environ.get("SESSION_EXPIRE_TIME_SECONDS", 3600))
SESSION_EXPIRE_TIME_SECONDS = int(
os.environ.get("SESSION_EXPIRE_TIME_SECONDS", 86400)
) # 1 day
VALID_EMAIL_DOMAIN = os.environ.get("VALID_EMAIL_DOMAIN", "")
# OAuth Login Flow
ENABLE_OAUTH = os.environ.get("ENABLE_OAUTH", "").lower() != "false"
@@ -45,7 +47,7 @@ GOOGLE_OAUTH_CLIENT_SECRET = os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", "")
#####
# Vector DB Configs
# DB Configs
#####
DEFAULT_VECTOR_STORE = os.environ.get("VECTOR_DB", "qdrant")
# Url / Key are used to connect to a remote Qdrant instance
@@ -58,12 +60,24 @@ QDRANT_DEFAULT_COLLECTION = os.environ.get("QDRANT_COLLECTION", "semantic_search
DB_CONN_TIMEOUT = 2 # Timeout seconds connecting to DBs
INDEX_BATCH_SIZE = 16 # File batches (not accounting file chunking)
# The settings below are intended to match the env variable names used by the official postgres docker image
# https://hub.docker.com/_/postgres
POSTGRES_USER = os.environ.get("POSTGRES_USER", "postgres")
POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "password")
POSTGRES_HOST = os.environ.get("POSTGRES_HOST", "localhost")
POSTGRES_PORT = os.environ.get("POSTGRES_PORT", "5432")
POSTGRES_DB = os.environ.get("POSTGRES_DB", "postgres")
#####
# Connector Configs
#####
GOOGLE_DRIVE_CREDENTIAL_JSON = os.environ.get("GOOGLE_DRIVE_CREDENTIAL_JSON", "")
GOOGLE_DRIVE_TOKENS_JSON = os.environ.get("GOOGLE_DRIVE_TOKENS_JSON", "")
GOOGLE_DRIVE_CREDENTIAL_JSON = os.environ.get(
"GOOGLE_DRIVE_CREDENTIAL_JSON", "/home/storage/google_drive_creds.json"
)
GOOGLE_DRIVE_TOKENS_JSON = os.environ.get(
"GOOGLE_DRIVE_TOKENS_JSON", "/home/storage/google_drive_tokens.json"
)
GOOGLE_DRIVE_INCLUDE_SHARED = False
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN", "")
@@ -76,7 +90,9 @@ DEFAULT_PROMPT = "generic-qa"
NUM_RETURNED_HITS = 15
NUM_RERANKED_RESULTS = 4
KEYWORD_MAX_HITS = 5
QUOTE_ALLOWED_ERROR_PERCENT = 0.05 # 1 edit per 2 characters
QUOTE_ALLOWED_ERROR_PERCENT = (
0.05 # 1 edit per 20 characters, currently unused due to fuzzy match being too slow
)
#####
@@ -111,3 +127,8 @@ CROSS_ENCODER_PORT = 9000
TYPESENSE_API_KEY = os.environ.get("TYPESENSE_API_KEY", "")
TYPESENSE_HOST = "localhost"
TYPESENSE_PORT = 8108
DYNAMIC_CONFIG_STORE = os.environ.get(
"DYNAMIC_CONFIG_STORE", "FileSystemBackedDynamicConfigStore"
)
DYNAMIC_CONFIG_DIR_PATH = os.environ.get("DYNAMIC_CONFIG_DIR_PATH", "/home/storage")

View File

@@ -1,6 +1,10 @@
import os
from collections.abc import AsyncGenerator
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from sqlalchemy.engine import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.ext.asyncio import AsyncEngine
@@ -10,23 +14,16 @@ from sqlalchemy.ext.asyncio import create_async_engine
SYNC_DB_API = "psycopg2"
ASYNC_DB_API = "asyncpg"
# below are intended to match the env variables names used by the official
# postgres docker image https://hub.docker.com/_/postgres
DEFAULT_USER = os.environ.get("POSTGRES_USER", "postgres")
DEFAULT_PASSWORD = os.environ.get("POSTGRES_PASSWORD", "password")
DEFAULT_HOST = os.environ.get("POSTGRES_HOST", "localhost")
DEFULT_PORT = os.environ.get("POSTGRES_PORT", "5432")
DEFAULT_DB = os.environ.get("POSTGRES_DB", "postgres")
def build_connection_string(
*,
db_api: str = ASYNC_DB_API,
user: str = DEFAULT_USER,
password: str = DEFAULT_PASSWORD,
host: str = DEFAULT_HOST,
port: str = DEFULT_PORT,
db: str = DEFAULT_DB,
user: str = POSTGRES_USER,
password: str = POSTGRES_PASSWORD,
host: str = POSTGRES_HOST,
port: str = POSTGRES_PORT,
db: str = POSTGRES_DB,
) -> str:
return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}"

View File

@@ -1,15 +1,13 @@
import os
from danswer.dynamic_configs.file_system.store import (
FileSystemBackedDynamicConfigStore,
)
from danswer.configs.app_configs import DYNAMIC_CONFIG_DIR_PATH
from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE
from danswer.dynamic_configs.file_system.store import FileSystemBackedDynamicConfigStore
from danswer.dynamic_configs.interface import DynamicConfigStore
def get_dynamic_config_store() -> DynamicConfigStore:
dynamic_config_store_type = os.environ.get("DYNAMIC_CONFIG_STORE")
dynamic_config_store_type = DYNAMIC_CONFIG_STORE
if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__:
return FileSystemBackedDynamicConfigStore(os.environ["DYNAMIC_CONFIG_DIR_PATH"])
return FileSystemBackedDynamicConfigStore(DYNAMIC_CONFIG_DIR_PATH)
# TODO: change exception type
raise Exception("Unknown dynamic config store type")

View File

@@ -1,7 +1,20 @@
This serves as an example for how to deploy everything on a single machine. This is
not optimal, but can get you started easily and cheaply. To run:
1. Set up a `.env` + `.env.nginx` file in this directory with relevant environment variables
a. TODO: add description of required variables
2. `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to setup https certificate
2. `docker compose up -d --build` to start nginx, postgres, web/api servers, and the background indexing job
1. Set up a `.env` file in this directory with relevant environment variables.
- Use the `env.template` as a reference.
2. SKIP this step if running locally. If you are running this for production and need https, do the following:
- Set up a `.env.nginx` file in this directory based on `env.nginx.template`.
- `chmod +x init-letsencrypt.sh` + `./init-letsencrypt.sh` to set up https certificate.
3. Run one of the docker compose commands below depending on your environment:
- For Local:
- `docker compose -f docker-compose.dev.yml -p danswer-stack up -d --build`
- This will start Web/API servers, Postgres (backend DB), Qdrant (vector DB), and the background indexing job.
- For Prod:
- `docker compose -f docker-compose.prod.yml -p danswer-stack up -d --build`
- This will additionally run certbot and start Nginx.
4. To shut down the deployment run:
- For Local:
- `docker compose -f docker-compose.dev.yml -p danswer-stack down`
- For Prod:
- `docker compose -f docker-compose.prod.yml -p danswer-stack down`

View File

@@ -7,11 +7,11 @@ upstream app_server {
# for a TCP configuration
# TODO: use gunicorn to manage multiple processes
server api:8080 fail_timeout=0;
server api_server:8080 fail_timeout=0;
}
upstream web_server {
server web:3000 fail_timeout=0;
server web_server:3000 fail_timeout=0;
}
server {

View File

@@ -0,0 +1,64 @@
version: '3'
services:
api_server:
build:
context: ../backend
dockerfile: Dockerfile
depends_on:
- relational_db
- vector_db
restart: always
ports:
- "8080:8080"
env_file:
- .env
environment:
- POSTGRES_HOST=relational_db
- QDRANT_HOST=vector_db
volumes:
- local_dynamic_storage:/home/storage
background:
build:
context: ../backend
dockerfile: Dockerfile.background
depends_on:
- relational_db
- vector_db
restart: always
env_file:
- .env
environment:
- POSTGRES_HOST=relational_db
- QDRANT_HOST=vector_db
volumes:
- local_dynamic_storage:/home/storage
web_server:
build:
context: ../web
dockerfile: Dockerfile
depends_on:
- api_server
restart: always
environment:
- INTERNAL_AUTH_URL=http://api_server:8080
ports:
- "3000:3000"
relational_db:
image: postgres:15.2-alpine
restart: always
# POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
env_file:
- .env
ports:
- "5432:5432"
volumes:
- db_volume:/var/lib/postgresql/data
vector_db:
image: qdrant/qdrant:v1.1.3
restart: always
volumes:
- qdrant_volume:/qdrant/storage
volumes:
local_dynamic_storage:
db_volume:
qdrant_volume:

View File

@@ -1,20 +1,18 @@
version: '3'
services:
api:
api_server:
build:
context: ../backend
dockerfile: Dockerfile
depends_on:
- db
- qdrant
# uncomment for local testing
# ports:
# - "8080:8080"
- relational_db
- vector_db
restart: always
env_file:
- .env
environment:
- POSTGRES_HOST=db
- QDRANT_HOST=qdrant
- POSTGRES_HOST=relational_db
- QDRANT_HOST=vector_db
volumes:
- local_dynamic_storage:/home/storage
background:
@@ -22,24 +20,26 @@ services:
context: ../backend
dockerfile: Dockerfile.background
depends_on:
- db
- qdrant
- relational_db
- vector_db
restart: always
env_file:
- .env
environment:
- POSTGRES_HOST=db
- QDRANT_HOST=qdrant
- POSTGRES_HOST=relational_db
- QDRANT_HOST=vector_db
volumes:
- local_dynamic_storage:/home/storage
web:
web_server:
build:
context: ../web
dockerfile: Dockerfile
depends_on:
- api
- api_server
restart: always
environment:
- INTERNAL_AUTH_URL=http://api:8080
db:
- INTERNAL_AUTH_URL=http://api_server:8080
relational_db:
image: postgres:15.2-alpine
restart: always
# POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file
@@ -49,12 +49,14 @@ services:
- "5432:5432"
volumes:
- db_volume:/var/lib/postgresql/data
qdrant:
vector_db:
image: qdrant/qdrant:v1.1.3
restart: always
volumes:
- qdrant_volume:/qdrant/storage
nginx:
image: nginx:1.23.4-alpine
restart: always
ports:
- "80:80"
- "443:443"
@@ -70,6 +72,7 @@ services:
# follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71
certbot:
image: certbot/certbot
restart: always
volumes:
- ./data/certbot/conf:/etc/letsencrypt
- ./data/certbot/www:/var/www/certbot

View File

@@ -0,0 +1,21 @@
# Fill in the values and copy the contents of this file to .env in the deployment directory
# Some valid default values are provided where applicable; delete any variables you don't set a value for
# Insert your OpenAI API key here. Currently the only Generative AI endpoint for QA that we support is OpenAI
OPENAI_API_KEY=
# Choose between "openai-chat-completion" and "openai-completion"
INTERNAL_MODEL_VERSION=openai-chat-completion
# Use a valid model for the choice above, consult https://platform.openai.com/docs/models/model-endpoint-compatibility
OPENAPI_MODEL_VERSION=gpt-3.5-turbo
# Can leave these as defaults
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
# Auth not necessary for local
DISABLE_AUTH=True

View File

@@ -0,0 +1,3 @@
# DOMAIN is necessary for https setup, EMAIL is optional
DOMAIN=
EMAIL=

View File

@@ -0,0 +1,52 @@
# Fill in the values and copy the contents of this file to .env in the deployment directory
# Some valid default values are provided where applicable; delete any variables you don't set a value for
# THE SECTION BELOW INCLUDES MUST-HAVE CONFIGS
# Insert your OpenAI API key here. Currently the only Generative AI endpoint for QA that we support is OpenAI
OPENAI_API_KEY=
# Choose between "openai-chat-completion" and "openai-completion"
INTERNAL_MODEL_VERSION=openai-chat-completion
# Use a valid model for the choice above, consult https://platform.openai.com/docs/models/model-endpoint-compatibility
OPENAPI_MODEL_VERSION=gpt-4
# Could be something like danswer.companyname.com. Requires additional setup if not localhost
WEB_DOMAIN=http://localhost:3000
# CONNECTOR CONFIGS (set for the ones you are using, delete the others)
GITHUB_ACCESS_TOKEN=
GOOGLE_DRIVE_CREDENTIAL_JSON=
GOOGLE_DRIVE_TOKENS_JSON=
# BACKEND DB can leave these as defaults
POSTGRES_USER=postgres
POSTGRES_PASSWORD=password
# AUTH CONFIGS
DISABLE_AUTH=False
# Feel free to remove everything after this if DISABLE_AUTH=True
# Currently the frontend page doesn't have basic auth; use OAuth if user auth is enabled.
ENABLE_OAUTH=True
# The two settings below are only required if ENABLE_OAUTH is true
GOOGLE_OAUTH_CLIENT_ID=
GOOGLE_OAUTH_CLIENT_SECRET=
# Used to generate values for security verification, use a random string
SECRET=
# How long before the user needs to reauthenticate (cookie expiration time); defaults to 1 day.
SESSION_EXPIRE_TIME_SECONDS=86400
# Only relevant if using basic auth
REQUIRE_EMAIL_VERIFICATION=True
# The five settings below are only required if REQUIRE_EMAIL_VERIFICATION is True
VALID_EMAIL_DOMAIN=
SMTP_SERVER=
SMTP_PORT=
SMTP_USER=
SMTP_PASS=

2
deployment/init-letsencrypt.sh Normal file → Executable file
View File

@@ -6,7 +6,7 @@ set -o allexport
source .env.nginx
set +o allexport
if ! [ -x "$(command -v docker compose)" ]; then
if ! docker compose --version >/dev/null 2>&1; then
echo 'Error: docker compose is not installed.' >&2
exit 1
fi