first cut at redis (#2226)

* first cut at redis

* fix startup dependencies on redis

* kombu cleanup - fail silently

* mypy

* add redis_host environment override

* update REDIS_HOST env var in docker-compose.dev.yml

* update the rest of the docker files

* update contributing guide

* renaming cache to cache_volume

* add redis password to various deployments

* try setting up pr testing for helm

* fix indent

* hopefully this release version actually exists

* fix command line option to --chart-dirs

* fetch-depth 0

* edit values.yaml

* try setting ct working directory

* bypass testing only on change for now

* move files and lint them

* update helm testing

* some issues suggest using --config works

* add vespa repo

* add postgresql repo

* increase timeout

* try amd64 runner

* fix redis password reference

* add comment to helm chart testing workflow

* rename helm testing workflow to disable it

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
This commit is contained in:
rkuo-danswer 2024-09-06 12:21:29 -07:00 committed by GitHub
parent aeb6060854
commit 2933c3598b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
43 changed files with 268 additions and 23 deletions

View File

@ -0,0 +1,67 @@
# This workflow is intentionally disabled while we're still working on it
# It's close to ready, but a race condition needs to be fixed with
# API server and Vespa startup, and it needs to have a way to build/test against
# local containers
name: Helm - Lint and Test Charts

on:
  merge_group:
  pull_request:
    branches: [main]

jobs:
  lint-test:
    runs-on: Amd64

    steps:
      - name: Checkout code
        uses: actions/checkout@v3
        with:
          # fetch-depth 0 is required for helm/chart-testing-action
          # (ct needs full history to diff charts against the target branch)
          fetch-depth: 0

      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: v3.14.4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: "pip"
          cache-dependency-path: |
            backend/requirements/default.txt
            backend/requirements/dev.txt
            backend/requirements/model_server.txt

      - run: |
          python -m pip install --upgrade pip
          pip install -r backend/requirements/default.txt
          pip install -r backend/requirements/dev.txt
          pip install -r backend/requirements/model_server.txt

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.6.1

      - name: Run chart-testing (list-changed)
        id: list-changed
        run: |
          changed=$(ct list-changed --target-branch ${{ github.event.repository.default_branch }})
          if [[ -n "$changed" ]]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          fi

      - name: Run chart-testing (lint)
        # Change-detection is bypassed for now; re-enable once the workflow is stable.
        # if: steps.list-changed.outputs.changed == 'true'
        run: ct lint --all --config ct.yaml --target-branch ${{ github.event.repository.default_branch }}

      - name: Create kind cluster
        # if: steps.list-changed.outputs.changed == 'true'
        uses: helm/kind-action@v1.10.0

      - name: Run chart-testing (install)
        # if: steps.list-changed.outputs.changed == 'true'
        run: ct install --all --config ct.yaml
        # run: ct install --target-branch ${{ github.event.repository.default_branch }}

View File

@ -140,6 +140,7 @@ jobs:
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=postgres \
-e VESPA_HOST=index \
-e REDIS_HOST=cache \
-e API_SERVER_HOST=api_server \
-e OPENAI_API_KEY=${OPENAI_API_KEY} \
danswer/integration-test-runner:it

View File

@ -135,9 +135,9 @@ ensure it is running before continuing with the following docker commands.
First navigate to `danswer/deployment/docker_compose`, then start up Vespa and Postgres with:
```bash
docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db
docker compose -f docker-compose.dev.yml -p danswer-stack up -d index relational_db cache
```
(index refers to Vespa and relational_db refers to Postgres)
(index refers to Vespa, relational_db refers to Postgres, and cache refers to Redis)
#### Running Danswer
To start the frontend, navigate to `danswer/web` and run:

View File

@ -6,6 +6,7 @@ from typing import cast
from celery import Celery # type: ignore
from celery.contrib.abortable import AbortableTask # type: ignore
from celery.exceptions import TaskRevokedError
from sqlalchemy import inspect
from sqlalchemy import text
from sqlalchemy.orm import Session
@ -20,7 +21,10 @@ from danswer.background.task_utils import name_cc_cleanup_task
from danswer.background.task_utils import name_cc_prune_task
from danswer.background.task_utils import name_document_set_sync_task
from danswer.configs.app_configs import JOB_TIMEOUT
from danswer.configs.constants import POSTGRES_CELERY_APP_NAME
from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY
from danswer.configs.app_configs import REDIS_HOST
from danswer.configs.app_configs import REDIS_PASSWORD
from danswer.configs.app_configs import REDIS_PORT
from danswer.configs.constants import PostgresAdvisoryLocks
from danswer.connectors.factory import instantiate_connector
from danswer.connectors.models import InputType
@ -35,9 +39,7 @@ from danswer.db.document_set import fetch_document_sets_for_documents
from danswer.db.document_set import fetch_documents_for_document_set_paginated
from danswer.db.document_set import get_document_set_by_id
from danswer.db.document_set import mark_document_set_as_synced
from danswer.db.engine import build_connection_string
from danswer.db.engine import get_sqlalchemy_engine
from danswer.db.engine import SYNC_DB_API
from danswer.db.models import DocumentSet
from danswer.document_index.document_index_utils import get_both_index_names
from danswer.document_index.factory import get_default_document_index
@ -46,11 +48,17 @@ from danswer.utils.logger import setup_logger
logger = setup_logger()
connection_string = build_connection_string(
db_api=SYNC_DB_API, app_name=POSTGRES_CELERY_APP_NAME
CELERY_PASSWORD_PART = ""
if REDIS_PASSWORD:
CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@"
# example celery_broker_url: "redis://:password@localhost:6379/15"
celery_broker_url = (
f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
)
celery_backend_url = (
f"redis://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}"
)
celery_broker_url = f"sqla+{connection_string}"
celery_backend_url = f"db+{connection_string}"
celery_app = Celery(__name__, broker=celery_broker_url, backend=celery_backend_url)
@ -360,6 +368,15 @@ def kombu_message_cleanup_task_helper(ctx: dict, db_session: Session) -> bool:
bool: Returns True if there are more rows to process, False if not.
"""
inspector = inspect(db_session.bind)
if not inspector:
return False
# With the move to redis as celery's broker and backend, kombu tables may not even exist.
# We can fail silently.
if not inspector.has_table("kombu_message"):
return False
query = text(
"""
SELECT id, timestamp, payload

View File

@ -149,6 +149,16 @@ try:
except ValueError:
POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT
REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost"
REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379))
REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD") or ""
# Used for general redis things
REDIS_DB_NUMBER = int(os.environ.get("REDIS_DB_NUMBER", 0))
# Used by celery as broker and backend
REDIS_DB_NUMBER_CELERY = int(os.environ.get("REDIS_DB_NUMBER_CELERY", 15))
#####
# Connector Configs
#####

View File

@ -54,6 +54,7 @@ python-docx==1.1.0
python-dotenv==1.0.0
python-multipart==0.0.7
pywikibot==9.0.0
redis==5.0.8
requests==2.32.2
requests-oauthlib==1.3.1
retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from image

12
ct.yaml Normal file
View File

@ -0,0 +1,12 @@
# chart-testing (ct) configuration.
# See https://github.com/helm/chart-testing#configuration
chart-dirs:
  - deployment/helm/charts
chart-repos:
  - vespa=https://unoplat.github.io/vespa-helm-charts
  - postgresql=https://charts.bitnami.com/bitnami
# Quoted: a plain scalar beginning with "-" is fragile in YAML.
# Generous timeout — Vespa and Postgres are slow to come up on CI runners.
helm-extra-args: "--timeout 900s"
validate-maintainers: false

View File

@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
@ -62,6 +63,7 @@ services:
# Other services
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
@ -107,6 +109,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
@ -137,6 +140,7 @@ services:
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
@ -330,9 +334,19 @@ services:
# in order to make this work on both Unix-like systems and windows
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data
volumes:
cache_volume:
db_volume:
vespa_volume: # Created by the container itself

View File

@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
ports:
@ -58,6 +59,7 @@ services:
# Other services
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
@ -99,6 +101,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
@ -129,6 +132,7 @@ services:
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-}
- POSTGRES_DB=${POSTGRES_DB:-}
- VESPA_HOST=index
- REDIS_HOST=cache
- WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors
# Don't change the NLP model configs unless you know what you're doing
- DOCUMENT_ENCODER_MODEL=${DOCUMENT_ENCODER_MODEL:-}
@ -341,9 +345,20 @@ services:
command: >
/bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data
volumes:
cache_volume:
db_volume:
vespa_volume:
# Created by the container itself

View File

@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
env_file:
@ -20,6 +21,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
extra_hosts:
- "host.docker.internal:host-gateway"
@ -39,6 +41,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
@ -48,6 +51,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
extra_hosts:
@ -204,7 +208,18 @@ services:
- .env.nginx
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data
volumes:
cache_volume:
db_volume:
vespa_volume:
# Created by the container itself

View File

@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
restart: always
env_file:
@ -20,6 +21,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
extra_hosts:
- "host.docker.internal:host-gateway"
@ -39,6 +41,7 @@ services:
depends_on:
- relational_db
- index
- cache
- inference_model_server
- indexing_model_server
restart: always
@ -48,6 +51,7 @@ services:
- AUTH_TYPE=${AUTH_TYPE:-oidc}
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
extra_hosts:
@ -221,7 +225,18 @@ services:
entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data
volumes:
cache_volume:
db_volume:
vespa_volume:
# Created by the container itself

View File

@ -12,6 +12,7 @@ services:
depends_on:
- relational_db
- index
- cache
restart: always
ports:
- "8080"
@ -21,6 +22,7 @@ services:
- AUTH_TYPE=disabled
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-}
@ -43,6 +45,7 @@ services:
depends_on:
- relational_db
- index
- cache
restart: always
env_file:
- .env_eval
@ -50,6 +53,7 @@ services:
- AUTH_TYPE=disabled
- POSTGRES_HOST=relational_db
- VESPA_HOST=index
- REDIS_HOST=cache
- MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server}
- MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-}
- INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server}
@ -200,7 +204,18 @@ services:
&& /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev"
cache:
image: redis:7.4-alpine
restart: always
ports:
- '6379:6379'
command: redis-server
volumes:
- cache_volume:/data
volumes:
cache_volume:
db_volume:
driver: local
driver_opts:

View File

@ -22,14 +22,11 @@ dependencies:
version: 14.3.1
repository: https://charts.bitnami.com/bitnami
condition: postgresql.enabled
- name: vespa
- name: vespa
version: 0.2.3
repository: https://unoplat.github.io/vespa-helm-charts
condition: vespa.enabled
- name: nginx
version: 15.14.0
repository: oci://registry-1.docker.io/bitnamicharts
condition: nginx.enabled
condition: nginx.enabled

View File

@ -84,7 +84,7 @@ postgresql:
auth:
existingSecret: danswer-secrets
secretKeys:
adminPasswordKey: postgres_password #overwriting as postgres typically expects 'postgres-password'
adminPasswordKey: postgres_password # overwriting as postgres typically expects 'postgres-password'
nginx:
containerPorts:
@ -330,7 +330,7 @@ vespa:
affinity: {}
#ingress:
# ingress:
# enabled: false
# className: ""
# annotations: {}
@ -358,8 +358,10 @@ persistence:
auth:
# for storing smtp, oauth, slack, and other secrets
# keys are lowercased version of env vars (e.g. SMTP_USER -> smtp_user)
existingSecret: "" # danswer-secrets
existingSecret: "" # danswer-secrets
# optionally override the secret keys to reference in the secret
# this is used to populate the env vars in individual deployments
# the values here reference the keys in secrets below
secretKeys:
postgres_password: "postgres_password"
smtp_pass: ""
@ -369,9 +371,11 @@ auth:
gen_ai_api_key: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "redis_password"
# will be overridden by the existingSecret if set
secretName: "danswer-secrets"
# set values as strings, they will be base64 encoded
# this is used to populate the secrets yaml
secrets:
postgres_password: "postgres"
smtp_pass: ""
@ -381,13 +385,14 @@ auth:
gen_ai_api_key: ""
danswer_bot_slack_app_token: ""
danswer_bot_slack_bot_token: ""
redis_password: "password"
configMap:
AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
SESSION_EXPIRE_TIME_SECONDS: "86400" # 1 Day Default
VALID_EMAIL_DOMAINS: "" # Can be something like danswer.ai, as an extra double-check
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_SERVER: "" # For sending verification emails, if unspecified then defaults to 'smtp.gmail.com'
SMTP_PORT: "" # For sending verification emails, if unspecified then defaults to '587'
SMTP_USER: "" # 'your-email@company.com'
# SMTP_PASS: "" # 'your-gmail-password'
EMAIL_FROM: "" # 'your-email@company.com' SMTP_USER missing used instead

View File

@ -52,6 +52,11 @@ spec:
secretKeyRef:
name: danswer-secrets
key: google_oauth_client_secret
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: danswer-secrets
key: redis_password
envFrom:
- configMapRef:
name: env-configmap

View File

@ -19,6 +19,12 @@ spec:
command: ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
# There are some extra values since this is shared between services
# There are no conflicts though, extra env variables are simply ignored
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: danswer-secrets
key: redis_password
envFrom:
- configMapRef:
name: env-configmap

View File

@ -31,6 +31,7 @@ data:
# Other Services
POSTGRES_HOST: "relational-db-service"
VESPA_HOST: "document-index-service"
REDIS_HOST: "redis-service"
# Internet Search Tool
BING_API_KEY: ""
# Don't change the NLP models unless you know what you're doing

View File

@ -0,0 +1,41 @@
# Redis cache for Danswer: a ClusterIP Service fronting a single-replica
# Deployment. REDIS_HOST in env-configmap points at "redis-service".
apiVersion: v1
kind: Service
metadata:
  name: redis-service
spec:
  selector:
    app: redis
  ports:
    - name: redis
      protocol: TCP
      port: 6379
      targetPort: 6379
  type: ClusterIP
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-deployment
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7.4-alpine
          ports:
            - containerPort: 6379
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: danswer-secrets
                  key: redis_password
          # $(REDIS_PASSWORD) is expanded by kubelet from the env var above,
          # so the password never appears literally in the manifest.
          command: ["redis-server"]
          args: ["--requirepass", "$(REDIS_PASSWORD)"]

View File

@ -8,4 +8,6 @@ data:
postgres_user: cG9zdGdyZXM= # "postgres" base64 encoded
postgres_password: cGFzc3dvcmQ= # "password" base64 encoded
google_oauth_client_id: ZXhhbXBsZS1jbGllbnQtaWQ= # "example-client-id" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64
google_oauth_client_secret: example_google_oauth_secret # "example-client-secret" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64
google_oauth_client_secret: ZXhhbXBsZV9nb29nbGVfb2F1dGhfc2VjcmV0 # "example-client-secret" base64 encoded. You will need to provide this, use echo -n "your-client-id" | base64
redis_password: cGFzc3dvcmQ= # "password" base64 encoded

View File

@ -33,6 +33,12 @@ spec:
- containerPort: 3000
# There are some extra values since this is shared between services
# There are no conflicts though, extra env variables are simply ignored
env:
- name: REDIS_PASSWORD
valueFrom:
secretKeyRef:
name: danswer-secrets
key: redis_password
envFrom:
- configMapRef:
name: env-configmap