[supervisord]
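# run in the foreground rather than daemonizing; supervisord is presumably
# the Docker container's main process, so exiting would stop the container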
nodaemon=true
user=root
logfile=/var/log/supervisord.log

# Indexing is the heaviest job and also requires some CPU-intensive steps
# Cannot place this in Celery for now because Celery must run as a single process (see note below)
# Indexing uses multi-processing to speed things up
[program:document_indexing]
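# note: supervisord's environment= takes a comma-separated list of KEY=value
# pairs; CURRENT_PROCESS_IS_AN_INDEXING_JOB presumably flags this process as
# an indexing job to the application code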
environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true,LOG_FILE_NAME=document_indexing
command=python danswer/background/update.py
redirect_stderr=true
autorestart=true

# Background jobs that must be run async due to long time to completion
# NOTE: due to an issue with Celery + SQLAlchemy
# (https://github.com/celery/celery/issues/7007#issuecomment-1740139367)
# we must use the threads pool instead of the default prefork pool for now
# in order to avoid intermittent errors like:
# `billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)`.
#
# This means workers will not be able to take advantage of multiple CPU cores
# on a system, but this should be okay for now since all our celery tasks are
# relatively compute-light (e.g. they tend to just make a bunch of requests to
# Vespa / Postgres)
[program:celery_worker]
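# --pool=threads with --concurrency=6 runs tasks on 6 threads in a single
# process (see NOTE above); -Q makes this worker consume both the default
# "celery" queue and the "vespa_metadata_sync" queue (metadata sync tasks)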
command=celery -A danswer.background.celery.celery_run:celery_app worker
    --pool=threads
    --concurrency=6
    --loglevel=INFO
    --logfile=/var/log/celery_worker_supervisor.log
    -Q celery,vespa_metadata_sync
environment=LOG_FILE_NAME=celery_worker
redirect_stderr=true
autorestart=true

# Job scheduler for periodic tasks
[program:celery_beat]
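# beat only schedules periodic tasks; the actual work is picked up and
# executed by the celery_worker above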
command=celery -A danswer.background.celery.celery_run:celery_app beat
    --loglevel=INFO
    --logfile=/var/log/celery_beat_supervisor.log
environment=LOG_FILE_NAME=celery_beat
redirect_stderr=true
autorestart=true

# Listens for Slack messages and responds with answers
# for all channels that the DanswerBot has been added to.
# If not set up, this will just fail 5 times and then stop.
# More details on setup here: https://docs.danswer.dev/slack_bot_setup
[program:slack_bot]
command=python danswer/danswerbot/slack/listener.py
environment=LOG_FILE_NAME=slack_bot
redirect_stderr=true
autorestart=true
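# allow 5 start attempts; the process must stay up for 60s before supervisord
# considers the start successful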
startretries=5
startsecs=60

# Pushes all logs from the above programs to stdout
# No log rotation here; since it's stdout, it's handled by the Docker container logging
[program:log-redirect-handler]
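# -q suppresses per-file headers, -F follows by name so tailing survives the
# log files being rotated or recreated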
command=tail -qF
    /var/log/document_indexing_info.log
    /var/log/celery_beat_supervisor.log
    /var/log/celery_worker_supervisor.log
    /var/log/celery_beat_debug.log
    /var/log/celery_worker_debug.log
    /var/log/slack_bot_debug.log
stdout_logfile=/dev/stdout
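# a maxbytes of 0 disables supervisord's own log rotation, which is required
# when logging to the non-seekable /dev/stdout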
stdout_logfile_maxbytes=0
redirect_stderr=true
autorestart=true