mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-05 12:39:33 +02:00
* try using a redis replica in some areas * harden up replica usage * comment * slow down cloud dispatch temporarily * add ignored syncing list back * raise multiplier to 8 * comment out per tenant code (no longer used by fanout) --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
261 lines
9.5 KiB
Python
261 lines
9.5 KiB
Python
from datetime import timedelta
|
|
from typing import Any
|
|
|
|
from onyx.configs.app_configs import LLM_MODEL_UPDATE_API_URL
|
|
from onyx.configs.constants import ONYX_CLOUD_CELERY_TASK_PREFIX
|
|
from onyx.configs.constants import OnyxCeleryPriority
|
|
from onyx.configs.constants import OnyxCeleryQueues
|
|
from onyx.configs.constants import OnyxCeleryTask
|
|
from shared_configs.configs import MULTI_TENANT
|
|
|
|
# choosing 15 minutes because it roughly gives us enough time to process many tasks
|
|
# we might be able to reduce this greatly if we can run a unified
|
|
# loop across all tenants rather than tasks per tenant
|
|
|
|
# we set expires because it isn't necessary to queue up these tasks
|
|
# it's only important that they run relatively regularly
|
|
BEAT_EXPIRES_DEFAULT = 15 * 60 # 15 minutes (in seconds)
|
|
|
|
# hack to slow down task dispatch in the cloud until
|
|
# we have a better implementation (backpressure, etc)
|
|
CLOUD_BEAT_SCHEDULE_MULTIPLIER = 8
|
|
|
|
# tasks that only run in the cloud
|
|
# the name attribute must start with ONYX_CLOUD_CELERY_TASK_PREFIX = "cloud" to be filtered
|
|
# by the DynamicTenantScheduler
|
|
cloud_tasks_to_schedule = [
|
|
# cloud specific tasks
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-alembic",
|
|
"task": OnyxCeleryTask.CLOUD_CHECK_ALEMBIC,
|
|
"schedule": timedelta(hours=1 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"queue": OnyxCeleryQueues.MONITORING,
|
|
"priority": OnyxCeleryPriority.HIGH,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
# remaining tasks are cloud generators for per tenant tasks
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-indexing",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_INDEXING,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-connector-deletion",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-vespa-sync",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-prune",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_PRUNING,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-vespa-sync",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=15 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.MONITOR_VESPA_SYNC,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-doc-permissions-sync",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=30 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-external-group-sync",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(seconds=20 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
|
|
},
|
|
},
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_monitor-background-processes",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(minutes=5 * CLOUD_BEAT_SCHEDULE_MULTIPLIER),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
|
|
"queue": OnyxCeleryQueues.MONITORING,
|
|
"priority": OnyxCeleryPriority.LOW,
|
|
},
|
|
},
|
|
]
|
|
|
|
if LLM_MODEL_UPDATE_API_URL:
|
|
cloud_tasks_to_schedule.append(
|
|
{
|
|
"name": f"{ONYX_CLOUD_CELERY_TASK_PREFIX}_check-for-llm-model-update",
|
|
"task": OnyxCeleryTask.CLOUD_BEAT_TASK_GENERATOR,
|
|
"schedule": timedelta(
|
|
hours=1 * CLOUD_BEAT_SCHEDULE_MULTIPLIER
|
|
), # Check every hour
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.HIGHEST,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
"kwargs": {
|
|
"task_name": OnyxCeleryTask.CHECK_FOR_LLM_MODEL_UPDATE,
|
|
"priority": OnyxCeleryPriority.LOW,
|
|
},
|
|
}
|
|
)
|
|
|
|
# tasks that run in either self-hosted on cloud
|
|
tasks_to_schedule: list[dict] = []
|
|
|
|
if not MULTI_TENANT:
|
|
tasks_to_schedule.extend(
|
|
[
|
|
{
|
|
"name": "check-for-indexing",
|
|
"task": OnyxCeleryTask.CHECK_FOR_INDEXING,
|
|
"schedule": timedelta(seconds=15),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "check-for-connector-deletion",
|
|
"task": OnyxCeleryTask.CHECK_FOR_CONNECTOR_DELETION,
|
|
"schedule": timedelta(seconds=20),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "check-for-vespa-sync",
|
|
"task": OnyxCeleryTask.CHECK_FOR_VESPA_SYNC_TASK,
|
|
"schedule": timedelta(seconds=20),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "check-for-pruning",
|
|
"task": OnyxCeleryTask.CHECK_FOR_PRUNING,
|
|
"schedule": timedelta(hours=1),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "monitor-vespa-sync",
|
|
"task": OnyxCeleryTask.MONITOR_VESPA_SYNC,
|
|
"schedule": timedelta(seconds=5),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "check-for-doc-permissions-sync",
|
|
"task": OnyxCeleryTask.CHECK_FOR_DOC_PERMISSIONS_SYNC,
|
|
"schedule": timedelta(seconds=30),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "check-for-external-group-sync",
|
|
"task": OnyxCeleryTask.CHECK_FOR_EXTERNAL_GROUP_SYNC,
|
|
"schedule": timedelta(seconds=20),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.MEDIUM,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
},
|
|
{
|
|
"name": "monitor-background-processes",
|
|
"task": OnyxCeleryTask.MONITOR_BACKGROUND_PROCESSES,
|
|
"schedule": timedelta(minutes=5),
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.LOW,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
"queue": OnyxCeleryQueues.MONITORING,
|
|
},
|
|
},
|
|
]
|
|
)
|
|
|
|
# Only add the LLM model update task if the API URL is configured
|
|
if LLM_MODEL_UPDATE_API_URL:
|
|
tasks_to_schedule.append(
|
|
{
|
|
"name": "check-for-llm-model-update",
|
|
"task": OnyxCeleryTask.CHECK_FOR_LLM_MODEL_UPDATE,
|
|
"schedule": timedelta(hours=1), # Check every hour
|
|
"options": {
|
|
"priority": OnyxCeleryPriority.LOW,
|
|
"expires": BEAT_EXPIRES_DEFAULT,
|
|
},
|
|
}
|
|
)
|
|
|
|
|
|
def get_cloud_tasks_to_schedule() -> list[dict[str, Any]]:
|
|
return cloud_tasks_to_schedule
|
|
|
|
|
|
def get_tasks_to_schedule() -> list[dict[str, Any]]:
|
|
return tasks_to_schedule
|