Feature/celery refactor (#2813)

* fresh indexing feature branch

* cherry pick test

* Revert "cherry pick test"

This reverts commit 2a62422068.

* set multi-tenant so that Vespa fields match when indexing

* cleanup pass

* mypy

* pass through an environment variable to control Celery indexing concurrency

* comments on task kickoff and some logging improvements

* disentangle configuration for different workers and beats.

* use get_session_with_tenant

* comment out all of update.py

* rename to RedisConnectorIndexingFenceData

* first check num_indexing_workers

* refactor RedisConnectorIndexingFenceData

* comment out on_worker_process_init

* missed a file

* scope db sessions to short lengths

* update launch.json template

* fix types

* code review
This commit is contained in:
rkuo-danswer
2024-10-22 15:57:36 -07:00
committed by GitHub
parent eccec6ab7c
commit 9105f95d13
38 changed files with 1684 additions and 885 deletions

View File

@ -20,14 +20,13 @@ def run_jobs() -> None:
cmd_worker_primary = [
"celery",
"-A",
"ee.danswer.background.celery.celery_app",
"danswer.background.celery.versioned_apps.primary",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"-n",
"primary@%n",
"--hostname=primary@%n",
"-Q",
"celery",
]
@ -35,14 +34,13 @@ def run_jobs() -> None:
cmd_worker_light = [
"celery",
"-A",
"ee.danswer.background.celery.celery_app",
"danswer.background.celery.versioned_apps.light",
"worker",
"--pool=threads",
"--concurrency=16",
"--prefetch-multiplier=8",
"--loglevel=INFO",
"-n",
"light@%n",
"--hostname=light@%n",
"-Q",
"vespa_metadata_sync,connector_deletion",
]
@ -50,14 +48,13 @@ def run_jobs() -> None:
cmd_worker_heavy = [
"celery",
"-A",
"ee.danswer.background.celery.celery_app",
"danswer.background.celery.versioned_apps.heavy",
"worker",
"--pool=threads",
"--concurrency=6",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"-n",
"heavy@%n",
"--hostname=heavy@%n",
"-Q",
"connector_pruning",
]
@ -65,21 +62,20 @@ def run_jobs() -> None:
cmd_worker_indexing = [
"celery",
"-A",
"ee.danswer.background.celery.celery_app",
"danswer.background.celery.versioned_apps.indexing",
"worker",
"--pool=threads",
"--concurrency=1",
"--prefetch-multiplier=1",
"--loglevel=INFO",
"-n",
"indexing@%n",
"--hostname=indexing@%n",
"--queues=connector_indexing",
]
cmd_beat = [
"celery",
"-A",
"ee.danswer.background.celery.celery_app",
"danswer.background.celery.versioned_apps.beat",
"beat",
"--loglevel=INFO",
]