mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-29 17:20:44 +02:00
fresh indexing feature branch (#2790)
* fresh indexing feature branch
* cherry pick test
* Revert "cherry pick test"
This reverts commit 2a62422068
.
* set multitenant so that vespa fields match when indexing
* cleanup pass
* mypy
* pass through env var to control celery indexing concurrency
* comments on task kickoff and some logging improvements
* use get_session_with_tenant
* comment out all of update.py
* rename to RedisConnectorIndexingFenceData
* first check num_indexing_workers
* refactor RedisConnectorIndexingFenceData
* comment out on_worker_process_init
* fix where num_indexing_workers falls back
* remove extra brace
This commit is contained in:
@ -1,5 +1,4 @@
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import threading
|
||||
|
||||
@ -17,7 +16,7 @@ def monitor_process(process_name: str, process: subprocess.Popen) -> None:
|
||||
break
|
||||
|
||||
|
||||
def run_jobs(exclude_indexing: bool) -> None:
|
||||
def run_jobs() -> None:
|
||||
# command setup
|
||||
cmd_worker_primary = [
|
||||
"celery",
|
||||
@ -26,6 +25,7 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=6",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"-n",
|
||||
"primary@%n",
|
||||
@ -40,6 +40,7 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=16",
|
||||
"--prefetch-multiplier=8",
|
||||
"--loglevel=INFO",
|
||||
"-n",
|
||||
"light@%n",
|
||||
@ -54,6 +55,7 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=6",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"-n",
|
||||
"heavy@%n",
|
||||
@ -61,6 +63,20 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
"connector_pruning",
|
||||
]
|
||||
|
||||
cmd_worker_indexing = [
|
||||
"celery",
|
||||
"-A",
|
||||
"ee.danswer.background.celery.celery_app",
|
||||
"worker",
|
||||
"--pool=threads",
|
||||
"--concurrency=1",
|
||||
"--prefetch-multiplier=1",
|
||||
"--loglevel=INFO",
|
||||
"-n",
|
||||
"indexing@%n",
|
||||
"--queues=connector_indexing",
|
||||
]
|
||||
|
||||
cmd_beat = [
|
||||
"celery",
|
||||
"-A",
|
||||
@ -82,6 +98,10 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
cmd_worker_heavy, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
|
||||
)
|
||||
|
||||
worker_indexing_process = subprocess.Popen(
|
||||
cmd_worker_indexing, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
|
||||
)
|
||||
|
||||
beat_process = subprocess.Popen(
|
||||
cmd_beat, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
|
||||
)
|
||||
@ -96,44 +116,26 @@ def run_jobs(exclude_indexing: bool) -> None:
|
||||
worker_heavy_thread = threading.Thread(
|
||||
target=monitor_process, args=("HEAVY", worker_heavy_process)
|
||||
)
|
||||
worker_indexing_thread = threading.Thread(
|
||||
target=monitor_process, args=("INDEX", worker_indexing_process)
|
||||
)
|
||||
beat_thread = threading.Thread(target=monitor_process, args=("BEAT", beat_process))
|
||||
|
||||
worker_primary_thread.start()
|
||||
worker_light_thread.start()
|
||||
worker_heavy_thread.start()
|
||||
worker_indexing_thread.start()
|
||||
beat_thread.start()
|
||||
|
||||
if not exclude_indexing:
|
||||
update_env = os.environ.copy()
|
||||
update_env["PYTHONPATH"] = "."
|
||||
cmd_indexing = ["python", "danswer/background/update.py"]
|
||||
|
||||
indexing_process = subprocess.Popen(
|
||||
cmd_indexing,
|
||||
env=update_env,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.STDOUT,
|
||||
text=True,
|
||||
)
|
||||
|
||||
indexing_thread = threading.Thread(
|
||||
target=monitor_process, args=("INDEXING", indexing_process)
|
||||
)
|
||||
|
||||
indexing_thread.start()
|
||||
indexing_thread.join()
|
||||
|
||||
worker_primary_thread.join()
|
||||
worker_light_thread.join()
|
||||
worker_heavy_thread.join()
|
||||
worker_indexing_thread.join()
|
||||
beat_thread.join()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="Run background jobs.")
|
||||
parser.add_argument(
|
||||
"--no-indexing", action="store_true", help="Do not run indexing process"
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
run_jobs(args.no_indexing)
|
||||
run_jobs()
|
||||
|
Reference in New Issue
Block a user