figuring out why multiprocessing set_start_method isn't working.

Richard Kuo (Danswer) 2025-01-09 16:29:37 -08:00
parent 439217317f
commit 962240031f
6 changed files with 15 additions and 4 deletions

View File

@@ -1,5 +1,4 @@
 import logging
-import multiprocessing
 import time
 from typing import Any
@@ -163,7 +162,10 @@ def on_task_postrun(
 def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None:
     """The first signal sent on celery worker startup"""
-    multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
+    # rkuo: commenting out as set_start_method seems to work here on macOS
+    # but not in the cloud and it is unclear why.
+    # logger.info(f"Multiprocessing start method - setting to spawn.")
+    # multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
 def wait_for_redis(sender: Any, **kwargs: Any) -> None:
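
A likely culprit for the behavior the comment describes: multiprocessing.set_start_method() may only be called once per process, and it raises RuntimeError if the start method has already been fixed — which happens as a side effect of the first get_start_method() call or the first use of the default context. On Linux the default is "fork", so by the time on_celeryd_init fires in a cloud deployment, something may have fixed the context already. A minimal sketch of a guarded call (a hypothetical helper, not part of this commit):

import multiprocessing


def set_spawn_safely() -> None:
    """Hypothetical helper: fix the start method to 'spawn' without
    blowing up if something already fixed it."""
    current = multiprocessing.get_start_method(allow_none=True)
    if current is None:
        multiprocessing.set_start_method("spawn")
    elif current != "spawn":
        # Something (a library import, an early get_start_method() call)
        # already fixed the method. force=True overrides it, but any
        # pools created earlier under "fork" are unaffected.
        multiprocessing.set_start_method("spawn", force=True)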

View File

@@ -56,6 +56,7 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
 @worker_init.connect
 def on_worker_init(sender: Any, **kwargs: Any) -> None:
     logger.info("worker_init signal received.")
     multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
+    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")
     SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME)
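
One subtlety with this diagnostic (the same line is added to the indexing, light, and primary workers below): get_start_method() with its default allow_none=False will itself fix the start method to the platform default if nothing has set it yet, so it must run after set_start_method(), as it does here. A side-effect-free probe, as a sketch:

import multiprocessing

# With allow_none=True the probe returns None instead of fixing the
# default ("fork" on Linux) as a side effect.
method = multiprocessing.get_start_method(allow_none=True)
print(f"start method so far: {method}")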

View File

@@ -57,6 +57,7 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
 @worker_init.connect
 def on_worker_init(sender: Any, **kwargs: Any) -> None:
     logger.info("worker_init signal received.")
     multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
+    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")
     SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_APP_NAME)

View File

@@ -56,7 +56,9 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
 @worker_init.connect
 def on_worker_init(sender: Any, **kwargs: Any) -> None:
     logger.info("worker_init signal received.")
     multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
+    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")
+    logger.info(f"Concurrency: {sender.concurrency}")
     SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME)
     SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8)
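
The light worker also sizes its SQLAlchemy pool from the Celery concurrency. Assuming SqlEngine.init_engine is a thin wrapper over SQLAlchemy's create_engine (an assumption — the real implementation lives elsewhere in the repo), the pool math is roughly:

from sqlalchemy import create_engine

# Sketch only: placeholder URL, and init_engine's real signature may differ.
def init_engine(pool_size: int, max_overflow: int):
    # Up to pool_size persistent connections, plus max_overflow burst
    # connections that are discarded when returned to the pool.
    return create_engine(
        "postgresql://app:secret@localhost:5432/onyx",  # placeholder
        pool_size=pool_size,
        max_overflow=max_overflow,
    )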

View File

@@ -80,6 +80,7 @@ def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None
 @worker_init.connect
 def on_worker_init(sender: Any, **kwargs: Any) -> None:
     logger.info("worker_init signal received.")
     multiprocessing.set_start_method("spawn")  # fork is unsafe, set to spawn
+    logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}")
     SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME)

View File

@@ -1,3 +1,4 @@
+import multiprocessing
 import os
 import sys
 import time
@@ -853,11 +854,14 @@ def connector_indexing_proxy_task(
     search_settings_id: int,
     tenant_id: str | None,
 ) -> None:
-    """celery tasks are forked, but forking is unstable. This proxies work to a spawned task."""
+    """celery tasks are forked, but forking is unstable.
+    This is a thread that proxies work to a spawned task."""
     task_logger.info(
         f"Indexing watchdog - starting: attempt={index_attempt_id} "
         f"cc_pair={cc_pair_id} "
-        f"search_settings={search_settings_id}"
+        f"search_settings={search_settings_id} "
+        f"multiprocessing={multiprocessing.get_start_method()}"
     )
     if not self.request.id:
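
For context, the docstring's "proxies work to a spawned task" pattern can be implemented without touching the process-wide start method at all: multiprocessing.get_context("spawn") returns a context whose Process uses spawn regardless of the global default. A minimal sketch (illustrative names, not the actual task body):

import multiprocessing


def _do_indexing(index_attempt_id: int) -> None:
    # stand-in for the real indexing work
    print(f"indexing attempt {index_attempt_id} in a spawned child")


def run_in_spawned_process(index_attempt_id: int) -> None:
    ctx = multiprocessing.get_context("spawn")  # per-call context, no global state
    proc = ctx.Process(target=_do_indexing, args=(index_attempt_id,))
    proc.start()
    proc.join()


if __name__ == "__main__":  # required for "spawn" when run as a script
    run_in_spawned_process(1)

Because the context is local to the call, this sidesteps the one-shot set_start_method() problem the rest of the commit is probing.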