diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 5391586a9952..ea610c00d365 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -3,15 +3,14 @@
 import time
 from datetime import datetime
 from datetime import timezone
+import torch
 from dask.distributed import Client
 from dask.distributed import Future
 from distributed import LocalCluster
 from sqlalchemy.orm import Session
 
 from danswer.configs.app_configs import NUM_INDEXING_WORKERS
-from danswer.configs.model_configs import (
-    BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED,
-)
+from danswer.configs.model_configs import MIN_THREADS_ML_MODELS
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
@@ -351,15 +350,9 @@ def _run_indexing_entrypoint(index_attempt_id: int) -> None:
     """Entrypoint for indexing run when using dask distributed.
     Wraps the actual logic in a `try` block so that we can catch any exceptions
     and mark the attempt as failed."""
-    import torch
-    import os
-
-    # force torch to use more cores if available. On VMs pytorch only takes
-    # advantage of a single core by default
-    cpu_cores_to_use = max(
-        (os.cpu_count() or 1) - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED,
-        torch.get_num_threads(),
-    )
+    cpu_cores_to_use = max(MIN_THREADS_ML_MODELS, torch.get_num_threads())
+    logger.info(f"Setting task to use {cpu_cores_to_use} threads")
     torch.set_num_threads(cpu_cores_to_use)
 
diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index 2a8f5b4d9836..d296a6b90f68 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -30,13 +30,9 @@ ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "")
 ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "")
 # Purely an optimization, memory limitation consideration
 BATCH_SIZE_ENCODE_CHUNKS = 8
-# This controls the number of pytorch "threads" to allocate to the embedding
-# model. Specifically, this is computed as `num_cpu_cores - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED`.
-# This is useful for limiting the number of CPU cores that the background job consumes to leave some
-# compute for other processes (most importantly the api_server and web_server).
-BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED = int(
-    os.environ.get("BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED") or 1
-)
+# This controls the minimum number of pytorch "threads" to allocate to the embedding
+# model. If torch finds more threads on its own, this value is not used.
+MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 
 
 # Cross Encoder Settings
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 2cc80f01f5be..0fdcb8b46d68 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -96,7 +96,7 @@ services:
       - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
      - SKIP_RERANKING=${SKIP_RERANKING:-}
       - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
-      - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED=${BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED:-}
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
     volumes:
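For context, here is the new thread-selection logic in isolation: a minimal standalone sketch that mirrors the `MIN_THREADS_ML_MODELS` default from `model_configs.py` above. The script itself (including the `print`) is illustrative, not code from the repo.

```python
import os

import torch

# Same default as the new MIN_THREADS_ML_MODELS config above: a floor of 1,
# overridable via the environment.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)

# torch.get_num_threads() reports torch's own pick, which per the removed
# comment is often just a single core on VMs; max() enforces the configured
# floor without ever lowering a higher value torch already chose.
cpu_cores_to_use = max(MIN_THREADS_ML_MODELS, torch.get_num_threads())
torch.set_num_threads(cpu_cores_to_use)

print(f"torch is now using {torch.get_num_threads()} threads")
```

So e.g. `MIN_THREADS_ML_MODELS=4 python sketch.py` guarantees at least 4 threads, while on a machine where torch already detected more, the env var is a no-op. This is the behavioral difference from the old `BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED` approach, which subtracted a reserved-core count from `os.cpu_count()`.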