diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py
index 5391586a9952..ea610c00d365 100755
--- a/backend/danswer/background/update.py
+++ b/backend/danswer/background/update.py
@@ -3,15 +3,14 @@
 import time
 from datetime import datetime
 from datetime import timezone
+import torch
 from dask.distributed import Client
 from dask.distributed import Future
 from distributed import LocalCluster
 from sqlalchemy.orm import Session
 
 from danswer.configs.app_configs import NUM_INDEXING_WORKERS
-from danswer.configs.model_configs import (
-    BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED,
-)
+from danswer.configs.model_configs import MIN_THREADS_ML_MODELS
 from danswer.connectors.factory import instantiate_connector
 from danswer.connectors.interfaces import GenerateDocumentsOutput
 from danswer.connectors.interfaces import LoadConnector
@@ -351,15 +350,9 @@ def _run_indexing_entrypoint(index_attempt_id: int) -> None:
     """Entrypoint for indexing run when using dask distributed.
     Wraps the actual logic in a `try` block so that we can catch any exceptions
     and mark the attempt as failed."""
-    import torch
-    import os
-
-    # force torch to use more cores if available. On VMs pytorch only takes
-    # advantage of a single core by default
-    cpu_cores_to_use = max(
-        (os.cpu_count() or 1) - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED,
-        torch.get_num_threads(),
-    )
+    cpu_cores_to_use = max(MIN_THREADS_ML_MODELS, torch.get_num_threads())
+    logger.info(f"Setting task to use {cpu_cores_to_use} threads")
     torch.set_num_threads(cpu_cores_to_use)
 
diff --git a/backend/danswer/configs/model_configs.py b/backend/danswer/configs/model_configs.py
index 2a8f5b4d9836..d296a6b90f68 100644
--- a/backend/danswer/configs/model_configs.py
+++ b/backend/danswer/configs/model_configs.py
@@ -30,13 +30,9 @@ ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "")
 ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "")
 # Purely an optimization, memory limitation consideration
 BATCH_SIZE_ENCODE_CHUNKS = 8
-# This controls the number of pytorch "threads" to allocate to the embedding
-# model. Specifically, this is computed as `num_cpu_cores - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED`.
-# This is useful for limiting the number of CPU cores that the background job consumes to leave some
-# compute for other processes (most importantly the api_server and web_server).
-BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED = int(
-    os.environ.get("BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED") or 1
-)
+# This controls the minimum number of pytorch "threads" to allocate to the embedding
+# model. If torch finds more threads on its own, this value is not used.
+MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)
 
 
 # Cross Encoder Settings
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 2cc80f01f5be..0fdcb8b46d68 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -96,7 +96,7 @@ services:
       - ASYM_PASSAGE_PREFIX=${ASYM_PASSAGE_PREFIX:-}
      - SKIP_RERANKING=${SKIP_RERANKING:-}
       - EDIT_KEYWORD_QUERY=${EDIT_KEYWORD_QUERY:-}
-      - BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED=${BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED:-}
+      - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-}
       # Set to debug to get more fine-grained logs
       - LOG_LEVEL=${LOG_LEVEL:-info}
     volumes:
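For context, here is the new thread-selection logic in isolation: a minimal standalone sketch that mirrors the `MIN_THREADS_ML_MODELS` default from `model_configs.py` above. The script itself (including the `print`) is illustrative, not code from the repo.

```python
import os

import torch

# Same default as the new MIN_THREADS_ML_MODELS config above: a floor of 1,
# overridable via the environment.
MIN_THREADS_ML_MODELS = int(os.environ.get("MIN_THREADS_ML_MODELS") or 1)

# torch.get_num_threads() reports torch's own pick, which per the removed
# comment is often just a single core on VMs; max() enforces the configured
# floor without ever lowering a higher value torch already chose.
cpu_cores_to_use = max(MIN_THREADS_ML_MODELS, torch.get_num_threads())
torch.set_num_threads(cpu_cores_to_use)

print(f"torch is now using {torch.get_num_threads()} threads")
```

So e.g. `MIN_THREADS_ML_MODELS=4 python sketch.py` guarantees at least 4 threads, while on a machine where torch already detected more, the env var is a no-op. This is the behavioral difference from the old `BACKGROUND_JOB_EMBEDDING_MODEL_CPU_CORES_LEFT_UNUSED` approach, which subtracted a reserved-core count from `os.cpu_count()`.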