danswer/backend/onyx/utils/gpu_utils.py
Chris Weaver d123713c00
Fix GPU status request in sync flow (#4318)
* Fix GPU status request in sync flow

* tweak

* Fix test

* Fix more tests
2025-03-21 11:11:00 -07:00

43 lines
1.4 KiB
Python

from functools import lru_cache
import requests
from retry import retry
from onyx.utils.logger import setup_logger
from shared_configs.configs import INDEXING_MODEL_SERVER_HOST
from shared_configs.configs import INDEXING_MODEL_SERVER_PORT
from shared_configs.configs import MODEL_SERVER_HOST
from shared_configs.configs import MODEL_SERVER_PORT
logger = setup_logger()
def _get_gpu_status_from_model_server(indexing: bool) -> bool:
if indexing:
model_server_url = f"{INDEXING_MODEL_SERVER_HOST}:{INDEXING_MODEL_SERVER_PORT}"
else:
model_server_url = f"{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}"
if "http" not in model_server_url:
model_server_url = f"http://{model_server_url}"
try:
response = requests.get(f"{model_server_url}/api/gpu-status", timeout=10)
response.raise_for_status()
gpu_status = response.json()
return gpu_status["gpu_available"]
except requests.RequestException as e:
logger.error(f"Error: Unable to fetch GPU status. Error: {str(e)}")
raise # Re-raise exception to trigger a retry
@retry(tries=5, delay=5)
def gpu_status_request(indexing: bool) -> bool:
return _get_gpu_status_from_model_server(indexing)
@lru_cache(maxsize=1)
def fast_gpu_status_request(indexing: bool) -> bool:
"""For use in sync flows, where we don't want to retry / we want to cache this."""
return gpu_status_request(indexing=indexing)