diff --git a/backend/danswer/document_index/vespa/app_config/validation-overrides.xml b/backend/danswer/document_index/vespa/app_config/validation-overrides.xml
index 58bb2a0ce..d1ac1c119 100644
--- a/backend/danswer/document_index/vespa/app_config/validation-overrides.xml
+++ b/backend/danswer/document_index/vespa/app_config/validation-overrides.xml
@@ -2,4 +2,7 @@
     schema-removal
+    indexing-change
diff --git a/backend/model_server/main.py b/backend/model_server/main.py
index c0851df6c..5c7979475 100644
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -54,9 +54,11 @@ def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None:
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
     if torch.cuda.is_available():
-        logger.notice("GPU is available")
+        logger.notice("CUDA GPU is available")
+    elif torch.backends.mps.is_available():
+        logger.notice("Mac MPS is available")
     else:
-        logger.notice("GPU is not available")
+        logger.notice("GPU is not available, using CPU")
 
     if TEMP_HF_CACHE_PATH.is_dir():
         logger.notice("Moving contents of temp_huggingface to huggingface cache.")
diff --git a/backend/model_server/management_endpoints.py b/backend/model_server/management_endpoints.py
index d2d45d69d..56640a2fa 100644
--- a/backend/model_server/management_endpoints.py
+++ b/backend/model_server/management_endpoints.py
@@ -11,6 +11,10 @@ def healthcheck() -> Response:
 
 
 @router.get("/gpu-status")
-def gpu_status() -> dict[str, bool]:
-    has_gpu = torch.cuda.is_available()
-    return {"gpu_available": has_gpu}
+def gpu_status() -> dict[str, bool | str]:
+    if torch.cuda.is_available():
+        return {"gpu_available": True, "type": "cuda"}
+    elif torch.backends.mps.is_available():
+        return {"gpu_available": True, "type": "mps"}
+    else:
+        return {"gpu_available": False, "type": "none"}
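
A note on the detection order in `lifespan` above: CUDA is checked first, then Apple's MPS backend, with CPU as the fallback. The same priority can be reused wherever the model server needs to pick a device. A minimal sketch, assuming a PyTorch build (1.12+) compiled with MPS support; `pick_device` is a hypothetical helper, not part of this diff:

```python
import torch


def pick_device() -> torch.device:
    # Same priority as lifespan(): CUDA, then Apple MPS, then CPU.
    if torch.cuda.is_available():
        return torch.device("cuda")
    # True on Apple Silicon when PyTorch was built with the MPS backend.
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
```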
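
Since `gpu_status` now returns a `type` field alongside the boolean, callers can branch on the accelerator kind. A hedged client-side example; the host/port and the use of `requests` are assumptions for illustration, not code from this diff:

```python
import requests

# Hypothetical model-server address; adjust for your deployment.
status = requests.get("http://localhost:9000/gpu-status").json()

# Response shape after this change:
#   {"gpu_available": bool, "type": "cuda" | "mps" | "none"}
if status["gpu_available"]:
    print(f"Accelerator detected: {status['type']}")
else:
    print("No GPU detected; the model server will run on CPU.")
```

Existing callers that only read `gpu_available` keep working; the `type` key is purely additive.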