Changed default local model to nomic (#1943)

2025-08-02 05:02:48 +02:00 · 2024-07-31 18:54:02 -07:00
parent 1654378850
commit 1be1959d80
12 changed files with 78 additions and 28 deletions
--- a/backend/model_server/main.py
+++ b/backend/model_server/main.py
@@ -1,6 +1,9 @@
+import asyncio
 import os
+import shutil
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
+from pathlib import Path

 import torch
 import uvicorn
@@ -29,6 +32,24 @@ transformer_logging.set_verbosity_error()
 logger = setup_logger()


+async def manage_huggingface_cache() -> None:
+    """Copy the temporary Hugging Face cache into the main cache directory.
+
+    If /root/.cache/temp_huggingface exists and contains at least one entry,
+    every entry in it is copied into /root/.cache/huggingface (created if
+    missing) and the temporary directory is then deleted. Otherwise nothing
+    is copied and only a log message is emitted.
+
+    The copy and delete calls are dispatched through asyncio.to_thread so the
+    blocking filesystem work runs in a worker thread rather than on the
+    event loop thread.
+    """
+    temp_hf_cache = Path("/root/.cache/temp_huggingface")
+    hf_cache = Path("/root/.cache/huggingface")
+    # Only act when the temporary directory exists and has at least one entry.
+    if temp_hf_cache.is_dir() and any(temp_hf_cache.iterdir()):
+        hf_cache.mkdir(parents=True, exist_ok=True)
+        for item in temp_hf_cache.iterdir():
+            if item.is_dir():
+                # dirs_exist_ok=True allows copying into a target directory
+                # that already exists instead of raising.
+                await asyncio.to_thread(
+                    shutil.copytree, item, hf_cache / item.name, dirs_exist_ok=True
+                )
+            else:
+                # The destination is a directory, so the file keeps its name.
+                await asyncio.to_thread(shutil.copy2, item, hf_cache)
+        # Remove the temporary directory once every entry has been copied.
+        await asyncio.to_thread(shutil.rmtree, temp_hf_cache)
+        logger.info("Copied contents of temp_huggingface and deleted the directory.")
+    else:
+        logger.info("Source directory is empty or does not exist. Skipping copy.")
+
+
@asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator:
    if torch.cuda.is_available():
@@ -36,6 +57,8 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
    else:
        logger.info("GPU is not available")

+    await manage_huggingface_cache()
+
    torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
    logger.info(f"Torch Threads: {torch.get_num_threads()}")