Mirror of https://github.com/danswer-ai/danswer.git (synced 2025-03-29 11:12:02 +01:00)
Speed up docker launch (#2099)
* use move instead of copy
* added logging
* fix overwrites
* tested thoroughly
* fixes
* clearer commenting
This commit is contained in: parent 83e945ba57, commit bbb8c5ff0b
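Why the move is faster: on a single filesystem, shutil.move completes as a rename, so its cost does not scale with file size, whereas the old copy-based cache setup duplicated every byte of the pre-baked model weights. A minimal, self-contained sketch of the difference (the file name, size, and paths below are illustrative, not from the commit):

import shutil
import tempfile
from pathlib import Path

# Illustrative only: a 64 MiB stand-in for a model weight file.
with tempfile.TemporaryDirectory() as tmp:
    src = Path(tmp) / "model.safetensors"
    src.write_bytes(b"\0" * (64 * 1024 * 1024))

    moved = Path(tmp) / "cache" / "model.safetensors"
    moved.parent.mkdir(parents=True)
    shutil.move(str(src), str(moved))  # same filesystem: a metadata-only rename

    copied = Path(tmp) / "copy" / "model.safetensors"
    copied.parent.mkdir(parents=True)
    shutil.copy2(moved, copied)  # reads and writes all 64 MiB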
@@ -1,4 +1,3 @@
-import asyncio
 import os
 import shutil
 from collections.abc import AsyncGenerator
@@ -24,27 +23,32 @@ from shared_configs.configs import MODEL_SERVER_PORT
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 
+HF_CACHE_PATH = Path("/root/.cache/huggingface/")
+TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/")
+
 transformer_logging.set_verbosity_error()
 
 logger = setup_logger()
 
 
-async def manage_huggingface_cache() -> None:
-    temp_hf_cache = Path("/root/.cache/temp_huggingface")
-    hf_cache = Path("/root/.cache/huggingface")
-    if temp_hf_cache.is_dir() and any(temp_hf_cache.iterdir()):
-        hf_cache.mkdir(parents=True, exist_ok=True)
-        for item in temp_hf_cache.iterdir():
-            if item.is_dir():
-                await asyncio.to_thread(
-                    shutil.copytree, item, hf_cache / item.name, dirs_exist_ok=True
-                )
-            else:
-                await asyncio.to_thread(shutil.copy2, item, hf_cache)
-        await asyncio.to_thread(shutil.rmtree, temp_hf_cache)
-        logger.info("Copied contents of temp_huggingface and deleted the directory.")
-    else:
-        logger.info("Source directory is empty or does not exist. Skipping copy.")
+def _move_files_recursively(source: Path, dest: Path, overwrite: bool = False) -> None:
+    """
+    This moves the files from the temp huggingface cache to the huggingface cache
+
+    We have to move each file individually because the directories might
+    have the same name but not the same contents and we don't want to remove
+    the files in the existing huggingface cache that don't exist in the temp
+    huggingface cache.
+    """
+    for item in source.iterdir():
+        target_path = dest / item.relative_to(source)
+        if item.is_dir():
+            _move_files_recursively(item, target_path, overwrite)
+        else:
+            target_path.parent.mkdir(parents=True, exist_ok=True)
+            if target_path.exists() and not overwrite:
+                continue
+            shutil.move(str(item), str(target_path))
 
 
 @asynccontextmanager
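Note on the recursion above: a single shutil.move of the whole directory would fail or nest the source inside the destination whenever the target directory already exists, so moving leaf files one by one is what lets the two cache trees merge without clobbering files that exist only in the real cache. A quick check of that merge behavior, assuming the diffed module is importable as model_server.main (directory and file names below are illustrative):

import tempfile
from pathlib import Path

from model_server.main import _move_files_recursively  # assumed module path

with tempfile.TemporaryDirectory() as tmp:
    temp_cache = Path(tmp) / "temp_huggingface"
    real_cache = Path(tmp) / "huggingface"
    (temp_cache / "models--foo").mkdir(parents=True)
    (temp_cache / "models--foo" / "weights.bin").write_text("baked into the image")
    (real_cache / "models--foo").mkdir(parents=True)
    (real_cache / "models--foo" / "tokenizer.json").write_text("already present")

    _move_files_recursively(temp_cache, real_cache)

    # New file arrived; the pre-existing file survived untouched.
    assert (real_cache / "models--foo" / "weights.bin").exists()
    assert (real_cache / "models--foo" / "tokenizer.json").read_text() == "already present"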
@@ -54,7 +58,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator:
     else:
         logger.info("GPU is not available")
 
-    await manage_huggingface_cache()
+    if TEMP_HF_CACHE_PATH.is_dir():
+        logger.info("Moving contents of temp_huggingface to huggingface cache.")
+        _move_files_recursively(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
+        shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
+        logger.info("Moved contents of temp_huggingface to huggingface cache.")
 
     torch.set_num_threads(max(MIN_THREADS_ML_MODELS, torch.get_num_threads()))
     logger.info(f"Torch Threads: {torch.get_num_threads()}")
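The lifespan change also makes the migration one-shot: deleting TEMP_HF_CACHE_PATH after the move means every later container start sees the guard fail and skips the work entirely. A minimal sketch of that pattern (migrate_once and move_tree are hypothetical names, not from the commit):

import shutil
from pathlib import Path
from typing import Callable

TEMP_HF_CACHE_PATH = Path("/root/.cache/temp_huggingface/")
HF_CACHE_PATH = Path("/root/.cache/huggingface/")

def migrate_once(move_tree: Callable[[Path, Path], None]) -> bool:
    """Return True if a migration ran, False if there was nothing to do."""
    if not TEMP_HF_CACHE_PATH.is_dir():
        return False  # second and later launches take this fast path
    move_tree(TEMP_HF_CACHE_PATH, HF_CACHE_PATH)
    # Deleting the temp dir is what makes the guard permanent;
    # ignore_errors=True tolerates a partially emptied tree.
    shutil.rmtree(TEMP_HF_CACHE_PATH, ignore_errors=True)
    return True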