diff --git a/backend/danswer/datastores/qdrant/indexing.py b/backend/danswer/datastores/qdrant/indexing.py
index 66a5c6c8f337..67299acaa6df 100644
--- a/backend/danswer/datastores/qdrant/indexing.py
+++ b/backend/danswer/datastores/qdrant/indexing.py
@@ -27,6 +27,40 @@ logger = setup_logger()
 DEFAULT_BATCH_SIZE = 30
 
 
+def create_collection(
+    collection_name: str, embedding_dim: int = DOC_EMBEDDING_DIM
+) -> None:
+    """Create the Qdrant collection unless it already exists.
+
+    Safe to call on every application startup: an existing collection is
+    left untouched instead of triggering a second create request.
+
+    Args:
+        collection_name: Name of the collection to create.
+        embedding_dim: Size of the vectors stored in the collection.
+
+    Raises:
+        RuntimeError: If the create call returns a falsy result.
+    """
+    client = get_qdrant_client()
+    # Qdrant rejects a create request for a collection that already exists,
+    # which would abort every application startup after the first one.
+    existing_names = {
+        collection.name for collection in client.get_collections().collections
+    }
+    if collection_name in existing_names:
+        logger.info(f"Collection {collection_name} already exists")
+        return
+
+    logger.info(f"Attempting to create collection {collection_name}")
+    result = client.create_collection(
+        collection_name=collection_name,
+        vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
+    )
+    if not result:
+        raise RuntimeError("Could not create Qdrant collection")
+
+
 def recreate_collection(
     collection_name: str, embedding_dim: int = DOC_EMBEDDING_DIM
 ) -> None:
diff --git a/backend/danswer/main.py b/backend/danswer/main.py
index c578a3e760be..714a6a932b55 100644
--- a/backend/danswer/main.py
+++ b/backend/danswer/main.py
@@ -93,9 +93,15 @@ def get_application() -> FastAPI:
 
     @application.on_event("startup")
     def startup_event() -> None:
+        # To avoid circular imports
         from danswer.semantic_search.semantic_search import (
             warm_up_models,
-        )  # To avoid circular imports
+        )
+        from danswer.datastores.qdrant.indexing import create_collection
+        from danswer.configs.app_configs import QDRANT_DEFAULT_COLLECTION
+
+        create_collection(collection_name=QDRANT_DEFAULT_COLLECTION)
+        logger.info("Collection ready")
 
         warm_up_models()
         logger.info("Semantic Search models are ready.")