diff --git a/backend/danswer/background/celery/tasks/indexing/tasks.py b/backend/danswer/background/celery/tasks/indexing/tasks.py index 980266ec8..2b5b98767 100644 --- a/backend/danswer/background/celery/tasks/indexing/tasks.py +++ b/backend/danswer/background/celery/tasks/indexing/tasks.py @@ -24,6 +24,7 @@ from danswer.background.indexing.job_client import SimpleJobClient from danswer.background.indexing.run_indexing import run_indexing_entrypoint from danswer.background.indexing.run_indexing import RunIndexingCallbackInterface from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.configs.app_configs import MULTI_TENANT from danswer.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT from danswer.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX @@ -47,9 +48,14 @@ from danswer.db.models import IndexAttempt from danswer.db.models import SearchSettings from danswer.db.search_settings import get_current_search_settings from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.swap_index import check_index_swap +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder from danswer.redis.redis_pool import get_redis_client from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import global_version +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import INDEXING_MODEL_SERVER_PORT logger = setup_logger() @@ -98,6 +104,21 @@ def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None: if not lock_beat.acquire(blocking=False): return None + with get_session_with_tenant(tenant_id=tenant_id) as db_session: + check_index_swap(db_session=db_session) + current_search_settings = get_current_search_settings(db_session) + # So that the first time users aren't surprised by really slow speed of first + # batch of documents indexed + if current_search_settings.provider_type is None and not MULTI_TENANT: + embedding_model = EmbeddingModel.from_db_model( + search_settings=current_search_settings, + server_host=INDEXING_MODEL_SERVER_HOST, + server_port=INDEXING_MODEL_SERVER_PORT, + ) + warm_up_bi_encoder( + embedding_model=embedding_model, + ) + cc_pair_ids: list[int] = [] with get_session_with_tenant(tenant_id) as db_session: cc_pairs = fetch_connector_credential_pairs(db_session) diff --git a/backend/danswer/db/swap_index.py b/backend/danswer/db/swap_index.py index a52b2c37d..415ade5df 100644 --- a/backend/danswer/db/swap_index.py +++ b/backend/danswer/db/swap_index.py @@ -42,6 +42,7 @@ def check_index_swap(db_session: Session) -> SearchSettings | None: logger.error("More unique indexings than cc pairs, should not occur") if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings: + # Swap indices now_old_search_settings = get_current_search_settings(db_session) update_search_settings_status( search_settings=now_old_search_settings, @@ -68,6 +69,4 @@ def check_index_swap(db_session: Session) -> SearchSettings | None: if MULTI_TENANT: return now_old_search_settings - else: - logger.warning("No need to swap indices") return None