From f52d1142ebdb77a1c78be10fe67ea6b2bc93b30c Mon Sep 17 00:00:00 2001 From: rkuo-danswer Date: Mon, 9 Sep 2024 20:10:25 -0700 Subject: [PATCH] =?UTF-8?q?Fail=20instead=20of=20continuing=20if=20vespa?= =?UTF-8?q?=20cannot=20be=20reached=20within=20the=20time=E2=80=A6=20(#237?= =?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fail instead of continuing if vespa cannot be reached within the timeout period * improve startup readability --------- Co-authored-by: Richard Kuo --- backend/danswer/main.py | 32 ++++++++++++++----- .../search_nlp_models.py | 4 +-- .../tests/integration/common_utils/reset.py | 4 ++- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/backend/danswer/main.py b/backend/danswer/main.py index c518e463e6d..a00826f11c8 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -320,21 +320,32 @@ def setup_vespa( document_index: DocumentIndex, index_setting: IndexingSetting, secondary_index_setting: IndexingSetting | None, -) -> None: +) -> bool: # Vespa startup is a bit slow, so give it a few seconds - wait_time = 5 - for _ in range(5): + WAIT_SECONDS = 5 + VESPA_ATTEMPTS = 5 + for x in range(VESPA_ATTEMPTS): try: + logger.notice(f"Setting up Vespa (attempt {x+1}/{VESPA_ATTEMPTS})...") document_index.ensure_indices_exist( index_embedding_dim=index_setting.model_dim, secondary_index_embedding_dim=secondary_index_setting.model_dim if secondary_index_setting else None, ) - break + + logger.notice("Vespa setup complete.") + return True except Exception: - logger.notice(f"Waiting on Vespa, retrying in {wait_time} seconds...") - time.sleep(wait_time) + logger.notice( + f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." + ) + time.sleep(WAIT_SECONDS) + + logger.error( + f"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})" + ) + return False @asynccontextmanager @@ -357,7 +368,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # fill up Postgres connection pools await warm_up_connections() - # We cache this at the beginning so there is no delay in the first telemtry + # We cache this at the beginning so there is no delay in the first telemetry get_or_generate_uuid() with Session(engine) as db_session: @@ -419,13 +430,18 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: if secondary_search_settings else None, ) - setup_vespa( + + success = setup_vespa( document_index, IndexingSetting.from_db_model(search_settings), IndexingSetting.from_db_model(secondary_search_settings) if secondary_search_settings else None, ) + if not success: + raise RuntimeError( + "Could not connect to Vespa within the specified timeout." + ) logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") if search_settings.provider_type is None: diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py index 117205761fe..6dcec724345 100644 --- a/backend/danswer/natural_language_processing/search_nlp_models.py +++ b/backend/danswer/natural_language_processing/search_nlp_models.py @@ -352,8 +352,8 @@ def warm_up_retry( return func(*args, **kwargs) except Exception as e: exceptions.append(e) - logger.exception( - f"Attempt {attempt + 1} failed; retrying in {delay} seconds..." + logger.info( + f"Attempt {attempt + 1}/{tries} failed; retrying in {delay} seconds..." ) time.sleep(delay) raise Exception(f"All retries failed: {exceptions}") diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 0b13b96501f..a13ec184b45 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -131,11 +131,13 @@ def reset_vespa() -> None: search_settings = get_current_search_settings(db_session) index_name = search_settings.index_name - setup_vespa( + success = setup_vespa( document_index=VespaIndex(index_name=index_name, secondary_index_name=None), index_setting=IndexingSetting.from_db_model(search_settings), secondary_index_setting=None, ) + if not success: + raise RuntimeError("Could not connect to Vespa within the specified timeout.") for _ in range(5): try: