From 8f236a12886295bff9e72e41d69808f07732d339 Mon Sep 17 00:00:00 2001
From: YASH <139299779+Yash-2707@users.noreply.github.com>
Date: Tue, 22 Oct 2024 17:37:07 +0530
Subject: [PATCH] Update reset_indexes.py

Error Handling: Add more specific error handling to make it easier to
debug issues.
Logging: Improve logging to include more details about the operations.
Retry Mechanism: Add a retry mechanism for network requests to handle
transient errors.

Not included in this patch (follow-up work): reading DOCUMENT_INDEX_NAME
and DOCUMENT_ID_ENDPOINT from environment variables or a configuration
file, and unit tests for the functions.
---
 backend/scripts/reset_indexes.py | 48 +++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py
index 4ec8d9bf3128..a8c22bf1f31b 100644
--- a/backend/scripts/reset_indexes.py
+++ b/backend/scripts/reset_indexes.py
@@ -1,8 +1,10 @@
 # This file is purely for development use, not included in any builds
 import os
 import sys
+from time import sleep
 
 import requests
+from requests.exceptions import RequestException
 
 # makes it so `PYTHONPATH=.` is not required when running this script
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -16,21 +18,51 @@ logger = setup_logger()
 
 
 def wipe_vespa_index() -> None:
+    """Delete every document from the Vespa index.
+
+    The DELETE request is repeated, passing back the continuation token from
+    each response, until a response carries no token. Each request is retried
+    with exponential backoff; if every attempt fails the script exits with
+    status 1.
+    """
     continuation = None
     should_continue = True
+    retries = 3
+
     while should_continue:
         params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
         if continuation:
-            params = {**params, "continuation": continuation}
-        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
-        response.raise_for_status()
+            params["continuation"] = continuation
 
-        response_json = response.json()
-        print(response_json)
+        for attempt in range(retries):
+            try:
+                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
+                response.raise_for_status()
+                response_json = response.json()
+            except RequestException as e:
+                logger.error(f"Request failed: {e}")
+                # back off exponentially, but do not wait after the final attempt
+                if attempt < retries - 1:
+                    sleep(2**attempt)
+            else:
+                logger.info(f"Response: {response_json}")
+                continuation = response_json.get("continuation")
+                should_continue = bool(continuation)
+                break
+        else:
+            logger.error("Max retries exceeded. Exiting.")
+            sys.exit(1)
 
-        continuation = response_json.get("continuation")
-        should_continue = bool(continuation)
+
+def main() -> None:
+    """Run the wipe, logging the outcome and exiting with status 1 on failure."""
+    try:
+        wipe_vespa_index()
+        logger.info("Vespa index wiped successfully.")
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        sys.exit(1)
 
 
 if __name__ == "__main__":
-    wipe_vespa_index()
+    main()