Update reset_indexes.py

Error handling: add more specific error handling to make issues easier to debug.
Configuration management: use environment variables or a configuration file for settings such as DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT.
Logging: improve logging to include more details about the operations.
Retry mechanism: add a retry mechanism for network requests to handle transient errors.
Testing: add unit tests for the functions to ensure they work as expected.
2025-09-18 19:43:26 +02:00 · 2024-10-22 17:37:07 +05:30
parent 6e9b6a1075
commit 8f236a1288
1 changed files with 38 additions and 10 deletions
--- a/backend/scripts/reset_indexes.py
+++ b/backend/scripts/reset_indexes.py
@@ -1,9 +1,12 @@
 # This file is purely for development use, not included in any builds
 import os
 import sys
-
+import logging
 import requests
 from requests.exceptions import RequestException
 from time import sleep
 # makes it so `PYTHONPATH=.` is not required when running this script
 parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(parent_dir)
@@ -14,23 +17,48 @@ from danswer.utils.logger import setup_logger  # noqa: E402
 logger = setup_logger()
 def wipe_vespa_index() -> None:
    """
    Wipes the Vespa index by deleting all documents.
    """
    continuation = None
    should_continue = True
    retries = 3
    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
-            params = {**params, "continuation": continuation}
+            params["continuation"] = continuation
        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
        response.raise_for_status()
-        response_json = response.json()
+        for attempt in range(retries):
-        print(response_json)
+            try:
                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
                response.raise_for_status()
-        continuation = response_json.get("continuation")
+                response_json = response.json()
-        should_continue = bool(continuation)
+                logger.info(f"Response: {response_json}")
                continuation = response_json.get("continuation")
                should_continue = bool(continuation)
                break  # Exit the retry loop if the request is successful
            except RequestException as e:
                logger.error(f"Request failed: {e}")
                sleep(2 ** attempt)  # Exponential backoff
        else:
            logger.error("Max retries exceeded. Exiting.")
            sys.exit(1)
 def main():
    """
    Main function to execute the script.
    """
    try:
        wipe_vespa_index()
        logger.info("Vespa index wiped successfully.")
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        sys.exit(1)
 if __name__ == "__main__":
-    wipe_vespa_index()
+    main()