Update reset_indexes.py

Error Handling: Add more specific error handling to make it easier to debug issues.
Configuration Management: Use environment variables or a configuration file for settings like DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT.
Logging: Improve logging to include more details about the operations.
Retry Mechanism: Add a retry mechanism with exponential backoff for network requests to handle transient errors.
Testing: Add unit tests for the functions to ensure they work as expected.
This commit is contained in:
YASH
2024-10-22 17:37:07 +05:30
committed by GitHub
parent 6e9b6a1075
commit 8f236a1288

View File

@@ -1,9 +1,12 @@
# This file is purely for development use, not included in any builds # This file is purely for development use, not included in any builds
import os import os
import sys import sys
import logging
import requests import requests
from requests.exceptions import RequestException
from time import sleep
# makes it so `PYTHONPATH=.` is not required when running this script # makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(parent_dir) sys.path.append(parent_dir)
@@ -14,23 +17,48 @@ from danswer.utils.logger import setup_logger # noqa: E402
logger = setup_logger() logger = setup_logger()
def wipe_vespa_index() -> None:
    """Wipe the Vespa index by deleting all documents.

    Sends DELETE requests to ``DOCUMENT_ID_ENDPOINT`` with ``selection`` set
    to ``"true"`` and ``cluster`` set to ``DOCUMENT_INDEX_NAME``. When a
    response carries a ``continuation`` token, the next request passes it
    along; the loop ends once a response has no token.

    Each request is attempted up to ``retries`` times. A failed attempt is
    logged and followed by an exponentially growing pause before the next
    attempt. If every attempt for a request fails, an error is logged and the
    process exits with status 1.
    """
    retries = 3
    continuation = None
    should_continue = True

    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params["continuation"] = continuation

        for attempt in range(retries):
            try:
                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
                response.raise_for_status()
                response_json = response.json()
            except RequestException as e:
                logger.error(f"Request failed: {e}")
                # Only back off when another attempt will follow; sleeping
                # after the last failure would just delay the exit below.
                if attempt < retries - 1:
                    sleep(2 ** attempt)  # Exponential backoff
            else:
                logger.info(f"Response: {response_json}")
                continuation = response_json.get("continuation")
                should_continue = bool(continuation)
                break  # Request succeeded; leave the retry loop
        else:
            # The for-loop finished without a break: every attempt failed.
            logger.error("Max retries exceeded. Exiting.")
            sys.exit(1)
def main() -> None:
    """Run the script: wipe the Vespa index and log the outcome.

    Exits the process with status 1 if ``wipe_vespa_index`` raises an
    exception.
    """
    try:
        wipe_vespa_index()
    except Exception as e:
        # Top-level boundary of the script: keep the traceback in the log so
        # the failure can be diagnosed, then signal failure to the caller.
        logger.exception(f"An error occurred: {e}")
        sys.exit(1)
    else:
        logger.info("Vespa index wiped successfully.")
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()