Update reset_indexes.py

Error Handling: Add more specific error handling to make it easier to debug issues.
Configuration Management: Use environment variables or a configuration file for settings like DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT.
Logging: Improve logging to include more details about the operations.
Retry Mechanism: Add a retry mechanism for network requests to handle transient errors.
Testing: Add unit tests for the functions to ensure they work as expected.
This commit is contained in:
YASH
2024-10-22 17:37:07 +05:30
committed by GitHub
parent 6e9b6a1075
commit 8f236a1288

View File

@@ -1,9 +1,12 @@
# This file is purely for development use, not included in any builds
import os
import sys
import logging
import requests
from requests.exceptions import RequestException
from time import sleep
# Put the directory one level above this script's folder on sys.path so the
# script can be run directly, without needing `PYTHONPATH=.`
script_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(script_dir)
sys.path.append(parent_dir)
@@ -14,23 +17,48 @@ from danswer.utils.logger import setup_logger # noqa: E402
# Module-level logger built by the project's setup_logger helper; the functions
# below use it for progress and error reporting.
logger = setup_logger()
def wipe_vespa_index() -> None:
    """
    Wipes the Vespa index by deleting all documents.

    Sends DELETE requests to DOCUMENT_ID_ENDPOINT with `selection="true"` for
    the DOCUMENT_INDEX_NAME cluster, passing along the "continuation" token
    from each response until a response no longer contains one.

    Each request is attempted up to `retries` times, sleeping with exponential
    backoff (1s, 2s, ...) between failed attempts. If every attempt for a
    request fails, an error is logged and the process exits with status 1.
    """
    continuation = None
    should_continue = True
    retries = 3

    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params["continuation"] = continuation

        for attempt in range(retries):
            try:
                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
                response.raise_for_status()

                response_json = response.json()
                logger.info(f"Response: {response_json}")

                continuation = response_json.get("continuation")
                should_continue = bool(continuation)
                break  # Exit the retry loop if the request is successful
            except RequestException as e:
                logger.error(f"Request failed: {e}")
                # No point backing off after the last attempt; we exit below.
                if attempt < retries - 1:
                    sleep(2 ** attempt)  # Exponential backoff
        else:
            # for/else: reached only when no attempt hit `break`, i.e. all failed.
            logger.error("Max retries exceeded. Exiting.")
            sys.exit(1)
def main() -> None:
    """
    Main function to execute the script.

    Runs wipe_vespa_index() and logs a success message when it returns. Any
    Exception raised along the way is logged and the process exits with
    status 1.
    """
    try:
        wipe_vespa_index()
        logger.info("Vespa index wiped successfully.")
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        sys.exit(1)
if __name__ == "__main__":
    # Single entry point: main() performs the wipe and handles errors, so the
    # wipe must not also be invoked directly here.
    main()