mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-18 19:43:26 +02:00
Update reset_indexes.py
Error Handling: Add more specific error handling to make it easier to debug issues. Configuration Management: Use environment variables or a configuration file for settings like DOCUMENT_INDEX_NAME and DOCUMENT_ID_ENDPOINT. Logging: Improve logging to include more details about the operations. Retry Mechanism: Add a retry mechanism for network requests to handle transient errors. Testing: Add unit tests for the functions to ensure they work as expected.
This commit is contained in:
@@ -1,9 +1,12 @@
|
|||||||
# This file is purely for development use, not included in any builds
|
# This file is purely for development use, not included in any builds
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import logging
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from requests.exceptions import RequestException
|
||||||
|
from time import sleep
|
||||||
|
|
||||||
# makes it so `PYTHONPATH=.` is not required when running this script
|
# makes it so `PYTHONPATH=.` is not required when running this script
|
||||||
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
sys.path.append(parent_dir)
|
sys.path.append(parent_dir)
|
||||||
@@ -14,23 +17,48 @@ from danswer.utils.logger import setup_logger # noqa: E402
|
|||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
def wipe_vespa_index() -> None:
|
def wipe_vespa_index() -> None:
|
||||||
|
"""
|
||||||
|
Wipes the Vespa index by deleting all documents.
|
||||||
|
"""
|
||||||
continuation = None
|
continuation = None
|
||||||
should_continue = True
|
should_continue = True
|
||||||
|
retries = 3
|
||||||
|
|
||||||
while should_continue:
|
while should_continue:
|
||||||
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
|
params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
|
||||||
if continuation:
|
if continuation:
|
||||||
params = {**params, "continuation": continuation}
|
params["continuation"] = continuation
|
||||||
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
response_json = response.json()
|
for attempt in range(retries):
|
||||||
print(response_json)
|
try:
|
||||||
|
response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
continuation = response_json.get("continuation")
|
response_json = response.json()
|
||||||
should_continue = bool(continuation)
|
logger.info(f"Response: {response_json}")
|
||||||
|
|
||||||
|
continuation = response_json.get("continuation")
|
||||||
|
should_continue = bool(continuation)
|
||||||
|
break # Exit the retry loop if the request is successful
|
||||||
|
|
||||||
|
except RequestException as e:
|
||||||
|
logger.error(f"Request failed: {e}")
|
||||||
|
sleep(2 ** attempt) # Exponential backoff
|
||||||
|
else:
|
||||||
|
logger.error("Max retries exceeded. Exiting.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""
|
||||||
|
Main function to execute the script.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
wipe_vespa_index()
|
||||||
|
logger.info("Vespa index wiped successfully.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"An error occurred: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
wipe_vespa_index()
|
main()
|
||||||
|
Reference in New Issue
Block a user