Merge pull request #2921 from danswer-ai/feature/reset_indexes

Feature/reset indexes
This commit is contained in:
hagen-danswer 2024-10-28 06:46:04 -07:00 committed by GitHub
commit 52bd1ad8ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,8 +1,10 @@
# This file is purely for development use, not included in any builds # This file is purely for development use, not included in any builds
import os import os
import sys import sys
from time import sleep
import requests import requests
from requests.exceptions import RequestException
# makes it so `PYTHONPATH=.` is not required when running this script # makes it so `PYTHONPATH=.` is not required when running this script
parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@ -15,22 +17,58 @@ from danswer.utils.logger import setup_logger # noqa: E402
logger = setup_logger() logger = setup_logger()
def wipe_vespa_index() -> bool:
    """Wipe the Vespa index by deleting all documents.

    Sends DELETE requests to ``DOCUMENT_ID_ENDPOINT`` with the selection
    ``"true"`` for the ``DOCUMENT_INDEX_NAME`` cluster, passing back the
    ``continuation`` token from each response until a response carries none.
    Every request is attempted up to ``RETRIES`` times, with exponential
    backoff between attempts.

    Returns:
        True once a response without a continuation token is received;
        False if a single request fails ``RETRIES`` times in a row.
    """
    RETRIES = 3
    continuation = None
    should_continue = True

    while should_continue:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params["continuation"] = continuation

        for attempt in range(RETRIES):
            try:
                response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
                response.raise_for_status()
                response_json = response.json()
            except RequestException:
                logger.exception("Request failed")
                # Back off only when another attempt follows; sleeping after
                # the last failure would just delay the error return.
                if attempt < RETRIES - 1:
                    sleep(2**attempt)  # Exponential backoff
            else:
                logger.info(f"Response: {response_json}")
                continuation = response_json.get("continuation")
                should_continue = bool(continuation)
                break  # Request succeeded; leave the retry loop
        else:
            # The for-loop finished without a break: every attempt failed.
            logger.error(f"Max retries ({RETRIES}) exceeded. Exiting.")
            return False

    return True
def main() -> int:
    """Run the Vespa index wipe and map its outcome to a process exit code.

    Returns:
        0 when ``wipe_vespa_index`` reports success; 1 when it reports
        failure or raises an exception.
    """
    try:
        succeeded = wipe_vespa_index()
    except Exception:
        logger.exception("wipe_vespa_index exceptioned.")
        return 1

    if not succeeded:
        logger.info("Vespa index wipe failed.")
        # Non-zero exit status signals failure to the calling shell.
        return 1

    logger.info("Vespa index wiped successfully.")
    return 0
# Script entry point: run main() and use its return value as the process
# exit status.
if __name__ == "__main__":
    raise SystemExit(main())