From d73d81c867c0eeafe6ce653a64e8ddf2fc6a6516 Mon Sep 17 00:00:00 2001 From: Yuhong Sun Date: Fri, 1 Sep 2023 14:43:04 -0700 Subject: [PATCH] Scripts to Reset Postgres and Vespa (#382) --- backend/scripts/reset_indexes.py | 20 +++++++++++-- backend/scripts/reset_postgres.py | 49 ++++++++++++++++++------------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py index b7f94c593..2158bc18c 100644 --- a/backend/scripts/reset_indexes.py +++ b/backend/scripts/reset_indexes.py @@ -1,10 +1,16 @@ # This file is purely for development use, not included in any builds +import requests from qdrant_client.http.models import Distance from qdrant_client.http.models import VectorParams from typesense.exceptions import ObjectNotFound # type: ignore +from danswer.configs.app_configs import DOCUMENT_INDEX_NAME from danswer.configs.model_configs import DOC_EMBEDDING_DIM +from danswer.datastores.document_index import get_default_document_index +from danswer.datastores.document_index import SplitDocumentIndex from danswer.datastores.typesense.store import create_typesense_collection +from danswer.datastores.vespa.store import DOCUMENT_ID_ENDPOINT +from danswer.datastores.vespa.store import VespaIndex from danswer.utils.clients import get_qdrant_client from danswer.utils.clients import get_typesense_client from danswer.utils.logger import setup_logger @@ -35,6 +41,16 @@ def recreate_typesense_collection(collection_name: str) -> None: create_typesense_collection(collection_name) +def wipe_vespa_index() -> None: + params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME} + response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params) + response.raise_for_status() + + if __name__ == "__main__": - recreate_qdrant_collection("danswer_index") - recreate_typesense_collection("danswer_index") + document_index = get_default_document_index() + if isinstance(document_index, SplitDocumentIndex): + recreate_qdrant_collection("danswer_index") + recreate_typesense_collection("danswer_index") + elif isinstance(document_index, VespaIndex): + wipe_vespa_index() diff --git a/backend/scripts/reset_postgres.py b/backend/scripts/reset_postgres.py index f88cf0dd6..59a3fd055 100644 --- a/backend/scripts/reset_postgres.py +++ b/backend/scripts/reset_postgres.py @@ -5,6 +5,7 @@ from danswer.configs.app_configs import POSTGRES_HOST from danswer.configs.app_configs import POSTGRES_PASSWORD from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER +from danswer.db.credentials import create_initial_public_credential def wipe_all_rows(database: str) -> None: @@ -15,38 +16,46 @@ def wipe_all_rows(database: str) -> None: host=POSTGRES_HOST, port=POSTGRES_PORT, ) - cur = conn.cursor() + + # Disable triggers to prevent foreign key constraints from being checked + cur.execute("SET session_replication_role = 'replica';") + + # Fetch all table names in the current database cur.execute( """ - SELECT table_name - FROM information_schema.tables - WHERE table_schema = 'public' - AND table_type = 'BASE TABLE' - """ + SELECT tablename + FROM pg_tables + WHERE schemaname = 'public' + """ ) - table_names = cur.fetchall() + tables = cur.fetchall() - # have to delete from these first to not run into psycopg2.errors.ForeignKeyViolation - cur.execute("DELETE FROM chunk") - cur.execute("DELETE FROM document_by_connector_credential_pair") - cur.execute("DELETE FROM document") - cur.execute("DELETE FROM connector_credential_pair") - cur.execute("DELETE FROM index_attempt") - cur.execute("DELETE FROM credential") - conn.commit() + for table in tables: + table_name = table[0] - for table_name in table_names: - if table_name[0] == "alembic_version": + # Don't touch migration history + if table_name == "alembic_version": continue - cur.execute(f'DELETE FROM "{table_name[0]}"') - print(f"Deleted all rows from table {table_name[0]}") - conn.commit() + print(f"Deleting all rows from {table_name}...") + cur.execute(f'DELETE FROM "{table_name}"') + + # Re-enable triggers + cur.execute("SET session_replication_role = 'origin';") + + conn.commit() cur.close() conn.close() + print("Finished wiping all rows.") if __name__ == "__main__": + print("Cleaning up all Danswer tables") wipe_all_rows(POSTGRES_DB) + create_initial_public_credential() + print("To keep data consistent, it's best to wipe the document index as well.") + print( + "To be safe, it's best to restart the Danswer services (API Server and Background Tasks" + )