Scripts to Reset Postgres and Vespa (#382)

This commit is contained in:
Yuhong Sun 2023-09-01 14:43:04 -07:00 committed by GitHub
parent 493648d28b
commit d73d81c867
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 22 deletions

View File

@ -1,10 +1,16 @@
# This file is purely for development use, not included in any builds
import requests
from qdrant_client.http.models import Distance
from qdrant_client.http.models import VectorParams
from typesense.exceptions import ObjectNotFound # type: ignore
from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
from danswer.configs.model_configs import DOC_EMBEDDING_DIM
from danswer.datastores.document_index import get_default_document_index
from danswer.datastores.document_index import SplitDocumentIndex
from danswer.datastores.typesense.store import create_typesense_collection
from danswer.datastores.vespa.store import DOCUMENT_ID_ENDPOINT
from danswer.datastores.vespa.store import VespaIndex
from danswer.utils.clients import get_qdrant_client
from danswer.utils.clients import get_typesense_client
from danswer.utils.logger import setup_logger
@ -35,6 +41,16 @@ def recreate_typesense_collection(collection_name: str) -> None:
create_typesense_collection(collection_name)
def wipe_vespa_index() -> None:
    """Delete every document from the Vespa content cluster.

    Issues a delete-by-selection request (``selection=true`` matches all
    documents) against ``DOCUMENT_ID_ENDPOINT`` for the cluster named by
    ``DOCUMENT_INDEX_NAME``. Vespa may end a visit before the whole corpus
    has been covered and hand back a ``continuation`` token in the response
    body, so the request is repeated with that token until a response no
    longer carries one. A single request is not guaranteed to empty a large
    index.

    Raises:
        requests.HTTPError: If Vespa answers any request with an error status.
    """
    continuation = None
    while True:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            params["continuation"] = continuation

        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
        response.raise_for_status()

        # No token in the response means the visit covered every document.
        continuation = response.json().get("continuation")
        if not continuation:
            break
if __name__ == "__main__":
    # Reset only the backend that is actually configured. Recreating the
    # Qdrant/Typesense collections unconditionally would touch stores that
    # are not in use when Vespa is the active index, and would recreate them
    # twice when the split index is active.
    document_index = get_default_document_index()
    if isinstance(document_index, SplitDocumentIndex):
        # The split index is reset by recreating both of its collections.
        recreate_qdrant_collection("danswer_index")
        recreate_typesense_collection("danswer_index")
    elif isinstance(document_index, VespaIndex):
        wipe_vespa_index()

View File

@ -5,6 +5,7 @@ from danswer.configs.app_configs import POSTGRES_HOST
from danswer.configs.app_configs import POSTGRES_PASSWORD
from danswer.configs.app_configs import POSTGRES_PORT
from danswer.configs.app_configs import POSTGRES_USER
from danswer.db.credentials import create_initial_public_credential
def wipe_all_rows(database: str) -> None:
@ -15,38 +16,46 @@ def wipe_all_rows(database: str) -> None:
host=POSTGRES_HOST,
port=POSTGRES_PORT,
)
cur = conn.cursor()
# Disable triggers to prevent foreign key constraints from being checked
cur.execute("SET session_replication_role = 'replica';")
# Fetch all table names in the current database
cur.execute(
"""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_type = 'BASE TABLE'
"""
SELECT tablename
FROM pg_tables
WHERE schemaname = 'public'
"""
)
table_names = cur.fetchall()
tables = cur.fetchall()
# have to delete from these first to not run into psycopg2.errors.ForeignKeyViolation
cur.execute("DELETE FROM chunk")
cur.execute("DELETE FROM document_by_connector_credential_pair")
cur.execute("DELETE FROM document")
cur.execute("DELETE FROM connector_credential_pair")
cur.execute("DELETE FROM index_attempt")
cur.execute("DELETE FROM credential")
conn.commit()
for table in tables:
table_name = table[0]
for table_name in table_names:
if table_name[0] == "alembic_version":
# Don't touch migration history
if table_name == "alembic_version":
continue
cur.execute(f'DELETE FROM "{table_name[0]}"')
print(f"Deleted all rows from table {table_name[0]}")
conn.commit()
print(f"Deleting all rows from {table_name}...")
cur.execute(f'DELETE FROM "{table_name}"')
# Re-enable triggers
cur.execute("SET session_replication_role = 'origin';")
conn.commit()
cur.close()
conn.close()
print("Finished wiping all rows.")
if __name__ == "__main__":
    print("Cleaning up all Danswer tables")
    wipe_all_rows(POSTGRES_DB)
    # The wipe removes every credential row as well, so recreate the initial
    # public credential once the tables are empty.
    create_initial_public_credential()
    print("To keep data consistent, it's best to wipe the document index as well.")
    print(
        "To be safe, it's best to restart the Danswer services "
        "(API Server and Background Tasks)"
    )