mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-10 21:09:51 +02:00
Scripts to Reset Postgres and Vespa (#382)
This commit is contained in:
parent
493648d28b
commit
d73d81c867
@ -1,10 +1,16 @@
|
||||
# This file is purely for development use, not included in any builds
|
||||
import requests
|
||||
from qdrant_client.http.models import Distance
|
||||
from qdrant_client.http.models import VectorParams
|
||||
from typesense.exceptions import ObjectNotFound # type: ignore
|
||||
|
||||
from danswer.configs.app_configs import DOCUMENT_INDEX_NAME
|
||||
from danswer.configs.model_configs import DOC_EMBEDDING_DIM
|
||||
from danswer.datastores.document_index import get_default_document_index
|
||||
from danswer.datastores.document_index import SplitDocumentIndex
|
||||
from danswer.datastores.typesense.store import create_typesense_collection
|
||||
from danswer.datastores.vespa.store import DOCUMENT_ID_ENDPOINT
|
||||
from danswer.datastores.vespa.store import VespaIndex
|
||||
from danswer.utils.clients import get_qdrant_client
|
||||
from danswer.utils.clients import get_typesense_client
|
||||
from danswer.utils.logger import setup_logger
|
||||
@ -35,6 +41,16 @@ def recreate_typesense_collection(collection_name: str) -> None:
|
||||
create_typesense_collection(collection_name)
|
||||
|
||||
|
||||
def wipe_vespa_index() -> None:
    """Delete every document from the Vespa content cluster.

    Sends a selection-based DELETE to ``DOCUMENT_ID_ENDPOINT`` with
    ``selection=true`` for the ``DOCUMENT_INDEX_NAME`` cluster. Vespa may
    stop before it has visited every document and hand back a
    ``continuation`` token in the response body; the request is repeated
    with that token until no token is returned, so the wipe is complete
    rather than best-effort.

    Raises:
        requests.HTTPError: if any delete request returns an error status.
    """
    continuation = None
    while True:
        params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME}
        if continuation:
            # Resume the visit where the previous request stopped.
            params["continuation"] = continuation

        response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params)
        response.raise_for_status()

        # No token in the response means the whole cluster has been visited.
        continuation = response.json().get("continuation")
        if not continuation:
            break
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Reset only the document index backend that is actually configured.
    # Recreating the Qdrant/Typesense collections unconditionally would touch
    # services that are not in use when Vespa is the backend, and would
    # recreate them twice when the split index is.
    document_index = get_default_document_index()
    if isinstance(document_index, SplitDocumentIndex):
        recreate_qdrant_collection("danswer_index")
        recreate_typesense_collection("danswer_index")
    elif isinstance(document_index, VespaIndex):
        wipe_vespa_index()
|
||||
|
@ -5,6 +5,7 @@ from danswer.configs.app_configs import POSTGRES_HOST
|
||||
from danswer.configs.app_configs import POSTGRES_PASSWORD
|
||||
from danswer.configs.app_configs import POSTGRES_PORT
|
||||
from danswer.configs.app_configs import POSTGRES_USER
|
||||
from danswer.db.credentials import create_initial_public_credential
|
||||
|
||||
|
||||
def wipe_all_rows(database: str) -> None:
|
||||
@ -15,38 +16,46 @@ def wipe_all_rows(database: str) -> None:
|
||||
host=POSTGRES_HOST,
|
||||
port=POSTGRES_PORT,
|
||||
)
|
||||
|
||||
cur = conn.cursor()
|
||||
|
||||
# Disable triggers to prevent foreign key constraints from being checked
|
||||
cur.execute("SET session_replication_role = 'replica';")
|
||||
|
||||
# Fetch all table names in the current database
|
||||
cur.execute(
|
||||
"""
|
||||
SELECT table_name
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = 'public'
|
||||
AND table_type = 'BASE TABLE'
|
||||
"""
|
||||
SELECT tablename
|
||||
FROM pg_tables
|
||||
WHERE schemaname = 'public'
|
||||
"""
|
||||
)
|
||||
|
||||
table_names = cur.fetchall()
|
||||
tables = cur.fetchall()
|
||||
|
||||
# have to delete from these first to not run into psycopg2.errors.ForeignKeyViolation
|
||||
cur.execute("DELETE FROM chunk")
|
||||
cur.execute("DELETE FROM document_by_connector_credential_pair")
|
||||
cur.execute("DELETE FROM document")
|
||||
cur.execute("DELETE FROM connector_credential_pair")
|
||||
cur.execute("DELETE FROM index_attempt")
|
||||
cur.execute("DELETE FROM credential")
|
||||
conn.commit()
|
||||
for table in tables:
|
||||
table_name = table[0]
|
||||
|
||||
for table_name in table_names:
|
||||
if table_name[0] == "alembic_version":
|
||||
# Don't touch migration history
|
||||
if table_name == "alembic_version":
|
||||
continue
|
||||
cur.execute(f'DELETE FROM "{table_name[0]}"')
|
||||
print(f"Deleted all rows from table {table_name[0]}")
|
||||
conn.commit()
|
||||
|
||||
print(f"Deleting all rows from {table_name}...")
|
||||
cur.execute(f'DELETE FROM "{table_name}"')
|
||||
|
||||
# Re-enable triggers
|
||||
cur.execute("SET session_replication_role = 'origin';")
|
||||
|
||||
conn.commit()
|
||||
cur.close()
|
||||
conn.close()
|
||||
print("Finished wiping all rows.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Dev-only entry point: empty the Danswer Postgres tables, then put back
    # the initial public credential.
    print("Cleaning up all Danswer tables")
    wipe_all_rows(POSTGRES_DB)
    # NOTE(review): presumably needed because the wipe also removed the
    # credential rows; confirm against create_initial_public_credential.
    create_initial_public_credential()
    print("To keep data consistent, it's best to wipe the document index as well.")
    print(
        "To be safe, it's best to restart the Danswer services "
        "(API Server and Background Tasks)"
    )
|
||||
|
Loading…
x
Reference in New Issue
Block a user