From af647959f6dbaa5c410d442f84c0c3942fd55b8b Mon Sep 17 00:00:00 2001
From: Chris Weaver <25087905+Weves@users.noreply.github.com>
Date: Mon, 19 Aug 2024 11:07:00 -0700
Subject: [PATCH] Performance Improvements (#2162)
---
...9164_chosen_assistants_changed_to_jsonb.py | 65 ++++
backend/danswer/auth/users.py | 4 +-
backend/danswer/configs/app_configs.py | 3 +
backend/danswer/db/chat.py | 4 +
backend/danswer/db/engine.py | 34 ++-
backend/danswer/db/index_attempt.py | 24 +-
backend/danswer/db/models.py | 2 +-
backend/danswer/db/persona.py | 13 +-
backend/danswer/server/documents/connector.py | 1 -
.../danswer/server/features/persona/api.py | 6 +-
.../docker_compose/docker-compose.dev.yml | 4 +-
web/src/app/assistants/gallery/page.tsx | 43 +--
.../assistants/mine/WrappedInputPrompts.tsx | 1 -
web/src/app/assistants/mine/page.tsx | 41 +--
web/src/app/chat/ChatPage.tsx | 6 +-
.../chat/modal/configuration/FiltersTab.tsx | 281 ------------------
web/src/app/chat/page.tsx | 3 -
.../chat/sessionSidebar/HistorySidebar.tsx | 10 +-
web/src/app/layout.tsx | 28 +-
web/src/app/search/page.tsx | 2 +-
web/src/components/settings/lib.ts | 18 +-
web/src/lib/chat/fetchSomeChatData.ts | 236 +++++++++++++++
22 files changed, 426 insertions(+), 403 deletions(-)
create mode 100644 backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py
delete mode 100644 web/src/app/chat/modal/configuration/FiltersTab.tsx
create mode 100644 web/src/lib/chat/fetchSomeChatData.ts
diff --git a/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py
new file mode 100644
index 000000000000..e94ab75fb178
--- /dev/null
+++ b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py
@@ -0,0 +1,65 @@
+"""chosen_assistants changed to jsonb
+
+Revision ID: da4c21c69164
+Revises: c5b692fa265c
+Create Date: 2024-08-18 19:06:47.291491
+
+"""
+import json
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "da4c21c69164"
+down_revision = "c5b692fa265c"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Convert ``user.chosen_assistants`` from an integer array to JSONB.
+
+    The column is dropped and re-created with the new type, so the existing
+    values are read into memory first and written back afterwards, serialized
+    as JSON.
+    """
+    conn = op.get_bind()
+    # Materialize every row *before* the column is dropped so the saved data
+    # never depends on a still-open cursor surviving the DDL below.
+    existing_rows = conn.execute(
+        sa.text("select id, chosen_assistants from public.user")
+    ).fetchall()
+    op.drop_column(
+        "user",
+        "chosen_assistants",
+    )
+    op.add_column(
+        "user",
+        sa.Column(
+            "chosen_assistants",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+        ),
+    )
+    update_stmt = sa.text(
+        "update public.user set chosen_assistants = :chosen_assistants where id = :id"
+    )
+    for user_id, chosen_assistants in existing_rows:
+        if chosen_assistants is None:
+            # The freshly added column is already NULL for every row. Skipping
+            # keeps "not specified" as SQL NULL instead of storing the JSON
+            # value ``null`` (which is what json.dumps(None) would produce).
+            continue
+        conn.execute(
+            update_stmt,
+            {"chosen_assistants": json.dumps(chosen_assistants), "id": user_id},
+        )
+
+
+def downgrade() -> None:
+    """Revert ``user.chosen_assistants`` from JSONB back to an integer array.
+
+    Mirrors ``upgrade``: the current values are read into memory, the column
+    is dropped and re-created as ``ARRAY(Integer)``, and the saved values are
+    written back row by row.
+    """
+    conn = op.get_bind()
+    # Materialize every row *before* the column is dropped so the saved data
+    # never depends on a still-open cursor surviving the DDL below.
+    existing_rows = conn.execute(
+        sa.text("select id, chosen_assistants from public.user")
+    ).fetchall()
+    op.drop_column(
+        "user",
+        "chosen_assistants",
+    )
+    op.add_column(
+        "user",
+        sa.Column("chosen_assistants", postgresql.ARRAY(sa.Integer()), nullable=True),
+    )
+    update_stmt = sa.text(
+        "update public.user set chosen_assistants = :chosen_assistants where id = :id"
+    )
+    for user_id, chosen_assistants in existing_rows:
+        if chosen_assistants is None:
+            # The re-created column is already NULL for every row, so there is
+            # nothing to restore for users without a stored selection.
+            continue
+        conn.execute(
+            update_stmt,
+            {"chosen_assistants": chosen_assistants, "id": user_id},
+        )
diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py
index ce3b8e88f359..76b4ca812c95 100644
--- a/backend/danswer/auth/users.py
+++ b/backend/danswer/auth/users.py
@@ -59,9 +59,7 @@ from danswer.db.users import get_user_by_email
from danswer.utils.logger import setup_logger
from danswer.utils.telemetry import optional_telemetry
from danswer.utils.telemetry import RecordType
-from danswer.utils.variable_functionality import (
- fetch_versioned_implementation,
-)
+from danswer.utils.variable_functionality import fetch_versioned_implementation
logger = setup_logger()
diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py
index d89c39ca8cce..cccb81c9aa22 100644
--- a/backend/danswer/configs/app_configs.py
+++ b/backend/danswer/configs/app_configs.py
@@ -326,6 +326,9 @@ LOG_VESPA_TIMING_INFORMATION = (
)
LOG_ENDPOINT_LATENCY = os.environ.get("LOG_ENDPOINT_LATENCY", "").lower() == "true"
LOG_POSTGRES_LATENCY = os.environ.get("LOG_POSTGRES_LATENCY", "").lower() == "true"
+LOG_POSTGRES_CONN_COUNTS = (
+ os.environ.get("LOG_POSTGRES_CONN_COUNTS", "").lower() == "true"
+)
# Anonymous usage telemetry
DISABLE_TELEMETRY = os.environ.get("DISABLE_TELEMETRY", "").lower() == "true"
diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py
index 301c481033d5..06ece1e922f3 100644
--- a/backend/danswer/db/chat.py
+++ b/backend/danswer/db/chat.py
@@ -117,6 +117,7 @@ def get_chat_sessions_by_user(
deleted: bool | None,
db_session: Session,
only_one_shot: bool = False,
+ limit: int = 50,
) -> list[ChatSession]:
stmt = select(ChatSession).where(ChatSession.user_id == user_id)
@@ -130,6 +131,9 @@ def get_chat_sessions_by_user(
if deleted is not None:
stmt = stmt.where(ChatSession.deleted == deleted)
+ if limit:
+ stmt = stmt.limit(limit)
+
result = db_session.execute(stmt)
chat_sessions = result.scalars().all()
diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py
index 6268018901cc..94b5d0123ccc 100644
--- a/backend/danswer/db/engine.py
+++ b/backend/danswer/db/engine.py
@@ -15,6 +15,7 @@ from sqlalchemy.ext.asyncio import create_async_engine
from sqlalchemy.orm import Session
from sqlalchemy.orm import sessionmaker
+from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS
from danswer.configs.app_configs import LOG_POSTGRES_LATENCY
from danswer.configs.app_configs import POSTGRES_DB
from danswer.configs.app_configs import POSTGRES_HOST
@@ -65,6 +66,37 @@ if LOG_POSTGRES_LATENCY:
)
+if LOG_POSTGRES_CONN_COUNTS:
+    # Running totals of pool checkouts and checkins, kept at module level
+    checkout_count = 0
+    checkin_count = 0
+
+    @event.listens_for(Engine, "checkout")
+    def log_checkout(dbapi_connection, connection_record, connection_proxy):  # type: ignore
+        """Count a connection checkout and log the pool's current occupancy."""
+        global checkout_count
+        checkout_count += 1
+
+        # The pool is only reachable through the proxy's private attribute.
+        pool = connection_proxy._pool
+        in_use = pool.checkedout()
+        idle = pool.checkedin()
+        capacity = pool.size()
+        message = (
+            "Connection Checkout\n"
+            f"Active Connections: {in_use};\n"
+            f"Idle: {idle};\n"
+            f"Pool Size: {capacity};\n"
+            f"Total connection checkouts: {checkout_count}"
+        )
+        logger.debug(message)
+
+    @event.listens_for(Engine, "checkin")
+    def log_checkin(dbapi_connection, connection_record):  # type: ignore
+        """Count a connection checkin and log the running total."""
+        global checkin_count
+        checkin_count += 1
+        logger.debug(f"Total connection checkins: {checkin_count}")
+
+
+"""END DEBUGGING LOGGING"""
+
+
def get_db_current_time(db_session: Session) -> datetime:
"""Get the current time from Postgres representing the start of the transaction
Within the same transaction this value will not update
@@ -152,7 +184,7 @@ async def get_async_session() -> AsyncGenerator[AsyncSession, None]:
async def warm_up_connections(
- sync_connections_to_warm_up: int = 10, async_connections_to_warm_up: int = 10
+ sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20
) -> None:
sync_postgres_engine = get_sqlalchemy_engine()
connections = [
diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py
index a87f8f45f1ab..3d8668427b41 100644
--- a/backend/danswer/db/index_attempt.py
+++ b/backend/danswer/db/index_attempt.py
@@ -1,11 +1,9 @@
from collections.abc import Sequence
from sqlalchemy import and_
-from sqlalchemy import ColumnElement
from sqlalchemy import delete
from sqlalchemy import desc
from sqlalchemy import func
-from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import update
from sqlalchemy.orm import joinedload
@@ -184,13 +182,12 @@ def get_last_attempt(
def get_latest_index_attempts(
- connector_credential_pair_identifiers: list[ConnectorCredentialPairIdentifier],
secondary_index: bool,
db_session: Session,
) -> Sequence[IndexAttempt]:
ids_stmt = select(
IndexAttempt.connector_credential_pair_id,
- func.max(IndexAttempt.time_created).label("max_time_created"),
+ func.max(IndexAttempt.id).label("max_id"),
).join(EmbeddingModel, IndexAttempt.embedding_model_id == EmbeddingModel.id)
if secondary_index:
@@ -198,23 +195,6 @@ def get_latest_index_attempts(
else:
ids_stmt = ids_stmt.where(EmbeddingModel.status == IndexModelStatus.PRESENT)
- where_stmts: list[ColumnElement] = []
- for connector_credential_pair_identifier in connector_credential_pair_identifiers:
- where_stmts.append(
- IndexAttempt.connector_credential_pair_id
- == (
- select(ConnectorCredentialPair.id)
- .where(
- ConnectorCredentialPair.connector_id
- == connector_credential_pair_identifier.connector_id,
- ConnectorCredentialPair.credential_id
- == connector_credential_pair_identifier.credential_id,
- )
- .scalar_subquery()
- )
- )
- if where_stmts:
- ids_stmt = ids_stmt.where(or_(*where_stmts))
ids_stmt = ids_stmt.group_by(IndexAttempt.connector_credential_pair_id)
ids_subquery = ids_stmt.subquery()
@@ -225,7 +205,7 @@ def get_latest_index_attempts(
IndexAttempt.connector_credential_pair_id
== ids_subquery.c.connector_credential_pair_id,
)
- .where(IndexAttempt.time_created == ids_subquery.c.max_time_created)
+ .where(IndexAttempt.id == ids_subquery.c.max_id)
)
return db_session.execute(stmt).scalars().all()
diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py
index cd8f1721c3b3..c92b6c3c153b 100644
--- a/backend/danswer/db/models.py
+++ b/backend/danswer/db/models.py
@@ -120,7 +120,7 @@ class User(SQLAlchemyBaseUserTableUUID, Base):
# if specified, controls the assistants that are shown to the user + their order
# if not specified, all assistants are shown
chosen_assistants: Mapped[list[int]] = mapped_column(
- postgresql.ARRAY(Integer), nullable=True
+ postgresql.JSONB(), nullable=True
)
oidc_expiry: Mapped[datetime.datetime] = mapped_column(
diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py
index 067ae477ae68..8c25b27b961f 100644
--- a/backend/danswer/db/persona.py
+++ b/backend/danswer/db/persona.py
@@ -9,6 +9,7 @@ from sqlalchemy import not_
from sqlalchemy import or_
from sqlalchemy import select
from sqlalchemy import update
+from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
from sqlalchemy.orm import Session
@@ -169,6 +170,7 @@ def get_personas(
include_default: bool = True,
include_slack_bot_personas: bool = False,
include_deleted: bool = False,
+ joinedload_all: bool = False,
) -> Sequence[Persona]:
stmt = select(Persona).distinct()
if user_id is not None:
@@ -200,7 +202,16 @@ def get_personas(
if not include_deleted:
stmt = stmt.where(Persona.deleted.is_(False))
- return db_session.scalars(stmt).all()
+ if joinedload_all:
+ stmt = stmt.options(
+ joinedload(Persona.prompts),
+ joinedload(Persona.tools),
+ joinedload(Persona.document_sets),
+ joinedload(Persona.groups),
+ joinedload(Persona.users),
+ )
+
+ return db_session.execute(stmt).unique().scalars().all()
def mark_persona_as_deleted(
diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py
index e78b2ebb85ca..abc9de1f9dd6 100644
--- a/backend/danswer/server/documents/connector.py
+++ b/backend/danswer/server/documents/connector.py
@@ -387,7 +387,6 @@ def get_connector_indexing_status(
]
latest_index_attempts = get_latest_index_attempts(
- connector_credential_pair_identifiers=cc_pair_identifiers,
secondary_index=secondary_index,
db_session=db_session,
)
diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py
index cf2c0e26174d..2ea68f5812c4 100644
--- a/backend/danswer/server/features/persona/api.py
+++ b/backend/danswer/server/features/persona/api.py
@@ -79,6 +79,7 @@ def list_personas_admin(
db_session=db_session,
user_id=None, # user_id = None -> give back all personas
include_deleted=include_deleted,
+ joinedload_all=True,
)
]
@@ -190,7 +191,10 @@ def list_personas(
return [
PersonaSnapshot.from_model(persona)
for persona in get_personas(
- user_id=user_id, include_deleted=include_deleted, db_session=db_session
+ user_id=user_id,
+ include_deleted=include_deleted,
+ db_session=db_session,
+ joinedload_all=True,
)
]
diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml
index 0bbe15b8d180..ea5e8e1e5d5b 100644
--- a/deployment/docker_compose/docker-compose.dev.yml
+++ b/deployment/docker_compose/docker-compose.dev.yml
@@ -88,7 +88,9 @@ services:
# (time spent on finding the right docs + time spent fetching summaries from disk)
- LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-}
- LOG_ENDPOINT_LATENCY=${LOG_ENDPOINT_LATENCY:-}
-
+ - LOG_POSTGRES_LATENCY=${LOG_POSTGRES_LATENCY:-}
+ - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-}
+
# Enterprise Edition only
- ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false}
- API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-}
diff --git a/web/src/app/assistants/gallery/page.tsx b/web/src/app/assistants/gallery/page.tsx
index 6ac8d61c7328..e955eb0117e4 100644
--- a/web/src/app/assistants/gallery/page.tsx
+++ b/web/src/app/assistants/gallery/page.tsx
@@ -1,7 +1,4 @@
-import { HistorySidebar } from "@/app/chat/sessionSidebar/HistorySidebar";
import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh";
-import { UserDropdown } from "@/components/UserDropdown";
-import { ChatProvider } from "@/components/context/ChatContext";
import { WelcomeModal } from "@/components/initialSetup/welcome/WelcomeModalWrapper";
import { fetchChatData } from "@/lib/chat/fetchChatData";
import { unstable_noStore as noStore } from "next/cache";
@@ -24,47 +21,27 @@ export default async function GalleryPage({
const {
user,
chatSessions,
- availableSources,
- documentSets,
assistants,
- tags,
- llmProviders,
folders,
openedFolders,
shouldShowWelcomeModal,
toggleSidebar,
- userInputPrompts,
} = data;
return (
<>
-
- {tag.tag_key}={tag.tag_value} -
{" "} -No selected tags
- )} -