mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-29 21:37:21 +02:00
Make doc count query more efficient (#3461)
This commit is contained in:
@@ -0,0 +1,32 @@
|
||||
"""Add composite index to document_by_connector_credential_pair
|
||||
|
||||
Revision ID: dab04867cd88
|
||||
Revises: 54a74a0417fc
|
||||
Create Date: 2024-12-13 22:43:20.119990
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
|
||||
|
||||
# Revision identifiers, used by Alembic.
revision = "dab04867cd88"
# Parent revision that this migration revises (see "Revises" in the header).
down_revision = "54a74a0417fc"
branch_labels = None
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the composite index on (connector_id, credential_id).

    Adds the non-unique index ``idx_document_cc_pair_connector_credential``
    to the ``document_by_connector_credential_pair`` table.
    """
    op.create_index(
        "idx_document_cc_pair_connector_credential",
        "document_by_connector_credential_pair",
        ["connector_id", "credential_id"],
        unique=False,
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop the ``idx_document_cc_pair_connector_credential`` index.

    Removes the index from the ``document_by_connector_credential_pair``
    table.
    """
    op.drop_index(
        "idx_document_cc_pair_connector_credential",
        table_name="document_by_connector_credential_pair",
    )
|
@@ -12,6 +12,7 @@ from sqlalchemy import func
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import Select
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import tuple_
|
||||
from sqlalchemy.dialects.postgresql import insert
|
||||
from sqlalchemy.engine.util import TransactionalContext
|
||||
from sqlalchemy.exc import OperationalError
|
||||
@@ -210,6 +211,10 @@ def get_document_counts_for_cc_pairs(
|
||||
db_session: Session, cc_pair_identifiers: list[ConnectorCredentialPairIdentifier]
|
||||
) -> Sequence[tuple[int, int, int]]:
|
||||
"""Returns a sequence of tuples of (connector_id, credential_id, document count)"""
|
||||
|
||||
# Prepare a list of (connector_id, credential_id) tuples
|
||||
cc_ids = [(x.connector_id, x.credential_id) for x in cc_pair_identifiers]
|
||||
|
||||
stmt = (
|
||||
select(
|
||||
DocumentByConnectorCredentialPair.connector_id,
|
||||
@@ -217,17 +222,10 @@ def get_document_counts_for_cc_pairs(
|
||||
func.count(),
|
||||
)
|
||||
.where(
|
||||
or_(
|
||||
*[
|
||||
and_(
|
||||
DocumentByConnectorCredentialPair.connector_id
|
||||
== cc_pair_identifier.connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id
|
||||
== cc_pair_identifier.credential_id,
|
||||
)
|
||||
for cc_pair_identifier in cc_pair_identifiers
|
||||
]
|
||||
)
|
||||
tuple_(
|
||||
DocumentByConnectorCredentialPair.connector_id,
|
||||
DocumentByConnectorCredentialPair.credential_id,
|
||||
).in_(cc_ids)
|
||||
)
|
||||
.group_by(
|
||||
DocumentByConnectorCredentialPair.connector_id,
|
||||
|
@@ -865,6 +865,15 @@ class DocumentByConnectorCredentialPair(Base):
|
||||
"Credential", back_populates="documents_by_credential"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"idx_document_cc_pair_connector_credential",
|
||||
"connector_id",
|
||||
"credential_id",
|
||||
unique=False,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
"""
|
||||
Messages Tables
|
||||
|
@@ -239,9 +239,9 @@ def get_application() -> FastAPI:
|
||||
include_router_with_global_prefix_prepended(application, chat_router)
|
||||
include_router_with_global_prefix_prepended(application, query_router)
|
||||
include_router_with_global_prefix_prepended(application, document_router)
|
||||
include_router_with_global_prefix_prepended(application, user_router)
|
||||
include_router_with_global_prefix_prepended(application, admin_query_router)
|
||||
include_router_with_global_prefix_prepended(application, admin_router)
|
||||
include_router_with_global_prefix_prepended(application, user_router)
|
||||
include_router_with_global_prefix_prepended(application, connector_router)
|
||||
include_router_with_global_prefix_prepended(application, credential_router)
|
||||
include_router_with_global_prefix_prepended(application, cc_pair_router)
|
||||
|
Reference in New Issue
Block a user