Make doc count query more efficient (#3461)

This commit is contained in:
pablonyx
2024-12-14 16:26:36 -08:00
committed by GitHub
parent 47735e2044
commit 0c3dab8e8d
4 changed files with 51 additions and 12 deletions

View File

@@ -0,0 +1,32 @@
"""Add composite index to document_by_connector_credential_pair
Revision ID: dab04867cd88
Revises: 54a74a0417fc
Create Date: 2024-12-13 22:43:20.119990
"""
from alembic import op
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises; both match the "Revision ID" / "Revises" lines in the
# module docstring.
revision = "dab04867cd88"
down_revision = "54a74a0417fc"
# No branch labels or extra dependencies are declared for this revision.
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the composite index on document_by_connector_credential_pair.

    The index is non-unique and covers the (connector_id, credential_id)
    column pair, in that order.
    """
    index_name = "idx_document_cc_pair_connector_credential"
    table_name = "document_by_connector_credential_pair"
    indexed_columns = ["connector_id", "credential_id"]

    op.create_index(index_name, table_name, indexed_columns, unique=False)
def downgrade() -> None:
    """Drop the composite (connector_id, credential_id) index again.

    Removes the index named idx_document_cc_pair_connector_credential from
    the document_by_connector_credential_pair table.
    """
    index_name = "idx_document_cc_pair_connector_credential"
    owning_table = "document_by_connector_credential_pair"

    op.drop_index(index_name, table_name=owning_table)

View File

@@ -12,6 +12,7 @@ from sqlalchemy import func
from sqlalchemy import or_
from sqlalchemy import Select
from sqlalchemy import select
from sqlalchemy import tuple_
from sqlalchemy.dialects.postgresql import insert
from sqlalchemy.engine.util import TransactionalContext
from sqlalchemy.exc import OperationalError
@@ -210,6 +211,10 @@ def get_document_counts_for_cc_pairs(
db_session: Session, cc_pair_identifiers: list[ConnectorCredentialPairIdentifier]
) -> Sequence[tuple[int, int, int]]:
"""Returns a sequence of tuples of (connector_id, credential_id, document count)"""
# Prepare a list of (connector_id, credential_id) tuples
cc_ids = [(x.connector_id, x.credential_id) for x in cc_pair_identifiers]
stmt = (
select(
DocumentByConnectorCredentialPair.connector_id,
@@ -217,17 +222,10 @@ def get_document_counts_for_cc_pairs(
func.count(),
)
.where(
or_(
*[
and_(
DocumentByConnectorCredentialPair.connector_id
== cc_pair_identifier.connector_id,
DocumentByConnectorCredentialPair.credential_id
== cc_pair_identifier.credential_id,
)
for cc_pair_identifier in cc_pair_identifiers
]
)
tuple_(
DocumentByConnectorCredentialPair.connector_id,
DocumentByConnectorCredentialPair.credential_id,
).in_(cc_ids)
)
.group_by(
DocumentByConnectorCredentialPair.connector_id,

View File

@@ -865,6 +865,15 @@ class DocumentByConnectorCredentialPair(Base):
"Credential", back_populates="documents_by_credential"
)
__table_args__ = (
Index(
"idx_document_cc_pair_connector_credential",
"connector_id",
"credential_id",
unique=False,
),
)
"""
Messages Tables

View File

@@ -239,9 +239,9 @@ def get_application() -> FastAPI:
include_router_with_global_prefix_prepended(application, chat_router)
include_router_with_global_prefix_prepended(application, query_router)
include_router_with_global_prefix_prepended(application, document_router)
include_router_with_global_prefix_prepended(application, user_router)
include_router_with_global_prefix_prepended(application, admin_query_router)
include_router_with_global_prefix_prepended(application, admin_router)
include_router_with_global_prefix_prepended(application, user_router)
include_router_with_global_prefix_prepended(application, connector_router)
include_router_with_global_prefix_prepended(application, credential_router)
include_router_with_global_prefix_prepended(application, cc_pair_router)