mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-13 09:30:53 +02:00
try more efficient query (#4047)
This commit is contained in:
parent
c4b8519381
commit
4958a5355d
@ -0,0 +1,27 @@
|
|||||||
|
"""Add composite index for last_modified and last_synced to document
|
||||||
|
|
||||||
|
Revision ID: f13db29f3101
|
||||||
|
Revises: acaab4ef4507
|
||||||
|
Create Date: 2025-02-18 22:48:11.511389
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
# Identifier of this migration script.
revision = "f13db29f3101"
|
||||||
|
# Identifier of the parent migration that this script is applied on top of.
down_revision = "acaab4ef4507"
|
||||||
|
# No branch labels are attached to this revision.
branch_labels: str | None = None
|
||||||
|
# No dependencies on other revisions are declared.
depends_on: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create the composite sync-status index on the ``document`` table.

    The index is named ``ix_document_sync_status``, is non-unique, and
    covers ``last_modified`` followed by ``last_synced``.
    """
    index_name = "ix_document_sync_status"
    table_name = "document"
    # Column order matters for a composite index: last_modified leads.
    indexed_columns = ["last_modified", "last_synced"]

    op.create_index(index_name, table_name, indexed_columns, unique=False)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the ``ix_document_sync_status`` index from the ``document`` table."""
    index_name = "ix_document_sync_status"
    op.drop_index(index_name, table_name="document")
|
@ -60,9 +60,8 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
|||||||
This function executes the query and returns the count of
|
This function executes the query and returns the count of
|
||||||
documents matching the criteria."""
|
documents matching the criteria."""
|
||||||
|
|
||||||
count = (
|
return (
|
||||||
session.query(func.count(DbDocument.id.distinct()))
|
session.query(DbDocument.id)
|
||||||
.select_from(DbDocument)
|
|
||||||
.join(
|
.join(
|
||||||
DocumentByConnectorCredentialPair,
|
DocumentByConnectorCredentialPair,
|
||||||
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||||
@ -73,62 +72,52 @@ def count_documents_by_needs_sync(session: Session) -> int:
|
|||||||
DbDocument.last_synced.is_(None),
|
DbDocument.last_synced.is_(None),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
.scalar()
|
.count()
|
||||||
)
|
)
|
||||||
|
|
||||||
return count
|
|
||||||
|
|
||||||
|
|
||||||
def construct_document_select_for_connector_credential_pair_by_needs_sync(
|
def construct_document_select_for_connector_credential_pair_by_needs_sync(
|
||||||
connector_id: int, credential_id: int
|
connector_id: int, credential_id: int
|
||||||
) -> Select:
|
) -> Select:
|
||||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
return (
|
||||||
|
select(DbDocument)
|
||||||
|
.join(
|
||||||
|
DocumentByConnectorCredentialPair,
|
||||||
|
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||||
|
)
|
||||||
|
.where(
|
||||||
and_(
|
and_(
|
||||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
stmt = (
|
|
||||||
select(DbDocument)
|
|
||||||
.where(
|
|
||||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
|
||||||
or_(
|
or_(
|
||||||
DbDocument.last_modified
|
DbDocument.last_modified > DbDocument.last_synced,
|
||||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
DbDocument.last_synced.is_(None),
|
||||||
DbDocument.last_synced.is_(None), # never synced
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.distinct()
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
return stmt
|
|
||||||
|
|
||||||
|
|
||||||
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
|
def construct_document_id_select_for_connector_credential_pair_by_needs_sync(
|
||||||
connector_id: int, credential_id: int
|
connector_id: int, credential_id: int
|
||||||
) -> Select:
|
) -> Select:
|
||||||
initial_doc_ids_stmt = select(DocumentByConnectorCredentialPair.id).where(
|
return (
|
||||||
|
select(DbDocument.id)
|
||||||
|
.join(
|
||||||
|
DocumentByConnectorCredentialPair,
|
||||||
|
DbDocument.id == DocumentByConnectorCredentialPair.id,
|
||||||
|
)
|
||||||
|
.where(
|
||||||
and_(
|
and_(
|
||||||
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
DocumentByConnectorCredentialPair.connector_id == connector_id,
|
||||||
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
DocumentByConnectorCredentialPair.credential_id == credential_id,
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
stmt = (
|
|
||||||
select(DbDocument.id)
|
|
||||||
.where(
|
|
||||||
DbDocument.id.in_(initial_doc_ids_stmt),
|
|
||||||
or_(
|
or_(
|
||||||
DbDocument.last_modified
|
DbDocument.last_modified > DbDocument.last_synced,
|
||||||
> DbDocument.last_synced, # last_modified is newer than last_synced
|
DbDocument.last_synced.is_(None),
|
||||||
DbDocument.last_synced.is_(None), # never synced
|
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
.distinct()
|
|
||||||
)
|
)
|
||||||
|
)
|
||||||
return stmt
|
|
||||||
|
|
||||||
|
|
||||||
def get_all_documents_needing_vespa_sync_for_cc_pair(
|
def get_all_documents_needing_vespa_sync_for_cc_pair(
|
||||||
|
@ -570,6 +570,14 @@ class Document(Base):
|
|||||||
back_populates="documents",
|
back_populates="documents",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
__table_args__ = (
|
||||||
|
Index(
|
||||||
|
"ix_document_sync_status",
|
||||||
|
last_modified,
|
||||||
|
last_synced,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class Tag(Base):
|
class Tag(Base):
|
||||||
__tablename__ = "tag"
|
__tablename__ = "tag"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user