mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-06-21 21:41:03 +02:00
Make docs indexed cnt more accurate (#579)
This commit is contained in:
parent
bb9a18b22c
commit
37e9ccf864
@ -0,0 +1,35 @@
|
|||||||
|
"""Add index for getting documents just by connector id / credential id
|
||||||
|
|
||||||
|
Revision ID: 7f99be1cb9f5
|
||||||
|
Revises: 78dbe7e38469
|
||||||
|
Create Date: 2023-10-15 22:48:15.487762
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = "7f99be1cb9f5"
|
||||||
|
down_revision = "78dbe7e38469"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Create a non-unique composite index on (connector_id, credential_id).

    The index is added to the ``document_by_connector_credential_pair`` table.
    """
    # op.f() marks the name as final so no naming convention is applied to it.
    index_name = op.f(
        "ix_document_by_connector_credential_pair_pkey__connector_id__credential_id"
    )
    indexed_columns = ["connector_id", "credential_id"]
    op.create_index(
        index_name,
        "document_by_connector_credential_pair",
        indexed_columns,
        unique=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the composite (connector_id, credential_id) index.

    The index is removed from the ``document_by_connector_credential_pair``
    table.
    """
    # op.f() marks the name as final so no naming convention is applied to it.
    index_name = op.f(
        "ix_document_by_connector_credential_pair_pkey__connector_id__credential_id"
    )
    op.drop_index(index_name, table_name="document_by_connector_credential_pair")
|
@ -5,6 +5,7 @@ from uuid import UUID
|
|||||||
from sqlalchemy import and_
|
from sqlalchemy import and_
|
||||||
from sqlalchemy import delete
|
from sqlalchemy import delete
|
||||||
from sqlalchemy import func
|
from sqlalchemy import func
|
||||||
|
from sqlalchemy import or_
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.dialects.postgresql import insert
|
from sqlalchemy.dialects.postgresql import insert
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
@ -53,6 +54,37 @@ def get_document_connector_cnts(
|
|||||||
return db_session.execute(stmt).all() # type: ignore
|
return db_session.execute(stmt).all() # type: ignore
|
||||||
|
|
||||||
|
|
||||||
|
def get_document_cnts_for_cc_pairs(
    db_session: Session, cc_pair_identifiers: list[ConnectorCredentialPairIdentifier]
) -> Sequence[tuple[int, int, int]]:
    """Count the document rows recorded for each requested connector / credential pair.

    Args:
        db_session: Session used to execute the query.
        cc_pair_identifiers: The connector / credential pairs to count for.

    Returns:
        ``(connector_id, credential_id, count)`` rows, one per requested pair
        that has at least one row in ``DocumentByConnectorCredentialPair``.
        Pairs without any rows do not appear in the result, so callers should
        treat a missing pair as a count of zero. An empty
        ``cc_pair_identifiers`` yields an empty list without issuing a query.
    """
    # Guard the empty case explicitly: SQLAlchemy deprecates calling `or_()`
    # with no clauses, and an empty filter cannot express "match nothing".
    if not cc_pair_identifiers:
        return []

    # One `(connector_id = X AND credential_id = Y)` clause per requested pair.
    pair_filters = [
        and_(
            DocumentByConnectorCredentialPair.connector_id
            == cc_pair_identifier.connector_id,
            DocumentByConnectorCredentialPair.credential_id
            == cc_pair_identifier.credential_id,
        )
        for cc_pair_identifier in cc_pair_identifiers
    ]

    stmt = (
        select(
            DocumentByConnectorCredentialPair.connector_id,
            DocumentByConnectorCredentialPair.credential_id,
            func.count(),
        )
        .where(or_(*pair_filters))
        .group_by(
            DocumentByConnectorCredentialPair.connector_id,
            DocumentByConnectorCredentialPair.credential_id,
        )
    )

    return db_session.execute(stmt).all()  # type: ignore
|
||||||
|
|
||||||
|
|
||||||
def get_acccess_info_for_documents(
|
def get_acccess_info_for_documents(
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
document_ids: list[str],
|
document_ids: list[str],
|
||||||
|
@ -50,6 +50,7 @@ from danswer.db.credentials import create_credential
|
|||||||
from danswer.db.credentials import delete_google_drive_service_account_credentials
|
from danswer.db.credentials import delete_google_drive_service_account_credentials
|
||||||
from danswer.db.credentials import fetch_credential_by_id
|
from danswer.db.credentials import fetch_credential_by_id
|
||||||
from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
|
from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
|
||||||
|
from danswer.db.document import get_document_cnts_for_cc_pairs
|
||||||
from danswer.db.engine import get_session
|
from danswer.db.engine import get_session
|
||||||
from danswer.db.feedback import fetch_docs_ranked_by_boost
|
from danswer.db.feedback import fetch_docs_ranked_by_boost
|
||||||
from danswer.db.feedback import update_document_boost
|
from danswer.db.feedback import update_document_boost
|
||||||
@ -294,20 +295,31 @@ def get_connector_indexing_status(
|
|||||||
|
|
||||||
# TODO: make this one query
|
# TODO: make this one query
|
||||||
cc_pairs = get_connector_credential_pairs(db_session)
|
cc_pairs = get_connector_credential_pairs(db_session)
|
||||||
latest_index_attempts = get_latest_index_attempts(
|
cc_pair_identifiers = [
|
||||||
db_session=db_session,
|
|
||||||
connector_credential_pair_identifiers=[
|
|
||||||
ConnectorCredentialPairIdentifier(
|
ConnectorCredentialPairIdentifier(
|
||||||
connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id
|
connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id
|
||||||
)
|
)
|
||||||
for cc_pair in cc_pairs
|
for cc_pair in cc_pairs
|
||||||
],
|
]
|
||||||
|
|
||||||
|
latest_index_attempts = get_latest_index_attempts(
|
||||||
|
db_session=db_session,
|
||||||
|
connector_credential_pair_identifiers=cc_pair_identifiers,
|
||||||
)
|
)
|
||||||
cc_pair_to_latest_index_attempt = {
|
cc_pair_to_latest_index_attempt = {
|
||||||
(index_attempt.connector_id, index_attempt.credential_id): index_attempt
|
(index_attempt.connector_id, index_attempt.credential_id): index_attempt
|
||||||
for index_attempt in latest_index_attempts
|
for index_attempt in latest_index_attempts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
document_count_info = get_document_cnts_for_cc_pairs(
|
||||||
|
db_session=db_session,
|
||||||
|
cc_pair_identifiers=cc_pair_identifiers,
|
||||||
|
)
|
||||||
|
cc_pair_to_document_cnt = {
|
||||||
|
(connector_id, credential_id): cnt
|
||||||
|
for connector_id, credential_id, cnt in document_count_info
|
||||||
|
}
|
||||||
|
|
||||||
for cc_pair in cc_pairs:
|
for cc_pair in cc_pairs:
|
||||||
connector = cc_pair.connector
|
connector = cc_pair.connector
|
||||||
credential = cc_pair.credential
|
credential = cc_pair.credential
|
||||||
@ -324,7 +336,9 @@ def get_connector_indexing_status(
|
|||||||
owner=credential.user.email if credential.user else "",
|
owner=credential.user.email if credential.user else "",
|
||||||
last_status=cc_pair.last_attempt_status,
|
last_status=cc_pair.last_attempt_status,
|
||||||
last_success=cc_pair.last_successful_index_time,
|
last_success=cc_pair.last_successful_index_time,
|
||||||
docs_indexed=cc_pair.total_docs_indexed,
|
docs_indexed=cc_pair_to_document_cnt.get(
|
||||||
|
(connector.id, credential.id), 0
|
||||||
|
),
|
||||||
error_msg=latest_index_attempt.error_msg
|
error_msg=latest_index_attempt.error_msg
|
||||||
if latest_index_attempt
|
if latest_index_attempt
|
||||||
else None,
|
else None,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user