Files
danswer/backend/danswer/db/index_attempt.py
Chris Weaver 3e8f5fa47e Fix a few bugs with Google Drive polling (#250)
- Adds some offset to the `start` for the Google Drive connector to give time for `modifiedTime` to propagate so we don't miss updates
- Moves fetching folders into a separate call since folder `modifiedTime` doesn't get updated when a file in the folder is updated
- Uses `connector_credential_pair.last_successful_index_time` instead of `updated_at` to determine the `start` for poll connectors
2023-07-28 18:27:32 -07:00

90 lines
2.5 KiB
Python

from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.utils.logger import setup_logger
from sqlalchemy import desc
from sqlalchemy import select
from sqlalchemy.orm import Session
logger = setup_logger()
def create_index_attempt(
connector_id: int,
credential_id: int,
db_session: Session,
) -> int:
new_attempt = IndexAttempt(
connector_id=connector_id,
credential_id=credential_id,
status=IndexingStatus.NOT_STARTED,
)
db_session.add(new_attempt)
db_session.commit()
return new_attempt.id
def get_inprogress_index_attempts(
connector_id: int | None,
db_session: Session,
) -> list[IndexAttempt]:
stmt = select(IndexAttempt)
if connector_id is not None:
stmt = stmt.where(IndexAttempt.connector_id == connector_id)
stmt = stmt.where(IndexAttempt.status == IndexingStatus.IN_PROGRESS)
incomplete_attempts = db_session.scalars(stmt)
return list(incomplete_attempts.all())
def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]:
stmt = select(IndexAttempt)
stmt = stmt.where(IndexAttempt.status == IndexingStatus.NOT_STARTED)
new_attempts = db_session.scalars(stmt)
return list(new_attempts.all())
def mark_attempt_in_progress(
index_attempt: IndexAttempt,
db_session: Session,
) -> None:
index_attempt.status = IndexingStatus.IN_PROGRESS
db_session.add(index_attempt)
db_session.commit()
def mark_attempt_succeeded(
index_attempt: IndexAttempt,
docs_indexed: list[str],
db_session: Session,
) -> None:
index_attempt.status = IndexingStatus.SUCCESS
index_attempt.document_ids = docs_indexed
db_session.add(index_attempt)
db_session.commit()
def mark_attempt_failed(
index_attempt: IndexAttempt, db_session: Session, failure_reason: str = "Unknown"
) -> None:
index_attempt.status = IndexingStatus.FAILED
index_attempt.error_msg = failure_reason
db_session.add(index_attempt)
db_session.commit()
def get_last_successful_attempt(
connector_id: int,
credential_id: int,
db_session: Session,
) -> IndexAttempt | None:
stmt = select(IndexAttempt)
stmt = stmt.where(IndexAttempt.connector_id == connector_id)
stmt = stmt.where(IndexAttempt.credential_id == credential_id)
stmt = stmt.where(IndexAttempt.status == IndexingStatus.SUCCESS)
# Note, the below is using time_created instead of time_updated
stmt = stmt.order_by(desc(IndexAttempt.time_created))
return db_session.execute(stmt).scalars().first()