mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-21 14:12:42 +02:00
Prevent Scheduling Multiple Queued Indexings (#997)
This commit is contained in:
@@ -40,8 +40,13 @@ def _default_end_time(
|
|||||||
|
|
||||||
|
|
||||||
def find_end_time_for_indexing_attempt(
|
def find_end_time_for_indexing_attempt(
|
||||||
last_successful_run: datetime.datetime | None, source_type: DocumentSource
|
last_successful_run: datetime.datetime | None,
|
||||||
|
# source_type can be used to override the default for certain connectors, currently unused
|
||||||
|
source_type: DocumentSource,
|
||||||
) -> datetime.datetime | None:
|
) -> datetime.datetime | None:
|
||||||
|
"""Is the current time unless the connector is run over a large period, in which case it is
|
||||||
|
split up into large time segments that become smaller as it approaches the present
|
||||||
|
"""
|
||||||
# NOTE: source_type can be used to override the default for certain connectors
|
# NOTE: source_type can be used to override the default for certain connectors
|
||||||
end_of_window = _default_end_time(last_successful_run)
|
end_of_window = _default_end_time(last_successful_run)
|
||||||
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
now = datetime.datetime.now(tz=datetime.timezone.utc)
|
||||||
|
@@ -63,7 +63,9 @@ def _should_create_new_indexing(
|
|||||||
if not last_index:
|
if not last_index:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# only one scheduled job per connector at a time
|
# Only one scheduled job per connector at a time
|
||||||
|
# Can schedule another one if the current one is already running however
|
||||||
|
# Because the currently running one will not be until the latest time
|
||||||
if last_index.status == IndexingStatus.NOT_STARTED:
|
if last_index.status == IndexingStatus.NOT_STARTED:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
@@ -166,20 +166,24 @@ def get_latest_index_attempts(
|
|||||||
|
|
||||||
|
|
||||||
def get_index_attempts_for_cc_pair(
|
def get_index_attempts_for_cc_pair(
|
||||||
db_session: Session, cc_pair_identifier: ConnectorCredentialPairIdentifier
|
db_session: Session,
|
||||||
|
cc_pair_identifier: ConnectorCredentialPairIdentifier,
|
||||||
|
disinclude_finished: bool = False,
|
||||||
) -> Sequence[IndexAttempt]:
|
) -> Sequence[IndexAttempt]:
|
||||||
stmt = (
|
stmt = select(IndexAttempt).where(
|
||||||
select(IndexAttempt)
|
and_(
|
||||||
.where(
|
IndexAttempt.connector_id == cc_pair_identifier.connector_id,
|
||||||
and_(
|
IndexAttempt.credential_id == cc_pair_identifier.credential_id,
|
||||||
IndexAttempt.connector_id == cc_pair_identifier.connector_id,
|
|
||||||
IndexAttempt.credential_id == cc_pair_identifier.credential_id,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
.order_by(
|
|
||||||
IndexAttempt.time_created.desc(),
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
if disinclude_finished:
|
||||||
|
stmt = stmt.where(
|
||||||
|
IndexAttempt.status.in_(
|
||||||
|
[IndexingStatus.NOT_STARTED, IndexingStatus.IN_PROGRESS]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
stmt = stmt.order_by(IndexAttempt.time_created.desc())
|
||||||
return db_session.execute(stmt).scalars().all()
|
return db_session.execute(stmt).scalars().all()
|
||||||
|
|
||||||
|
|
||||||
|
@@ -56,6 +56,7 @@ from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed
|
|||||||
from danswer.db.document import get_document_cnts_for_cc_pairs
|
from danswer.db.document import get_document_cnts_for_cc_pairs
|
||||||
from danswer.db.engine import get_session
|
from danswer.db.engine import get_session
|
||||||
from danswer.db.index_attempt import create_index_attempt
|
from danswer.db.index_attempt import create_index_attempt
|
||||||
|
from danswer.db.index_attempt import get_index_attempts_for_cc_pair
|
||||||
from danswer.db.index_attempt import get_latest_index_attempts
|
from danswer.db.index_attempt import get_latest_index_attempts
|
||||||
from danswer.db.models import User
|
from danswer.db.models import User
|
||||||
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
from danswer.dynamic_configs.interface import ConfigNotFoundError
|
||||||
@@ -512,10 +513,31 @@ def connector_run_once(
|
|||||||
detail="Connector has no valid credentials, cannot create index attempts.",
|
detail="Connector has no valid credentials, cannot create index attempts.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
skipped_credentials = [
|
||||||
|
credential_id
|
||||||
|
for credential_id in credential_ids
|
||||||
|
if get_index_attempts_for_cc_pair(
|
||||||
|
cc_pair_identifier=ConnectorCredentialPairIdentifier(
|
||||||
|
connector_id=run_info.connector_id,
|
||||||
|
credential_id=credential_id,
|
||||||
|
),
|
||||||
|
disinclude_finished=True,
|
||||||
|
db_session=db_session,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
index_attempt_ids = [
|
index_attempt_ids = [
|
||||||
create_index_attempt(run_info.connector_id, credential_id, db_session)
|
create_index_attempt(run_info.connector_id, credential_id, db_session)
|
||||||
for credential_id in credential_ids
|
for credential_id in credential_ids
|
||||||
|
if credential_id not in skipped_credentials
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if not index_attempt_ids:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="No new indexing attempts created, indexing jobs are queued or running.",
|
||||||
|
)
|
||||||
|
|
||||||
return StatusResponse(
|
return StatusResponse(
|
||||||
success=True,
|
success=True,
|
||||||
message=f"Successfully created {len(index_attempt_ids)} index attempts",
|
message=f"Successfully created {len(index_attempt_ids)} index attempts",
|
||||||
|
Reference in New Issue
Block a user