Multiple cloud/indexing fixes (#3609)
* more debugging
* test reacquire outside of loop
* more logging
* move lock_beat test outside the try catch so that we don't worry about testing locks we never took
* use a larger scan_iter value for performance
* batch stale document sync batches
* add debug logging for a particular timeout issue

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
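
The "move lock_beat test outside the try catch" item follows a common redis-py pattern: acquire the lock before entering the try block, so the finally clause can never attempt to release a lock that was never taken. A minimal sketch of that shape, with a hypothetical lock name and a stand-in timeout constant (the real one lives in onyx.configs.constants):

from redis import Redis
from redis.lock import Lock as RedisLock

# Assumption: stand-in for the real constant in onyx.configs.constants.
CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 60

def beat_task(redis_client: Redis) -> None:
    # Acquire BEFORE the try block: if acquire() fails or raises, the
    # finally clause below can never run against a lock we never held.
    lock_beat: RedisLock = redis_client.lock(
        "lock:vespa_sync_beat_example",  # hypothetical lock name
        timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT,
    )
    if not lock_beat.acquire(blocking=False):
        return  # another worker holds the beat lock; skip this run

    try:
        pass  # the periodic work goes here
    finally:
        # Release only if we still own the lock; its TTL may have lapsed.
        if lock_beat.owned():
            lock_beat.release()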
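
The "larger scan_iter value" item is likewise standard redis-py tuning: SCAN accepts a COUNT hint, and a larger value trades slightly bigger replies for far fewer round trips across a big keyspace. A sketch assuming a local Redis; the key pattern and count here are illustrative, not the commit's actual values:

import redis

r = redis.Redis(host="localhost", port=6379)

# COUNT is only a hint to the server, but raising it well above the
# server default (10) cuts the number of SCAN round trips dramatically
# when many keys match the pattern.
for key in r.scan_iter(match="documentset_*", count=4096):
    print(key)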

@@ -8,6 +8,7 @@ from redis import Redis
 from redis.lock import Lock as RedisLock
 from sqlalchemy.orm import Session
 
+from onyx.configs.app_configs import DB_YIELD_PER_DEFAULT
 from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
 from onyx.configs.constants import OnyxCeleryPriority
 from onyx.configs.constants import OnyxCeleryQueues
@@ -50,17 +51,21 @@ class RedisDocumentSet(RedisObjectHelper):
 
     def generate_tasks(
         self,
+        max_tasks: int,
         celery_app: Celery,
         db_session: Session,
         redis_client: Redis,
         lock: RedisLock,
         tenant_id: str | None,
     ) -> tuple[int, int] | None:
+        """Max tasks is ignored for now until we can build the logic to mark the
+        document set up to date over multiple batches.
+        """
         last_lock_time = time.monotonic()
 
         async_results = []
         stmt = construct_document_select_by_docset(int(self._id), current_only=False)
-        for doc in db_session.scalars(stmt).yield_per(1):
+        for doc in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
             doc = cast(Document, doc)
             current_time = time.monotonic()
             if current_time - last_lock_time >= (
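
The diff's two visible changes work together: yield_per(DB_YIELD_PER_DEFAULT) streams rows from SQLAlchemy in server-side batches instead of one at a time, while the monotonic-clock check periodically refreshes the Redis lock so it cannot expire mid-scan. A condensed sketch of the same loop shape, with assumed stand-in values for DB_YIELD_PER_DEFAULT and the refresh interval (the real ones come from the onyx config modules):

import time

from redis.lock import Lock as RedisLock
from sqlalchemy import Select
from sqlalchemy.orm import Session

# Assumptions: stand-ins for the real onyx config values.
DB_YIELD_PER_DEFAULT = 1000
LOCK_REFRESH_INTERVAL = 60  # seconds between lock refreshes

def stream_with_live_lock(db_session: Session, stmt: Select, lock: RedisLock) -> int:
    count = 0
    last_lock_time = time.monotonic()

    # yield_per(N) fetches N rows per database round trip instead of one,
    # which is the point of the yield_per(1) -> yield_per(DB_YIELD_PER_DEFAULT)
    # change in the diff above.
    for _row in db_session.scalars(stmt).yield_per(DB_YIELD_PER_DEFAULT):
        current_time = time.monotonic()
        if current_time - last_lock_time >= LOCK_REFRESH_INTERVAL:
            # reacquire() pushes the lock's TTL back out to its full timeout,
            # so a long scan cannot outlive the lock it runs under.
            lock.reacquire()
            last_lock_time = current_time
        count += 1
    return count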