clear indexing fences with no celery tasks queued (#3482)

* allow beat tasks to expire. it isn't important that they all run

* validate fences are in a good state and cancel/fail them if not

* add function timings for important beat tasks

* optimize lookups, add lots of comments

* review changes

---------

Co-authored-by: Richard Kuo <rkuo@rkuo.com>
Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
rkuo-danswer
2024-12-16 16:55:58 -08:00
committed by GitHub
parent 8b249cbe63
commit 2dd51230ed
6 changed files with 318 additions and 15 deletions

View File

@@ -1,4 +1,6 @@
# These are helper objects for tracking the keys we need to write in redis
import json
from typing import Any
from typing import cast
from redis import Redis
@@ -23,3 +25,25 @@ def celery_get_queue_length(queue: str, r: Redis) -> int:
total_length += cast(int, length)
return total_length
def celery_find_task(task_id: str, queue: str, r: Redis) -> int:
"""This is a redis specific way to find a task for a particular queue in redis.
It is priority aware and knows how to look through the multiple redis lists
used to implement task prioritization.
This operation is not atomic.
This is a linear search O(n) ... so be careful using it when the task queues can be larger.
Returns true if the id is in the queue, False if not.
"""
for priority in range(len(OnyxCeleryPriority)):
queue_name = f"{queue}{CELERY_SEPARATOR}{priority}" if priority > 0 else queue
tasks = cast(list[bytes], r.lrange(queue_name, 0, -1))
for task in tasks:
task_dict: dict[str, Any] = json.loads(task.decode("utf-8"))
if task_dict.get("headers", {}).get("id") == task_id:
return True
return False