mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-04 17:00:24 +02:00
* WIP for external group sync lock fixes * prototyping permissions validation * validate permission sync tasks in celery * mypy * cleanup and wire off external group sync checks for now * add active key to reset * improve logging * reset on payload format change * return False on exception * missed a return * add count of tasks scanned * add comment * better logging * add return * more return * catch payload exceptions * code review fixes * push to restart test --------- Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
169 lines
5.4 KiB
Python
169 lines
5.4 KiB
Python
from datetime import datetime
|
|
from typing import cast
|
|
|
|
import redis
|
|
from celery import Celery
|
|
from pydantic import BaseModel
|
|
from redis.lock import Lock as RedisLock
|
|
from sqlalchemy.orm import Session
|
|
|
|
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
|
|
|
|
|
|
class RedisConnectorExternalGroupSyncPayload(BaseModel):
    """Fence payload for an external group sync.

    Instances are written to the fence key as JSON by
    RedisConnectorExternalGroupSync.set_fence and parsed back by its
    ``payload`` property.

    None of the fields declares a default, so all three must be present when
    a payload is validated; the last two may be explicitly null.
    """

    # presumably the time the sync was queued -- confirm against the producer
    submitted: datetime
    # presumably set once a worker picks the task up; None until then -- confirm
    started: datetime | None
    # presumably the id of the celery task doing the work -- confirm
    celery_task_id: str | None
|
|
|
|
|
|
class RedisConnectorExternalGroupSync:
    """Manages interactions with redis for external group syncing tasks.

    Should only be accessed through RedisConnector.

    Every key this class touches is built from one of the ``*_PREFIX``
    constants below plus ``_{id}``.
    """

    PREFIX = "connectorexternalgroupsync"

    FENCE_PREFIX = f"{PREFIX}_fence"

    # phase 1 - generator task and progress signals
    GENERATORTASK_PREFIX = f"{PREFIX}+generator"  # connectorexternalgroupsync+generator
    GENERATOR_PROGRESS_PREFIX = (
        PREFIX + "_generator_progress"
    )  # connectorexternalgroupsync_generator_progress
    GENERATOR_COMPLETE_PREFIX = (
        PREFIX + "_generator_complete"
    )  # connectorexternalgroupsync_generator_complete

    TASKSET_PREFIX = f"{PREFIX}_taskset"  # connectorexternalgroupsync_taskset
    SUBTASK_PREFIX = f"{PREFIX}+sub"  # connectorexternalgroupsync+sub

    def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None:
        """Store the redis client and precompute the per-``id`` key names."""
        self.tenant_id: str | None = tenant_id
        self.id = id
        self.redis = redis

        self.fence_key: str = f"{self.FENCE_PREFIX}_{id}"
        self.generator_task_key = f"{self.GENERATORTASK_PREFIX}_{id}"
        self.generator_progress_key = f"{self.GENERATOR_PROGRESS_PREFIX}_{id}"
        self.generator_complete_key = f"{self.GENERATOR_COMPLETE_PREFIX}_{id}"

        self.taskset_key = f"{self.TASKSET_PREFIX}_{id}"

        self.subtask_prefix: str = f"{self.SUBTASK_PREFIX}_{id}"

    @staticmethod
    def _to_text(raw: object) -> str:
        """Return a non-None redis reply as text.

        redis-py returns ``bytes`` by default and ``str`` when the client was
        built with ``decode_responses=True``; normalizing here lets the
        readers below work with either kind of client.
        """
        if isinstance(raw, bytes):
            return raw.decode("utf-8")
        return str(raw)

    def taskset_clear(self) -> None:
        """Delete the taskset key."""
        self.redis.delete(self.taskset_key)

    def generator_clear(self) -> None:
        """Delete the generator progress and generator complete keys."""
        self.redis.delete(self.generator_progress_key)
        self.redis.delete(self.generator_complete_key)

    def get_remaining(self) -> int:
        """Return the cardinality of the taskset."""
        # todo: move into fence
        return cast(int, self.redis.scard(self.taskset_key))

    def get_active_task_count(self) -> int:
        """Count of active external group syncing tasks"""
        # Counts every fence key in redis, not only the one for this id.
        return sum(
            1
            for _ in self.redis.scan_iter(
                RedisConnectorExternalGroupSync.FENCE_PREFIX + "*",
                count=SCAN_ITER_COUNT_DEFAULT,
            )
        )

    @property
    def fenced(self) -> bool:
        """True when the fence key exists."""
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> RedisConnectorExternalGroupSyncPayload | None:
        """Return the parsed fence payload, or None when no fence is set.

        Whatever ``model_validate_json`` raises for a stored value that does
        not match the payload model propagates to the caller.
        """
        raw = self.redis.get(self.fence_key)
        if raw is None:
            return None

        return RedisConnectorExternalGroupSyncPayload.model_validate_json(
            self._to_text(raw)
        )

    def set_fence(
        self,
        payload: RedisConnectorExternalGroupSyncPayload | None,
    ) -> None:
        """Store ``payload`` as JSON in the fence key; None deletes the fence."""
        if payload is None:
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload.model_dump_json())

    @property
    def generator_complete(self) -> int | None:
        """the fence payload is an int representing the starting number of
        external group syncing tasks to be processed ... just after the generator completes.

        Returns None when the key is missing or holds the literal text "None".
        """
        raw = self.redis.get(self.generator_complete_key)
        if raw is None:
            return None

        text = self._to_text(raw)
        if text == "None":
            return None

        return int(text)

    @generator_complete.setter
    def generator_complete(self, payload: int | None) -> None:
        """Set the payload to an int to set the fence, otherwise if None it will
        be deleted"""
        if payload is None:
            self.redis.delete(self.generator_complete_key)
            return

        self.redis.set(self.generator_complete_key, payload)

    def generate_tasks(
        self,
        celery_app: Celery,
        db_session: Session,
        lock: RedisLock | None,
    ) -> int | None:
        """Placeholder: performs no work, ignores its arguments, returns None."""
        return None

    def reset(self) -> None:
        """Delete every per-id key: generator signals, taskset and fence."""
        self.generator_clear()
        self.taskset_clear()
        self.redis.delete(self.fence_key)

    @staticmethod
    def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None:
        """Remove ``task_id`` from the taskset belonging to ``id``."""
        taskset_key = f"{RedisConnectorExternalGroupSync.TASKSET_PREFIX}_{id}"
        r.srem(taskset_key, task_id)

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Deletes all redis values for all connectors"""
        cls = RedisConnectorExternalGroupSync
        prefixes = (
            cls.TASKSET_PREFIX,
            cls.GENERATOR_COMPLETE_PREFIX,
            cls.GENERATOR_PROGRESS_PREFIX,
            cls.FENCE_PREFIX,
        )
        for prefix in prefixes:
            # Same SCAN batch-size hint as get_active_task_count.
            for key in r.scan_iter(prefix + "*", count=SCAN_ITER_COUNT_DEFAULT):
                r.delete(key)
|