welcome to onyx

2025-07-28 13:53:28 +02:00 · 2024-12-13 09:48:43 -08:00
parent 54dcbfa288
commit 21ec5ed795
813 changed files with 7021 additions and 6824 deletions
--- a/backend/onyx/redis/redis_object_helper.py
+++ b/backend/onyx/redis/redis_object_helper.py
@@ -0,0 +1,97 @@
+from abc import ABC
+from abc import abstractmethod
+
+from celery import Celery
+from redis import Redis
+from redis.lock import Lock as RedisLock
+from sqlalchemy.orm import Session
+
+from onyx.redis.redis_pool import get_redis_client
+
+
+class RedisObjectHelper(ABC):
+    PREFIX = "base"
+    FENCE_PREFIX = PREFIX + "_fence"
+    TASKSET_PREFIX = PREFIX + "_taskset"
+
+    def __init__(self, tenant_id: str | None, id: str):
+        self._tenant_id: str | None = tenant_id
+        self._id: str = id
+        self.redis = get_redis_client(tenant_id=tenant_id)
+
+    @property
+    def task_id_prefix(self) -> str:
+        return f"{self.PREFIX}_{self._id}"
+
+    @property
+    def fence_key(self) -> str:
+        # example: documentset_fence_1
+        return f"{self.FENCE_PREFIX}_{self._id}"
+
+    @property
+    def taskset_key(self) -> str:
+        # example: documentset_taskset_1
+        return f"{self.TASKSET_PREFIX}_{self._id}"
+
+    @staticmethod
+    def get_id_from_fence_key(key: str) -> str | None:
+        """
+        Extracts the object ID from a fence key in the format `PREFIX_fence_X`.
+
+        Args:
+            key (str): The fence key string.
+
+        Returns:
+            str | None: The extracted ID if the key is in the correct format, otherwise None.
+        """
+        parts = key.split("_")
+        if len(parts) != 3:
+            return None
+
+        object_id = parts[2]
+        return object_id
+
+    @staticmethod
+    def get_id_from_task_id(task_id: str) -> str | None:
+        """
+        Extracts the object ID from a task ID string.
+
+        This method assumes the task ID is formatted as `prefix_objectid_suffix`, where:
+        - `prefix` is an arbitrary string (e.g., the name of the task or entity),
+        - `objectid` is the ID you want to extract,
+        - `suffix` is another arbitrary string (e.g., a UUID).
+
+        Example:
+            If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`,
+            this method will return the string `"1"`.
+
+        Args:
+            task_id (str): The task ID string from which to extract the object ID.
+
+        Returns:
+            str | None: The extracted object ID if the task ID is in the correct format, otherwise None.
+        """
+        # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc
+        parts = task_id.split("_")
+        if len(parts) != 3:
+            return None
+
+        object_id = parts[1]
+        return object_id
+
+    @abstractmethod
+    def generate_tasks(
+        self,
+        celery_app: Celery,
+        db_session: Session,
+        redis_client: Redis,
+        lock: RedisLock,
+        tenant_id: str | None,
+    ) -> tuple[int, int] | None:
+        """First element should be the number of actual tasks generated, second should
+        be the number of docs that were candidates to be synced for the cc pair.
+
+        The need for this is when we are syncing stale docs referenced by multiple
+        connectors. In a single pass across multiple cc pairs, we only want a task
+        to be created for a particular document id the first time we see it.
+        The rest can be skipped."""