mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 03:58:30 +02:00
first full cut
This commit is contained in:
@@ -17,6 +17,8 @@ class RedisConnector:
|
||||
associated background tasks / associated redis interactions."""
|
||||
|
||||
def __init__(self, tenant_id: str | None, id: int) -> None:
|
||||
"""id: a connector credential pair id"""
|
||||
|
||||
self.tenant_id: str | None = tenant_id
|
||||
self.id: int = id
|
||||
self.redis: redis.Redis = get_redis_client(tenant_id=tenant_id)
|
||||
|
@@ -2,6 +2,7 @@ import time
|
||||
from typing import cast
|
||||
from uuid import uuid4
|
||||
|
||||
import redis
|
||||
from celery import Celery
|
||||
from redis import Redis
|
||||
from redis.lock import Lock as RedisLock
|
||||
@@ -12,6 +13,7 @@ from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.document import (
|
||||
construct_document_select_for_connector_credential_pair_by_needs_sync,
|
||||
@@ -28,10 +30,9 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
||||
all connectors and is not per connector."""
|
||||
|
||||
PREFIX = "connectorsync"
|
||||
FENCE_PREFIX = PREFIX + "_fence"
|
||||
TASKSET_PREFIX = PREFIX + "_taskset"
|
||||
|
||||
SYNCING_PREFIX = PREFIX + ":vespa_syncing"
|
||||
# SYNCING_PREFIX = PREFIX + ":vespa_syncing"
|
||||
|
||||
def __init__(self, tenant_id: str | None, id: int) -> None:
|
||||
super().__init__(tenant_id, str(id))
|
||||
@@ -39,10 +40,6 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
||||
# documents that should be skipped
|
||||
self.skip_docs: set[str] = set()
|
||||
|
||||
@classmethod
def get_fence_key(cls) -> str:
    """Return the fence key shared by all connector credential pair syncs.

    The key is read through ``cls`` rather than a hard-coded class name, so
    the method keeps working if the class is renamed and respects a subclass
    that overrides ``FENCE_PREFIX``.
    """
    return cls.FENCE_PREFIX
|
||||
|
||||
@classmethod
def get_taskset_key(cls) -> str:
    """Return the taskset key shared by all connector credential pair syncs.

    The key is read through ``cls`` rather than a hard-coded class name, so
    the method keeps working if the class is renamed and respects a subclass
    that overrides ``TASKSET_PREFIX``.
    """
    return cls.TASKSET_PREFIX
|
||||
@@ -51,18 +48,18 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
||||
def taskset_key(self) -> str:
|
||||
"""Notice that this is intentionally reusing the same taskset for all
|
||||
connector syncs"""
|
||||
# example: connector_taskset
|
||||
# example: connectorsync_taskset
|
||||
return f"{self.TASKSET_PREFIX}"
|
||||
|
||||
def set_skip_docs(self, skip_docs: set[str]) -> None:
    """Store the set of documents that should be skipped.

    The set is kept by reference (it is not copied). Note that this class
    updates the set on the fly.
    """
    self.skip_docs = skip_docs
|
||||
|
||||
@staticmethod
def make_redis_syncing_key(doc_id: str) -> str:
    """Build the redis key used to block a document from syncing.

    The key is the class-level SYNCING_PREFIX followed by a colon and the
    document id.
    """
    prefix = RedisConnectorCredentialPair.SYNCING_PREFIX
    return f"{prefix}:{doc_id}"
|
||||
# @staticmethod
|
||||
# def make_redis_syncing_key(doc_id: str) -> str:
|
||||
# """used to create a key in redis to block a doc from syncing"""
|
||||
# return f"{RedisConnectorCredentialPair.SYNCING_PREFIX}:{doc_id}"
|
||||
|
||||
def generate_tasks(
|
||||
self,
|
||||
@@ -148,3 +145,78 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
||||
break
|
||||
|
||||
return len(async_results), num_docs
|
||||
|
||||
|
||||
class RedisGlobalConnectorCredentialPair:
    """This class is used to scan documents by cc_pair in the db and collect them into
    a unified set for syncing.

    It differs from the other redis helpers in that the taskset used spans
    all connectors and is not per connector."""

    PREFIX = "connectorsync"
    FENCE_KEY = PREFIX + "_fence"
    TASKSET_KEY = PREFIX + "_taskset"

    def __init__(self, redis: redis.Redis) -> None:
        """redis: the client used for every fence / taskset operation."""
        self.redis = redis

    @property
    def fenced(self) -> bool:
        """True when the fence key currently exists in redis."""
        return bool(self.redis.exists(self.fence_key))

    @property
    def payload(self) -> int | None:
        """The integer stored at the fence key, or None if the key is absent.

        Raises ValueError (from ``int``) if the stored value is not an integer.
        """
        # named `raw` rather than `bytes` so the builtin is not shadowed
        raw = self.redis.get(self.fence_key)
        if raw is None:
            return None

        # the cast is only for the type checker; int() parses the raw reply
        return int(cast(bytes, raw))

    def get_remaining(self) -> int:
        """Return the number of members currently in the shared taskset."""
        return cast(int, self.redis.scard(self.taskset_key))

    @property
    def fence_key(self) -> str:
        """Notice that this is intentionally reusing the same fence for all
        connector syncs"""
        # example: connectorsync_fence
        return self.FENCE_KEY

    @property
    def taskset_key(self) -> str:
        """Notice that this is intentionally reusing the same taskset for all
        connector syncs"""
        # example: connectorsync_taskset
        return self.TASKSET_KEY

    def set_fence(self, payload: int | None) -> None:
        """Set the fence to `payload`, or clear it when `payload` is None.

        Clearing removes the fence key from the ACTIVE_FENCES set and deletes
        the key. Setting stores the payload at the fence key and adds the key
        to the ACTIVE_FENCES set. A payload of 0 is a valid value and sets the
        fence.
        """
        if payload is None:
            self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
            self.redis.delete(self.fence_key)
            return

        self.redis.set(self.fence_key, payload)
        self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)

    def delete_taskset(self) -> None:
        """Delete the shared taskset key."""
        self.redis.delete(self.taskset_key)

    def reset(self) -> None:
        """Remove the fence from ACTIVE_FENCES and delete the taskset and fence keys."""
        self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
        self.redis.delete(self.taskset_key)
        self.redis.delete(self.fence_key)

    @staticmethod
    def reset_all(r: redis.Redis) -> None:
        """Same cleanup as `reset`, using the supplied client `r` and the
        class-level keys, so no instance is required."""
        r.srem(
            OnyxRedisConstants.ACTIVE_FENCES,
            RedisGlobalConnectorCredentialPair.FENCE_KEY,
        )
        r.delete(RedisGlobalConnectorCredentialPair.TASKSET_KEY)
        r.delete(RedisGlobalConnectorCredentialPair.FENCE_KEY)
|
||||
|
@@ -14,6 +14,7 @@ from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.db.document import construct_document_select_for_connector_credential_pair
|
||||
from onyx.db.models import Document as DbDocument
|
||||
@@ -69,10 +70,12 @@ class RedisConnectorDelete:
|
||||
|
||||
def set_fence(self, payload: RedisConnectorDeletePayload | None) -> None:
    """Set or clear the deletion fence.

    A truthy payload is serialized with ``model_dump_json()`` and stored at
    the fence key, and the key is added to the ACTIVE_FENCES set. A falsy
    payload removes the key from ACTIVE_FENCES and deletes it.
    """
    fence = self.fence_key
    active_fences = OnyxRedisConstants.ACTIVE_FENCES

    if payload:
        self.redis.set(fence, payload.model_dump_json())
        self.redis.sadd(active_fences, fence)
    else:
        self.redis.srem(active_fences, fence)
        self.redis.delete(fence)
|
||||
|
||||
def _generate_task_id(self) -> str:
|
||||
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
|
||||
@@ -136,6 +139,7 @@ class RedisConnectorDelete:
|
||||
return len(async_results)
|
||||
|
||||
def reset(self) -> None:
    """Remove the fence from ACTIVE_FENCES, then delete the taskset and fence keys."""
    client = self.redis
    client.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
    for key in (self.taskset_key, self.fence_key):
        client.delete(key)
|
||||
|
||||
|
@@ -13,6 +13,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
|
||||
|
||||
|
||||
@@ -102,10 +103,12 @@ class RedisConnectorPermissionSync:
|
||||
payload: RedisConnectorPermissionSyncPayload | None,
|
||||
) -> None:
|
||||
if not payload:
|
||||
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
self.redis.delete(self.fence_key)
|
||||
return
|
||||
|
||||
self.redis.set(self.fence_key, payload.model_dump_json())
|
||||
self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
|
||||
@property
|
||||
def generator_complete(self) -> int | None:
|
||||
@@ -173,6 +176,7 @@ class RedisConnectorPermissionSync:
|
||||
return len(async_results)
|
||||
|
||||
def reset(self) -> None:
|
||||
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
self.redis.delete(self.generator_progress_key)
|
||||
self.redis.delete(self.generator_complete_key)
|
||||
self.redis.delete(self.taskset_key)
|
||||
|
@@ -5,6 +5,8 @@ from uuid import uuid4
|
||||
import redis
|
||||
from pydantic import BaseModel
|
||||
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
|
||||
|
||||
class RedisConnectorIndexPayload(BaseModel):
|
||||
index_attempt_id: int | None
|
||||
@@ -103,10 +105,12 @@ class RedisConnectorIndex:
|
||||
payload: RedisConnectorIndexPayload | None,
|
||||
) -> None:
|
||||
if not payload:
|
||||
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
self.redis.delete(self.fence_key)
|
||||
return
|
||||
|
||||
self.redis.set(self.fence_key, payload.model_dump_json())
|
||||
self.redis.sadd(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
|
||||
def terminating(self, celery_task_id: str) -> bool:
|
||||
if self.redis.exists(f"{self.terminate_key}_{celery_task_id}"):
|
||||
@@ -188,6 +192,7 @@ class RedisConnectorIndex:
|
||||
return status
|
||||
|
||||
def reset(self) -> None:
|
||||
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
self.redis.delete(self.active_key)
|
||||
self.redis.delete(self.generator_lock_key)
|
||||
self.redis.delete(self.generator_progress_key)
|
||||
|
@@ -11,6 +11,7 @@ from onyx.configs.constants import CELERY_GENERIC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.db.connector_credential_pair import get_connector_credential_pair_from_id
|
||||
from onyx.redis.redis_pool import SCAN_ITER_COUNT_DEFAULT
|
||||
|
||||
@@ -79,10 +80,12 @@ class RedisConnectorPrune:
|
||||
|
||||
def set_fence(self, value: bool) -> None:
    """Raise or lower the prune fence.

    A truthy `value` stores 0 at the fence key and adds the key to the
    ACTIVE_FENCES set. A falsy `value` removes the key from ACTIVE_FENCES
    and deletes it.
    """
    fence = self.fence_key
    active_fences = OnyxRedisConstants.ACTIVE_FENCES

    if value:
        self.redis.set(fence, 0)
        self.redis.sadd(active_fences, fence)
    else:
        self.redis.srem(active_fences, fence)
        self.redis.delete(fence)
|
||||
|
||||
@property
|
||||
def generator_complete(self) -> int | None:
|
||||
@@ -158,6 +161,7 @@ class RedisConnectorPrune:
|
||||
return len(async_results)
|
||||
|
||||
def reset(self) -> None:
|
||||
self.redis.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
|
||||
self.redis.delete(self.generator_progress_key)
|
||||
self.redis.delete(self.generator_complete_key)
|
||||
self.redis.delete(self.taskset_key)
|
||||
|
@@ -13,6 +13,7 @@ from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.db.document_set import construct_document_select_by_docset
|
||||
from onyx.db.models import Document
|
||||
from onyx.redis.redis_object_helper import RedisObjectHelper
|
||||
@@ -35,10 +36,12 @@ class RedisDocumentSet(RedisObjectHelper):
|
||||
|
||||
def set_fence(self, payload: int | None) -> None:
    """Set the document set fence to `payload`, or clear it when `payload` is None.

    Setting stores the payload at the fence key and adds the key to the
    ACTIVE_FENCES set. Clearing removes the key from ACTIVE_FENCES and
    deletes it. A payload of 0 still sets the fence.
    """
    fence = self.fence_key
    active_fences = OnyxRedisConstants.ACTIVE_FENCES

    if payload is not None:
        self.redis.set(fence, payload)
        self.redis.sadd(active_fences, fence)
    else:
        self.redis.srem(active_fences, fence)
        self.redis.delete(fence)
|
||||
|
||||
@property
|
||||
def payload(self) -> int | None:
|
||||
@@ -96,6 +99,7 @@ class RedisDocumentSet(RedisObjectHelper):
|
||||
return len(async_results), len(async_results)
|
||||
|
||||
def reset(self) -> None:
    """Remove the fence from ACTIVE_FENCES, then delete the taskset and fence keys."""
    client = self.redis
    client.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
    for key in (self.taskset_key, self.fence_key):
        client.delete(key)
|
||||
|
||||
|
@@ -113,6 +113,7 @@ class TenantRedis(redis.Redis):
|
||||
"reacquire",
|
||||
"create_lock",
|
||||
"startswith",
|
||||
"smembers",
|
||||
"sadd",
|
||||
"srem",
|
||||
"scard",
|
||||
|
@@ -13,6 +13,7 @@ from onyx.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT
|
||||
from onyx.configs.constants import OnyxCeleryPriority
|
||||
from onyx.configs.constants import OnyxCeleryQueues
|
||||
from onyx.configs.constants import OnyxCeleryTask
|
||||
from onyx.configs.constants import OnyxRedisConstants
|
||||
from onyx.db.models import Document
|
||||
from onyx.redis.redis_object_helper import RedisObjectHelper
|
||||
from onyx.utils.variable_functionality import fetch_versioned_implementation
|
||||
@@ -36,10 +37,12 @@ class RedisUserGroup(RedisObjectHelper):
|
||||
|
||||
def set_fence(self, payload: int | None) -> None:
    """Set the user group fence to `payload`, or clear it when `payload` is None.

    Setting stores the payload at the fence key and adds the key to the
    ACTIVE_FENCES set. Clearing removes the key from ACTIVE_FENCES and
    deletes it. A payload of 0 still sets the fence.
    """
    fence = self.fence_key
    active_fences = OnyxRedisConstants.ACTIVE_FENCES

    if payload is not None:
        self.redis.set(fence, payload)
        self.redis.sadd(active_fences, fence)
    else:
        self.redis.srem(active_fences, fence)
        self.redis.delete(fence)
|
||||
|
||||
@property
|
||||
def payload(self) -> int | None:
|
||||
@@ -109,6 +112,7 @@ class RedisUserGroup(RedisObjectHelper):
|
||||
return len(async_results), len(async_results)
|
||||
|
||||
def reset(self) -> None:
    """Remove the fence from ACTIVE_FENCES, then delete the taskset and fence keys."""
    client = self.redis
    client.srem(OnyxRedisConstants.ACTIVE_FENCES, self.fence_key)
    for key in (self.taskset_key, self.fence_key):
        client.delete(key)
|
||||
|
||||
|
Reference in New Issue
Block a user