mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-20 13:05:49 +02:00
quick hack to prevent resyncing the same doc
This commit is contained in:
@@ -898,6 +898,9 @@ def vespa_metadata_sync_task(
|
|||||||
# the sync might repeat again later
|
# the sync might repeat again later
|
||||||
mark_document_as_synced(document_id, db_session)
|
mark_document_as_synced(document_id, db_session)
|
||||||
|
|
||||||
|
r = get_redis_client(tenant_id=tenant_id)
|
||||||
|
r.hdel(RedisConnectorCredentialPair.SYNCING_HASH, document_id)
|
||||||
|
|
||||||
task_logger.info(f"doc={document_id} action=sync chunks={chunks_affected}")
|
task_logger.info(f"doc={document_id} action=sync chunks={chunks_affected}")
|
||||||
except SoftTimeLimitExceeded:
|
except SoftTimeLimitExceeded:
|
||||||
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
|
task_logger.info(f"SoftTimeLimitExceeded exception. doc={document_id}")
|
||||||
|
@@ -30,6 +30,8 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
|||||||
FENCE_PREFIX = PREFIX + "_fence"
|
FENCE_PREFIX = PREFIX + "_fence"
|
||||||
TASKSET_PREFIX = PREFIX + "_taskset"
|
TASKSET_PREFIX = PREFIX + "_taskset"
|
||||||
|
|
||||||
|
SYNCING_HASH = PREFIX + ":vespa_syncing"
|
||||||
|
|
||||||
def __init__(self, tenant_id: str | None, id: int) -> None:
|
def __init__(self, tenant_id: str | None, id: int) -> None:
|
||||||
super().__init__(tenant_id, str(id))
|
super().__init__(tenant_id, str(id))
|
||||||
|
|
||||||
@@ -64,6 +66,9 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
|||||||
lock: RedisLock,
|
lock: RedisLock,
|
||||||
tenant_id: str | None,
|
tenant_id: str | None,
|
||||||
) -> tuple[int, int] | None:
|
) -> tuple[int, int] | None:
|
||||||
|
# an arbitrary number in seconds to prevent the same doc from syncing repeatedly
|
||||||
|
SYNC_EXPIRATION = 24 * 60 * 60
|
||||||
|
|
||||||
last_lock_time = time.monotonic()
|
last_lock_time = time.monotonic()
|
||||||
|
|
||||||
async_results = []
|
async_results = []
|
||||||
@@ -92,6 +97,10 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
|||||||
if doc.id in self.skip_docs:
|
if doc.id in self.skip_docs:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# is the document sync already queued?
|
||||||
|
if redis_client.hexists(doc.id):
|
||||||
|
continue
|
||||||
|
|
||||||
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
|
# celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac"
|
||||||
# the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
|
# the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac"
|
||||||
# we prefix the task id so it's easier to keep track of who created the task
|
# we prefix the task id so it's easier to keep track of who created the task
|
||||||
@@ -104,6 +113,11 @@ class RedisConnectorCredentialPair(RedisObjectHelper):
|
|||||||
RedisConnectorCredentialPair.get_taskset_key(), custom_task_id
|
RedisConnectorCredentialPair.get_taskset_key(), custom_task_id
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# track the doc.id in redis so that we don't resubmit it repeatedly
|
||||||
|
redis_client.hset(
|
||||||
|
self.SYNCING_HASH, doc.id, custom_task_id, ex=SYNC_EXPIRATION
|
||||||
|
)
|
||||||
|
|
||||||
# Priority on syncs triggered by new indexing should be medium
|
# Priority on syncs triggered by new indexing should be medium
|
||||||
result = celery_app.send_task(
|
result = celery_app.send_task(
|
||||||
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
|
OnyxCeleryTask.VESPA_METADATA_SYNC_TASK,
|
||||||
|
@@ -110,6 +110,9 @@ class TenantRedis(redis.Redis):
|
|||||||
"sadd",
|
"sadd",
|
||||||
"srem",
|
"srem",
|
||||||
"scard",
|
"scard",
|
||||||
|
"hexists",
|
||||||
|
"hset",
|
||||||
|
"hdel",
|
||||||
] # Regular methods that need simple prefixing
|
] # Regular methods that need simple prefixing
|
||||||
|
|
||||||
if item == "scan_iter":
|
if item == "scan_iter":
|
||||||
|
Reference in New Issue
Block a user