light worker needs to discover some indexing tasks (#4209)

* light worker needs to discover some indexing tasks

* fix formatting

---------

Co-authored-by: Richard Kuo (Danswer) <rkuo@onyx.app>
This commit is contained in:
rkuo-danswer 2025-03-07 11:52:09 -08:00 committed by GitHub
parent 9e8eba23c3
commit 61ccba82a9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 9 additions and 3 deletions

View File

@ -111,5 +111,6 @@ celery_app.autodiscover_tasks(
"onyx.background.celery.tasks.vespa",
"onyx.background.celery.tasks.connector_deletion",
"onyx.background.celery.tasks.doc_permission_syncing",
"onyx.background.celery.tasks.indexing",
]
)

View File

@ -1170,6 +1170,7 @@ def connector_indexing_proxy_task(
return
# primary
@shared_task(
name=OnyxCeleryTask.CHECK_FOR_CHECKPOINT_CLEANUP,
soft_time_limit=300,
@ -1217,6 +1218,7 @@ def check_for_checkpoint_cleanup(*, tenant_id: str) -> None:
)
# light worker
@shared_task(
name=OnyxCeleryTask.CLEANUP_CHECKPOINT,
bind=True,

View File

@ -302,7 +302,9 @@ class ConfluenceConnector(
# Create the document
return Document(
id=build_confluence_document_id(self.wiki_base, page["_links"]["webui"], self.is_cloud),
id=build_confluence_document_id(
self.wiki_base, page["_links"]["webui"], self.is_cloud
),
sections=sections,
source=DocumentSource.CONFLUENCE,
semantic_identifier=page_title,

View File

@ -45,5 +45,6 @@ def test_confluence_connector_permissions(
difference = all_full_doc_ids - all_slim_doc_ids
# The set of full doc IDs should always be a subset of the slim doc IDs
assert all_full_doc_ids.issubset(all_slim_doc_ids), \
f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."
assert all_full_doc_ids.issubset(
all_slim_doc_ids
), f"Full doc IDs are not a subset of slim doc IDs. Found {len(difference)} IDs in full docs but not in slim docs."