diff --git a/backend/ee/danswer/configs/app_configs.py b/backend/ee/danswer/configs/app_configs.py index 6fd813dbc..057922dc2 100644 --- a/backend/ee/danswer/configs/app_configs.py +++ b/backend/ee/danswer/configs/app_configs.py @@ -11,6 +11,14 @@ SAML_CONF_DIR = os.environ.get("SAML_CONF_DIR") or "/app/ee/danswer/configs/saml ##### # Auto Permission Sync ##### +# In seconds, default is 5 minutes +CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY = int( + os.environ.get("CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY") or 5 * 60 +) +# In seconds, default is 5 minutes +CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY = int( + os.environ.get("CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY") or 5 * 60 +) NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2) diff --git a/backend/ee/danswer/db/external_perm.py b/backend/ee/danswer/db/external_perm.py index 5411d3c8d..4df635788 100644 --- a/backend/ee/danswer/db/external_perm.py +++ b/backend/ee/danswer/db/external_perm.py @@ -10,6 +10,9 @@ from danswer.access.utils import prefix_group_w_source from danswer.configs.constants import DocumentSource from danswer.db.models import User__ExternalUserGroupId from danswer.db.users import batch_add_ext_perm_user_if_not_exists +from danswer.utils.logger import setup_logger + +logger = setup_logger() class ExternalUserGroup(BaseModel): @@ -73,7 +76,13 @@ def replace_user__ext_group_for_cc_pair( new_external_permissions = [] for external_group in group_defs: for user_email in external_group.user_emails: - user_id = email_id_map[user_email] + user_id = email_id_map.get(user_email) + if user_id is None: + logger.warning( + f"User in group {external_group.id}" + f" with email {user_email} not found" + ) + continue new_external_permissions.append( User__ExternalUserGroupId( user_id=user_id, diff --git a/backend/ee/danswer/external_permissions/confluence/doc_sync.py b/backend/ee/danswer/external_permissions/confluence/doc_sync.py index 81ec008d9..94f024093 100644 --- 
a/backend/ee/danswer/external_permissions/confluence/doc_sync.py +++ b/backend/ee/danswer/external_permissions/confluence/doc_sync.py @@ -195,6 +195,7 @@ def _fetch_all_page_restrictions_for_space( confluence_client: OnyxConfluence, slim_docs: list[SlimDocument], space_permissions_by_space_key: dict[str, ExternalAccess], + is_cloud: bool, ) -> list[DocExternalAccess]: """ For all pages, if a page has restrictions, then use those restrictions. @@ -222,29 +223,50 @@ def _fetch_all_page_restrictions_for_space( continue space_key = slim_doc.perm_sync_data.get("space_key") - if space_permissions := space_permissions_by_space_key.get(space_key): - # If there are no restrictions, then use the space's restrictions - document_restrictions.append( - DocExternalAccess( - doc_id=slim_doc.id, - external_access=space_permissions, - ) + if not (space_permissions := space_permissions_by_space_key.get(space_key)): + logger.debug( + f"Individually fetching space permissions for space {space_key}" ) - if ( - not space_permissions.is_public - and not space_permissions.external_user_emails - and not space_permissions.external_user_group_ids - ): + try: + # If the space permissions are not in the cache, then fetch them + if is_cloud: + retrieved_space_permissions = _get_cloud_space_permissions( + confluence_client=confluence_client, space_key=space_key + ) + else: + retrieved_space_permissions = _get_server_space_permissions( + confluence_client=confluence_client, space_key=space_key + ) + space_permissions_by_space_key[space_key] = retrieved_space_permissions + space_permissions = retrieved_space_permissions + except Exception as e: logger.warning( - f"Permissions are empty for document: {slim_doc.id}\n" - "This means space permissions are may be wrong for" - f" Space key: {space_key}" + f"Error fetching space permissions for space {space_key}: {e}" ) + + if not space_permissions: + logger.warning( + f"No permissions found for document {slim_doc.id} in space {space_key}" + ) continue - 
logger.warning( - f"No permissions found for document {slim_doc.id} in space {space_key}" + # If there are no restrictions, then use the space's restrictions + document_restrictions.append( + DocExternalAccess( + doc_id=slim_doc.id, + external_access=space_permissions, + ) ) + if ( + not space_permissions.is_public + and not space_permissions.external_user_emails + and not space_permissions.external_user_group_ids + ): + logger.warning( + f"Permissions are empty for document: {slim_doc.id}\n" + "This means space permissions are may be wrong for" + f" Space key: {space_key}" + ) logger.debug("Finished fetching all page restrictions for space") return document_restrictions @@ -283,4 +305,5 @@ def confluence_doc_sync( confluence_client=confluence_connector.confluence_client, slim_docs=slim_docs, space_permissions_by_space_key=space_permissions_by_space_key, + is_cloud=is_cloud, ) diff --git a/backend/ee/danswer/external_permissions/sync_params.py b/backend/ee/danswer/external_permissions/sync_params.py index e821971cc..3dc4e46b9 100644 --- a/backend/ee/danswer/external_permissions/sync_params.py +++ b/backend/ee/danswer/external_permissions/sync_params.py @@ -3,6 +3,8 @@ from collections.abc import Callable from danswer.access.models import DocExternalAccess from danswer.configs.constants import DocumentSource from danswer.db.models import ConnectorCredentialPair +from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY +from ee.danswer.configs.app_configs import CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY from ee.danswer.db.external_perm import ExternalUserGroup from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync @@ -56,7 +58,7 @@ GROUP_PERMISSIONS_IS_CC_PAIR_AGNOSTIC: set[DocumentSource] = { # If nothing is specified here, we run the doc_sync every time the celery beat runs DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, 
int] = { # Polling is not supported so we fetch all doc permissions every 5 minutes - DocumentSource.CONFLUENCE: 5 * 60, + DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_DOC_SYNC_FREQUENCY, DocumentSource.SLACK: 5 * 60, } @@ -64,7 +66,7 @@ DOC_PERMISSION_SYNC_PERIODS: dict[DocumentSource, int] = { EXTERNAL_GROUP_SYNC_PERIODS: dict[DocumentSource, int] = { # Polling is not supported so we fetch all group permissions every 5 minutes by default DocumentSource.GOOGLE_DRIVE: 5 * 60, - DocumentSource.CONFLUENCE: 30 * 60, + DocumentSource.CONFLUENCE: CONFLUENCE_PERMISSION_GROUP_SYNC_FREQUENCY, }