diff --git a/backend/ee/onyx/external_permissions/confluence/group_sync.py b/backend/ee/onyx/external_permissions/confluence/group_sync.py index f4b17cb1b..b11d38f63 100644 --- a/backend/ee/onyx/external_permissions/confluence/group_sync.py +++ b/backend/ee/onyx/external_permissions/confluence/group_sync.py @@ -14,30 +14,24 @@ def _build_group_member_email_map( confluence_client: OnyxConfluence, cc_pair_id: int ) -> dict[str, set[str]]: group_member_emails: dict[str, set[str]] = {} - for user_result in confluence_client.paginated_cql_user_retrieval(): - logger.debug(f"Processing groups for user: {user_result}") + for user in confluence_client.paginated_cql_user_retrieval(): + logger.debug(f"Processing groups for user: {user}") - user = user_result.get("user", {}) - if not user: - msg = f"user result missing user field: {user_result}" - emit_background_error(msg, cc_pair_id=cc_pair_id) - logger.error(msg) - continue - - email = user.get("email") + email = user.email if not email: # This field is only present in Confluence Server - user_name = user.get("username") + user_name = user.username # If it is present, try to get the email using a Server-specific method if user_name: email = get_user_email_from_username__server( confluence_client=confluence_client, user_name=user_name, ) + if not email: # If we still don't have an email, skip this user - msg = f"user result missing email field: {user_result}" - if user.get("type") == "app": + msg = f"user result missing email field: {user}" + if user.type == "app": logger.warning(msg) else: emit_background_error(msg, cc_pair_id=cc_pair_id) @@ -45,7 +39,7 @@ def _build_group_member_email_map( continue all_users_groups: set[str] = set() - for group in confluence_client.paginated_groups_by_user_retrieval(user): + for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id): # group name uniqueness is enforced by Confluence, so we can use it as a group ID group_id = group["name"] group_member_emails.setdefault(group_id, set()).add(email) diff --git a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py index b72fd7e65..b190e2099 100644 --- a/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py +++ b/backend/onyx/background/celery/tasks/external_group_syncing/tasks.py @@ -384,6 +384,7 @@ def connector_external_group_sync_generator_task( logger.info( f"Syncing {len(external_user_groups)} external user groups for {source_type}" ) + logger.debug(f"New external user groups: {external_user_groups}") replace_user__ext_group_for_cc_pair( db_session=db_session, diff --git a/backend/onyx/connectors/confluence/onyx_confluence.py b/backend/onyx/connectors/confluence/onyx_confluence.py index 96b9a3702..728bf2065 100644 --- a/backend/onyx/connectors/confluence/onyx_confluence.py +++ b/backend/onyx/connectors/confluence/onyx_confluence.py @@ -8,6 +8,7 @@ from typing import TypeVar from urllib.parse import quote from atlassian import Confluence # type:ignore +from pydantic import BaseModel from requests import HTTPError from onyx.utils.logger import setup_logger @@ -29,6 +30,16 @@ class ConfluenceRateLimitError(Exception): pass +class ConfluenceUser(BaseModel): + user_id: str # accountId in Cloud, userKey in Server + username: str | None # Confluence Cloud doesn't give usernames + display_name: str + # Confluence Data Center doesn't give email back by default, + # have to fetch it with a different endpoint + email: str | None + type: str + + def _handle_http_error(e: HTTPError, attempt: int) -> int: MIN_DELAY = 2 MAX_DELAY = 60 @@ -275,21 +286,95 @@ class OnyxConfluence(Confluence): self, expand: str | None = None, limit: int | None = None, - ) -> Iterator[dict[str, Any]]: + ) -> Iterator[ConfluenceUser]: """ The search/user endpoint can be used to fetch users. It's a seperate endpoint from the content/search endpoint used only for users. Otherwise it's very similar to the content/search endpoint. """ - cql = "type=user" - url = "rest/api/search/user" if self.cloud else "rest/api/search" - expand_string = f"&expand={expand}" if expand else "" - url += f"?cql={cql}{expand_string}" - yield from self._paginate_url(url, limit) + if self.cloud: + cql = "type=user" + url = "rest/api/search/user" + expand_string = f"&expand={expand}" if expand else "" + url += f"?cql={cql}{expand_string}" + for user_result in self._paginate_url(url, limit): + # Example response: + # { + # 'user': { + # 'type': 'known', + # 'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d', + # 'accountType': 'atlassian', + # 'email': 'chris@danswer.ai', + # 'publicName': 'Chris Weaver', + # 'profilePicture': { + # 'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d', + # 'width': 48, + # 'height': 48, + # 'isDefault': False + # }, + # 'displayName': 'Chris Weaver', + # 'isExternalCollaborator': False, + # '_expandable': { + # 'operations': '', + # 'personalSpace': '' + # }, + # '_links': { + # 'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d' + # } + # }, + # 'title': 'Chris Weaver', + # 'excerpt': '', + # 'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d', + # 'breadcrumbs': [], + # 'entityType': 'user', + # 'iconCssClass': 'aui-icon content-type-profile', + # 'lastModified': '2025-02-18T04:08:03.579Z', + # 'score': 0.0 + # } + user = user_result["user"] + yield ConfluenceUser( + user_id=user["accountId"], + username=None, + display_name=user["displayName"], + email=user.get("email"), + type=user["accountType"], + ) + else: + # https://developer.atlassian.com/server/confluence/rest/v900/api-group-user/#api-rest-api-user-list-get + # ^ is only available on data center deployments + # Example response: + # [ + # { + # 'type': 'known', + # 'username': 'admin', + # 'userKey': '40281082950c5fe901950c61c55d0000', + # 'profilePicture': { + # 'path': '/images/icons/profilepics/default.svg', + # 'width': 48, + # 'height': 48, + # 'isDefault': True + # }, + # 'displayName': 'Admin Test', + # '_links': { + # 'self': 'http://localhost:8090/rest/api/user?key=40281082950c5fe901950c61c55d0000' + # }, + # '_expandable': { + # 'status': '' + # } + # } + # ] + for user in self._paginate_url("rest/api/user/list", limit): + yield ConfluenceUser( + user_id=user["userKey"], + username=user["username"], + display_name=user["displayName"], + email=None, + type=user.get("type", "user"), + ) def paginated_groups_by_user_retrieval( self, - user: dict[str, Any], + user_id: str, # accountId in Cloud, userKey in Server limit: int | None = None, ) -> Iterator[dict[str, Any]]: """ @@ -297,7 +382,7 @@ class OnyxConfluence(Confluence): It's a confluence specific endpoint that can be used to fetch groups. """ user_field = "accountId" if self.cloud else "key" - user_value = user["accountId"] if self.cloud else user["userKey"] + user_value = user_id # Server uses userKey (but calls it key during the API call), Cloud uses accountId user_query = f"{user_field}={quote(user_value)}"