mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-04-08 20:08:36 +02:00
Small confluence group sync tweaks (#4033)
This commit is contained in:
parent
2b2ba5478c
commit
7324273233
@ -14,30 +14,24 @@ def _build_group_member_email_map(
|
||||
confluence_client: OnyxConfluence, cc_pair_id: int
|
||||
) -> dict[str, set[str]]:
|
||||
group_member_emails: dict[str, set[str]] = {}
|
||||
for user_result in confluence_client.paginated_cql_user_retrieval():
|
||||
logger.debug(f"Processing groups for user: {user_result}")
|
||||
for user in confluence_client.paginated_cql_user_retrieval():
|
||||
logger.debug(f"Processing groups for user: {user}")
|
||||
|
||||
user = user_result.get("user", {})
|
||||
if not user:
|
||||
msg = f"user result missing user field: {user_result}"
|
||||
emit_background_error(msg, cc_pair_id=cc_pair_id)
|
||||
logger.error(msg)
|
||||
continue
|
||||
|
||||
email = user.get("email")
|
||||
email = user.email
|
||||
if not email:
|
||||
# This field is only present in Confluence Server
|
||||
user_name = user.get("username")
|
||||
user_name = user.username
|
||||
# If it is present, try to get the email using a Server-specific method
|
||||
if user_name:
|
||||
email = get_user_email_from_username__server(
|
||||
confluence_client=confluence_client,
|
||||
user_name=user_name,
|
||||
)
|
||||
|
||||
if not email:
|
||||
# If we still don't have an email, skip this user
|
||||
msg = f"user result missing email field: {user_result}"
|
||||
if user.get("type") == "app":
|
||||
msg = f"user result missing email field: {user}"
|
||||
if user.type == "app":
|
||||
logger.warning(msg)
|
||||
else:
|
||||
emit_background_error(msg, cc_pair_id=cc_pair_id)
|
||||
@ -45,7 +39,7 @@ def _build_group_member_email_map(
|
||||
continue
|
||||
|
||||
all_users_groups: set[str] = set()
|
||||
for group in confluence_client.paginated_groups_by_user_retrieval(user):
|
||||
for group in confluence_client.paginated_groups_by_user_retrieval(user.user_id):
|
||||
# group name uniqueness is enforced by Confluence, so we can use it as a group ID
|
||||
group_id = group["name"]
|
||||
group_member_emails.setdefault(group_id, set()).add(email)
|
||||
|
@ -384,6 +384,7 @@ def connector_external_group_sync_generator_task(
|
||||
logger.info(
|
||||
f"Syncing {len(external_user_groups)} external user groups for {source_type}"
|
||||
)
|
||||
logger.debug(f"New external user groups: {external_user_groups}")
|
||||
|
||||
replace_user__ext_group_for_cc_pair(
|
||||
db_session=db_session,
|
||||
|
@ -8,6 +8,7 @@ from typing import TypeVar
|
||||
from urllib.parse import quote
|
||||
|
||||
from atlassian import Confluence # type:ignore
|
||||
from pydantic import BaseModel
|
||||
from requests import HTTPError
|
||||
|
||||
from onyx.utils.logger import setup_logger
|
||||
@ -29,6 +30,16 @@ class ConfluenceRateLimitError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class ConfluenceUser(BaseModel):
|
||||
user_id: str # accountId in Cloud, userKey in Server
|
||||
username: str | None # Confluence Cloud doesn't give usernames
|
||||
display_name: str
|
||||
# Confluence Data Center doesn't give email back by default,
|
||||
# have to fetch it with a different endpoint
|
||||
email: str | None
|
||||
type: str
|
||||
|
||||
|
||||
def _handle_http_error(e: HTTPError, attempt: int) -> int:
|
||||
MIN_DELAY = 2
|
||||
MAX_DELAY = 60
|
||||
@ -275,21 +286,95 @@ class OnyxConfluence(Confluence):
|
||||
self,
|
||||
expand: str | None = None,
|
||||
limit: int | None = None,
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
) -> Iterator[ConfluenceUser]:
|
||||
"""
|
||||
The search/user endpoint can be used to fetch users.
|
||||
It's a seperate endpoint from the content/search endpoint used only for users.
|
||||
Otherwise it's very similar to the content/search endpoint.
|
||||
"""
|
||||
cql = "type=user"
|
||||
url = "rest/api/search/user" if self.cloud else "rest/api/search"
|
||||
expand_string = f"&expand={expand}" if expand else ""
|
||||
url += f"?cql={cql}{expand_string}"
|
||||
yield from self._paginate_url(url, limit)
|
||||
if self.cloud:
|
||||
cql = "type=user"
|
||||
url = "rest/api/search/user"
|
||||
expand_string = f"&expand={expand}" if expand else ""
|
||||
url += f"?cql={cql}{expand_string}"
|
||||
for user_result in self._paginate_url(url, limit):
|
||||
# Example response:
|
||||
# {
|
||||
# 'user': {
|
||||
# 'type': 'known',
|
||||
# 'accountId': '712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
|
||||
# 'accountType': 'atlassian',
|
||||
# 'email': 'chris@danswer.ai',
|
||||
# 'publicName': 'Chris Weaver',
|
||||
# 'profilePicture': {
|
||||
# 'path': '/wiki/aa-avatar/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
|
||||
# 'width': 48,
|
||||
# 'height': 48,
|
||||
# 'isDefault': False
|
||||
# },
|
||||
# 'displayName': 'Chris Weaver',
|
||||
# 'isExternalCollaborator': False,
|
||||
# '_expandable': {
|
||||
# 'operations': '',
|
||||
# 'personalSpace': ''
|
||||
# },
|
||||
# '_links': {
|
||||
# 'self': 'https://danswerai.atlassian.net/wiki/rest/api/user?accountId=712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d'
|
||||
# }
|
||||
# },
|
||||
# 'title': 'Chris Weaver',
|
||||
# 'excerpt': '',
|
||||
# 'url': '/people/712020:35e60fbb-d0f3-4c91-b8c1-f2dd1d69462d',
|
||||
# 'breadcrumbs': [],
|
||||
# 'entityType': 'user',
|
||||
# 'iconCssClass': 'aui-icon content-type-profile',
|
||||
# 'lastModified': '2025-02-18T04:08:03.579Z',
|
||||
# 'score': 0.0
|
||||
# }
|
||||
user = user_result["user"]
|
||||
yield ConfluenceUser(
|
||||
user_id=user["accountId"],
|
||||
username=None,
|
||||
display_name=user["displayName"],
|
||||
email=user.get("email"),
|
||||
type=user["accountType"],
|
||||
)
|
||||
else:
|
||||
# https://developer.atlassian.com/server/confluence/rest/v900/api-group-user/#api-rest-api-user-list-get
|
||||
# ^ is only available on data center deployments
|
||||
# Example response:
|
||||
# [
|
||||
# {
|
||||
# 'type': 'known',
|
||||
# 'username': 'admin',
|
||||
# 'userKey': '40281082950c5fe901950c61c55d0000',
|
||||
# 'profilePicture': {
|
||||
# 'path': '/images/icons/profilepics/default.svg',
|
||||
# 'width': 48,
|
||||
# 'height': 48,
|
||||
# 'isDefault': True
|
||||
# },
|
||||
# 'displayName': 'Admin Test',
|
||||
# '_links': {
|
||||
# 'self': 'http://localhost:8090/rest/api/user?key=40281082950c5fe901950c61c55d0000'
|
||||
# },
|
||||
# '_expandable': {
|
||||
# 'status': ''
|
||||
# }
|
||||
# }
|
||||
# ]
|
||||
for user in self._paginate_url("rest/api/user/list", limit):
|
||||
yield ConfluenceUser(
|
||||
user_id=user["userKey"],
|
||||
username=user["username"],
|
||||
display_name=user["displayName"],
|
||||
email=None,
|
||||
type=user.get("type", "user"),
|
||||
)
|
||||
|
||||
def paginated_groups_by_user_retrieval(
|
||||
self,
|
||||
user: dict[str, Any],
|
||||
user_id: str, # accountId in Cloud, userKey in Server
|
||||
limit: int | None = None,
|
||||
) -> Iterator[dict[str, Any]]:
|
||||
"""
|
||||
@ -297,7 +382,7 @@ class OnyxConfluence(Confluence):
|
||||
It's a confluence specific endpoint that can be used to fetch groups.
|
||||
"""
|
||||
user_field = "accountId" if self.cloud else "key"
|
||||
user_value = user["accountId"] if self.cloud else user["userKey"]
|
||||
user_value = user_id
|
||||
# Server uses userKey (but calls it key during the API call), Cloud uses accountId
|
||||
user_query = f"{user_field}={quote(user_value)}"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user