mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 20:24:32 +02:00
fixed group sync to account for changes in drive permissions (#3666)
* fixed group sync to account for changes in drive permissions * mypy * addressed * reeeeeeeee
This commit is contained in:
@@ -120,9 +120,12 @@ def _get_permissions_from_slim_doc(
|
|||||||
elif permission_type == "anyone":
|
elif permission_type == "anyone":
|
||||||
public = True
|
public = True
|
||||||
|
|
||||||
|
drive_id = permission_info.get("drive_id")
|
||||||
|
group_ids = group_emails | ({drive_id} if drive_id is not None else set())
|
||||||
|
|
||||||
return ExternalAccess(
|
return ExternalAccess(
|
||||||
external_user_emails=user_emails,
|
external_user_emails=user_emails,
|
||||||
external_user_group_ids=group_emails,
|
external_user_group_ids=group_ids,
|
||||||
is_public=public,
|
is_public=public,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@@ -1,16 +1,127 @@
|
|||||||
from ee.onyx.db.external_perm import ExternalUserGroup
|
from ee.onyx.db.external_perm import ExternalUserGroup
|
||||||
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
||||||
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
||||||
|
from onyx.connectors.google_utils.resources import AdminService
|
||||||
from onyx.connectors.google_utils.resources import get_admin_service
|
from onyx.connectors.google_utils.resources import get_admin_service
|
||||||
|
from onyx.connectors.google_utils.resources import get_drive_service
|
||||||
from onyx.db.models import ConnectorCredentialPair
|
from onyx.db.models import ConnectorCredentialPair
|
||||||
from onyx.utils.logger import setup_logger
|
from onyx.utils.logger import setup_logger
|
||||||
|
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_drive_members(
    google_drive_connector: GoogleDriveConnector,
) -> dict[str, tuple[set[str], set[str]]]:
    """
    Builds a lookup from drive id to the emails listed in that drive's
    permissions, split into (group emails, user emails).
    E.g. {
        "drive_id_1": ({"group_email_1"}, {"user_email_1", "user_email_2"}),
        "drive_id_2": ({"group_email_3"}, {"user_email_3"}),
    }
    Permissions whose type is neither "group" nor "user" are ignored.
    """
    all_drive_ids = google_drive_connector.get_all_drive_ids()

    # One drive service (built with the primary admin email) is reused
    # for every drive's permission listing.
    service = get_drive_service(
        google_drive_connector.creds,
        google_drive_connector.primary_admin_email,
    )

    members_by_drive: dict[str, tuple[set[str], set[str]]] = {}
    for drive_id in all_drive_ids:
        groups: set[str] = set()
        users: set[str] = set()
        # Route each permission's email into the set matching its type
        emails_by_type: dict[str, set[str]] = {"group": groups, "user": users}

        drive_permissions = execute_paginated_retrieval(
            service.permissions().list,
            list_key="permissions",
            fileId=drive_id,
            fields="permissions(emailAddress, type)",
            supportsAllDrives=True,
        )
        for perm in drive_permissions:
            bucket = emails_by_type.get(perm["type"])
            # Compare against None: an empty set is falsy but still valid
            if bucket is not None:
                bucket.add(perm["emailAddress"])

        members_by_drive[drive_id] = (groups, users)

    return members_by_drive
|
||||||
|
|
||||||
|
|
||||||
|
def _get_all_groups(
    admin_service: AdminService,
    google_domain: str,
) -> set[str]:
    """
    Returns the email of every group that the admin service lists
    for the given domain.
    """
    domain_groups = execute_paginated_retrieval(
        admin_service.groups().list,
        list_key="groups",
        domain=google_domain,
        fields="groups(email)",
    )
    return {entry["email"] for entry in domain_groups}
|
||||||
|
|
||||||
|
|
||||||
|
def _map_group_email_to_member_emails(
    admin_service: AdminService,
    group_emails: set[str],
) -> dict[str, set[str]]:
    """
    Builds a lookup from each group email to the set of emails of the
    members listed for that group. Every given group gets an entry,
    even when no members are returned for it.
    """

    def _member_emails_of(group_key: str) -> set[str]:
        # One paginated members listing per group
        listed_members = execute_paginated_retrieval(
            admin_service.members().list,
            list_key="members",
            groupKey=group_key,
            fields="members(email)",
        )
        return {entry["email"] for entry in listed_members}

    return {group_key: _member_emails_of(group_key) for group_key in group_emails}
|
||||||
|
|
||||||
|
|
||||||
|
def _build_onyx_groups(
    drive_id_to_members_map: dict[str, tuple[set[str], set[str]]],
    group_email_to_member_emails_map: dict[str, set[str]],
) -> list[ExternalUserGroup]:
    """
    Converts the drive-member and group-member maps into onyx groups.

    Two kinds of groups are produced:
    - one per drive, with the drive id as the group id and, as members,
      the drive's direct users plus the members of every group on the drive
    - one per group email, with the group email as the group id

    Neither input map is modified.
    """
    onyx_groups: list[ExternalUserGroup] = []

    # Convert all drive member definitions to onyx groups
    # This is because having drive level access means you have
    # irrevocable access to all the files in the drive.
    for drive_id, (group_emails, user_emails) in drive_id_to_members_map.items():
        # Copy so that expanding the groups below does not mutate the
        # set owned by the caller's map.
        all_member_emails: set[str] = set(user_emails)
        for group_email in group_emails:
            member_emails = group_email_to_member_emails_map.get(group_email)
            if member_emails is None:
                # A drive can list a group that was not returned by the
                # group listing (presumably e.g. a group outside the synced
                # domain). Skip it instead of failing the whole sync; this
                # only ever narrows access.
                logger.warning(
                    f"Group '{group_email}' on drive '{drive_id}' has no known "
                    "members, skipping it"
                )
                continue
            all_member_emails.update(member_emails)
        onyx_groups.append(
            ExternalUserGroup(
                id=drive_id,
                user_emails=list(all_member_emails),
            )
        )

    # Convert all group member definitions to onyx groups
    for group_email, member_emails in group_email_to_member_emails_map.items():
        onyx_groups.append(
            ExternalUserGroup(
                id=group_email,
                user_emails=list(member_emails),
            )
        )

    return onyx_groups
|
||||||
|
|
||||||
|
|
||||||
def gdrive_group_sync(
|
def gdrive_group_sync(
|
||||||
cc_pair: ConnectorCredentialPair,
|
cc_pair: ConnectorCredentialPair,
|
||||||
) -> list[ExternalUserGroup]:
|
) -> list[ExternalUserGroup]:
|
||||||
|
# Initialize connector and build credential/service objects
|
||||||
google_drive_connector = GoogleDriveConnector(
|
google_drive_connector = GoogleDriveConnector(
|
||||||
**cc_pair.connector.connector_specific_config
|
**cc_pair.connector.connector_specific_config
|
||||||
)
|
)
|
||||||
@@ -19,34 +130,23 @@ def gdrive_group_sync(
|
|||||||
google_drive_connector.creds, google_drive_connector.primary_admin_email
|
google_drive_connector.creds, google_drive_connector.primary_admin_email
|
||||||
)
|
)
|
||||||
|
|
||||||
onyx_groups: list[ExternalUserGroup] = []
|
# Get all drive members
|
||||||
for group in execute_paginated_retrieval(
|
drive_id_to_members_map = _get_drive_members(google_drive_connector)
|
||||||
admin_service.groups().list,
|
|
||||||
list_key="groups",
|
|
||||||
domain=google_drive_connector.google_domain,
|
|
||||||
fields="groups(email)",
|
|
||||||
):
|
|
||||||
# The id is the group email
|
|
||||||
group_email = group["email"]
|
|
||||||
|
|
||||||
# Gather group member emails
|
# Get all group emails
|
||||||
group_member_emails: list[str] = []
|
all_group_emails = _get_all_groups(
|
||||||
for member in execute_paginated_retrieval(
|
admin_service, google_drive_connector.google_domain
|
||||||
admin_service.members().list,
|
|
||||||
list_key="members",
|
|
||||||
groupKey=group_email,
|
|
||||||
fields="members(email)",
|
|
||||||
):
|
|
||||||
group_member_emails.append(member["email"])
|
|
||||||
|
|
||||||
if not group_member_emails:
|
|
||||||
continue
|
|
||||||
|
|
||||||
onyx_groups.append(
|
|
||||||
ExternalUserGroup(
|
|
||||||
id=group_email,
|
|
||||||
user_emails=list(group_member_emails),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Map group emails to their members
|
||||||
|
group_email_to_member_emails_map = _map_group_email_to_member_emails(
|
||||||
|
admin_service, all_group_emails
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert the maps to onyx groups
|
||||||
|
onyx_groups = _build_onyx_groups(
|
||||||
|
drive_id_to_members_map=drive_id_to_members_map,
|
||||||
|
group_email_to_member_emails_map=group_email_to_member_emails_map,
|
||||||
)
|
)
|
||||||
|
|
||||||
return onyx_groups
|
return onyx_groups
|
||||||
|
@@ -258,7 +258,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
|||||||
user_emails.append(email)
|
user_emails.append(email)
|
||||||
return user_emails
|
return user_emails
|
||||||
|
|
||||||
def _get_all_drive_ids(self) -> set[str]:
|
def get_all_drive_ids(self) -> set[str]:
|
||||||
primary_drive_service = get_drive_service(
|
primary_drive_service = get_drive_service(
|
||||||
creds=self.creds,
|
creds=self.creds,
|
||||||
user_email=self.primary_admin_email,
|
user_email=self.primary_admin_email,
|
||||||
@@ -353,7 +353,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
|||||||
) -> Iterator[GoogleDriveFileType]:
|
) -> Iterator[GoogleDriveFileType]:
|
||||||
all_org_emails: list[str] = self._get_all_user_emails()
|
all_org_emails: list[str] = self._get_all_user_emails()
|
||||||
|
|
||||||
all_drive_ids: set[str] = self._get_all_drive_ids()
|
all_drive_ids: set[str] = self.get_all_drive_ids()
|
||||||
|
|
||||||
drive_ids_to_retrieve: set[str] = set()
|
drive_ids_to_retrieve: set[str] = set()
|
||||||
folder_ids_to_retrieve: set[str] = set()
|
folder_ids_to_retrieve: set[str] = set()
|
||||||
@@ -437,7 +437,7 @@ class GoogleDriveConnector(LoadConnector, PollConnector, SlimConnector):
|
|||||||
# If all 3 are true, we already yielded from get_all_files_for_oauth
|
# If all 3 are true, we already yielded from get_all_files_for_oauth
|
||||||
return
|
return
|
||||||
|
|
||||||
all_drive_ids = self._get_all_drive_ids()
|
all_drive_ids = self.get_all_drive_ids()
|
||||||
drive_ids_to_retrieve: set[str] = set()
|
drive_ids_to_retrieve: set[str] = set()
|
||||||
folder_ids_to_retrieve: set[str] = set()
|
folder_ids_to_retrieve: set[str] = set()
|
||||||
if self._requested_shared_drive_ids or self._requested_folder_ids:
|
if self._requested_shared_drive_ids or self._requested_folder_ids:
|
||||||
|
@@ -252,6 +252,7 @@ def build_slim_document(file: GoogleDriveFileType) -> SlimDocument | None:
|
|||||||
id=file["webViewLink"],
|
id=file["webViewLink"],
|
||||||
perm_sync_data={
|
perm_sync_data={
|
||||||
"doc_id": file.get("id"),
|
"doc_id": file.get("id"),
|
||||||
|
"drive_id": file.get("driveId"),
|
||||||
"permissions": file.get("permissions", []),
|
"permissions": file.get("permissions", []),
|
||||||
"permission_ids": file.get("permissionIds", []),
|
"permission_ids": file.get("permissionIds", []),
|
||||||
"name": file.get("name"),
|
"name": file.get("name"),
|
||||||
|
@@ -19,7 +19,7 @@ FILE_FIELDS = (
|
|||||||
"shortcutDetails, owners(emailAddress), size)"
|
"shortcutDetails, owners(emailAddress), size)"
|
||||||
)
|
)
|
||||||
SLIM_FILE_FIELDS = (
|
SLIM_FILE_FIELDS = (
|
||||||
"nextPageToken, files(mimeType, id, name, permissions(emailAddress, type), "
|
"nextPageToken, files(mimeType, driveId, id, name, permissions(emailAddress, type), "
|
||||||
"permissionIds, webViewLink, owners(emailAddress))"
|
"permissionIds, webViewLink, owners(emailAddress))"
|
||||||
)
|
)
|
||||||
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
|
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"
|
||||||
|
Reference in New Issue
Block a user