mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-04 08:50:24 +02:00
160 lines
5.5 KiB
Python
160 lines
5.5 KiB
Python
from datetime import datetime
|
|
from datetime import timezone
|
|
from typing import Any
|
|
|
|
from onyx.access.models import DocExternalAccess
|
|
from onyx.access.models import ExternalAccess
|
|
from onyx.connectors.google_drive.connector import GoogleDriveConnector
|
|
from onyx.connectors.google_utils.google_utils import execute_paginated_retrieval
|
|
from onyx.connectors.google_utils.resources import get_drive_service
|
|
from onyx.connectors.interfaces import GenerateSlimDocumentOutput
|
|
from onyx.connectors.models import SlimDocument
|
|
from onyx.db.models import ConnectorCredentialPair
|
|
from onyx.utils.logger import setup_logger
|
|
|
|
# Module-level logger used by the permission-sync helpers in this file.
logger = setup_logger()
|
|
|
|
# Module-level cache: Drive permission ID -> permission dict as returned by the
# permissions listing call. It is filled by _fetch_permissions_for_permission_ids
# and consulted there before any API request is made.
# NOTE(review): nothing visible in this file evicts or bounds this dict, so it
# lives (and grows) for the lifetime of the process -- confirm that is intended.
_PERMISSION_ID_PERMISSION_MAP: dict[str, dict[str, Any]] = {}
|
|
|
|
|
|
def _get_slim_doc_generator(
    cc_pair: ConnectorCredentialPair,
    google_drive_connector: GoogleDriveConnector,
) -> GenerateSlimDocumentOutput:
    """
    Ask the connector for slim documents in the window since the last permission sync.

    The window ends "now" (UTC). It starts at cc_pair.last_time_perm_sync, with
    its tzinfo set to UTC, or at epoch 0.0 when no permission sync has been
    recorded yet. Both bounds are passed as POSIX timestamps.
    """
    now_utc = datetime.now(timezone.utc)

    previous_sync = cc_pair.last_time_perm_sync
    if previous_sync:
        # replace() only stamps the tzinfo as UTC; it does not convert the value.
        window_start = previous_sync.replace(tzinfo=timezone.utc).timestamp()
    else:
        window_start = 0.0

    window_end = now_utc.timestamp()
    return google_drive_connector.retrieve_all_slim_documents(
        start=window_start,
        end=window_end,
    )
|
|
|
|
|
|
def _fetch_permissions_for_permission_ids(
    google_drive_connector: GoogleDriveConnector,
    permission_ids: list[str],
    permission_info: dict[str, Any],
) -> list[dict[str, Any]]:
    """
    Resolve permission IDs to permission dicts, using the module cache when possible.

    Returns an empty list when permission_info is empty or carries no "doc_id".
    If every requested ID is already in _PERMISSION_ID_PERMISSION_MAP, the cached
    dicts are returned in the order of permission_ids. Otherwise the permissions
    of the document are listed through the Drive service, each one is stored in
    the cache under its "id", and ALL listed permissions are returned (not only
    the ones that were requested).
    """
    doc_id = permission_info.get("doc_id")
    if not (permission_info and doc_id):
        return []

    # Collect whatever the cache already knows about the requested IDs.
    cached_hits: list[dict[str, Any]] = []
    for permission_id in permission_ids:
        if permission_id in _PERMISSION_ID_PERMISSION_MAP:
            cached_hits.append(_PERMISSION_ID_PERMISSION_MAP[permission_id])

    # Full cache hit: no API call needed.
    if len(cached_hits) == len(permission_ids):
        return cached_hits

    # The service is created for the document owner when known, otherwise for
    # the connector's primary admin.
    owner_email = permission_info.get("owner_email")
    drive_service = get_drive_service(
        creds=google_drive_connector.creds,
        user_email=(owner_email or google_drive_connector.primary_admin_email),
    )

    # Partial or total miss: list the document's permissions and refresh the cache.
    listed_permissions: list[dict[str, Any]] = []
    for listed in execute_paginated_retrieval(
        retrieval_function=drive_service.permissions().list,
        list_key="permissions",
        fileId=doc_id,
        fields="permissions(id, emailAddress, type, domain)",
        supportsAllDrives=True,
    ):
        listed_permissions.append(listed)
        _PERMISSION_ID_PERMISSION_MAP[listed["id"]] = listed

    return listed_permissions
|
|
|
|
|
|
def _get_permissions_from_slim_doc(
    google_drive_connector: GoogleDriveConnector,
    slim_doc: SlimDocument,
) -> ExternalAccess:
    """
    Translate the permission data attached to a slim document into an ExternalAccess.

    Permissions are taken from slim_doc.perm_sync_data["permissions"] when
    present; otherwise they are resolved from "permission_ids" via
    _fetch_permissions_for_permission_ids. If nothing can be found, a warning is
    logged and an empty, non-public ExternalAccess is returned.

    Mapping of permission types:
      * "user"   -> emailAddress added to external_user_emails
      * "group"  -> emailAddress added to external_user_group_ids
      * "domain" -> document is public when the domain equals the connector's
                    google_domain (only checked when google_domain is set)
      * "anyone" -> document is public

    A user/group permission without an email address is logged and skipped
    rather than raising, so one malformed permission entry cannot abort the
    sync of every other document. Skipping only ever grants less access.
    """
    permission_info = slim_doc.perm_sync_data or {}

    permissions_list = permission_info.get("permissions", [])
    if not permissions_list:
        # No inline permissions: fall back to resolving the referenced IDs.
        if permission_ids := permission_info.get("permission_ids"):
            permissions_list = _fetch_permissions_for_permission_ids(
                google_drive_connector=google_drive_connector,
                permission_ids=permission_ids,
                permission_info=permission_info,
            )
        if not permissions_list:
            logger.warning(f"No permissions found for document {slim_doc.id}")
            return ExternalAccess(
                external_user_emails=set(),
                external_user_group_ids=set(),
                is_public=False,
            )

    company_domain = google_drive_connector.google_domain
    user_emails: set[str] = set()
    group_emails: set[str] = set()
    public = False
    for permission in permissions_list:
        # .get() instead of indexing: an entry missing "type" matches no branch
        # below and is ignored instead of raising KeyError.
        permission_type = permission.get("type")
        if permission_type in ("user", "group"):
            email = permission.get("emailAddress")
            if not email:
                logger.warning(
                    f"Skipping {permission_type} permission without an email "
                    f"address for document {slim_doc.id}: {permission}"
                )
                continue
            if permission_type == "user":
                user_emails.add(email)
            else:
                group_emails.add(email)
        elif permission_type == "domain" and company_domain:
            if permission.get("domain") == company_domain:
                public = True
            else:
                logger.warning(
                    "Permission is type domain but does not match company domain:"
                    f"\n {permission}"
                )
        elif permission_type == "anyone":
            public = True

    return ExternalAccess(
        external_user_emails=user_emails,
        external_user_group_ids=group_emails,
        is_public=public,
    )
|
|
|
|
|
|
def gdrive_doc_sync(
    cc_pair: ConnectorCredentialPair,
) -> list[DocExternalAccess]:
    """
    Build the external-access records for the Drive documents of a connector pair.

    A GoogleDriveConnector is created from the pair's connector-specific config
    and loaded with the pair's credentials. Every slim document it yields is
    turned into a DocExternalAccess carrying the document id and the access
    derived from its permission data.

    NOTE(review): this function only returns the records; writing them to
    postgres (including for documents that do not exist there yet, so their
    permissions are in place once they are indexed) is presumably done by the
    caller -- confirm.
    """
    connector_config = cc_pair.connector.connector_specific_config
    google_drive_connector = GoogleDriveConnector(**connector_config)
    google_drive_connector.load_credentials(cc_pair.credential.credential_json)

    slim_doc_batches = _get_slim_doc_generator(cc_pair, google_drive_connector)

    return [
        DocExternalAccess(
            external_access=_get_permissions_from_slim_doc(
                google_drive_connector=google_drive_connector,
                slim_doc=slim_doc,
            ),
            doc_id=slim_doc.id,
        )
        for slim_doc_batch in slim_doc_batches
        for slim_doc in slim_doc_batch
    ]
|