google drive permission sync cleanup (#2749)

This commit is contained in:
hagen-danswer
2024-10-09 14:17:22 -07:00
committed by GitHub
parent 1cbc067483
commit 804de3248e

View File

@@ -14,7 +14,6 @@ from danswer.connectors.factory import instantiate_connector
from danswer.connectors.google_drive.connector_auth import (
get_google_drive_creds,
)
from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.models import InputType
from danswer.db.models import ConnectorCredentialPair
@@ -72,25 +71,6 @@ def _fetch_permissions_paginated(
) -> Iterator[dict[str, Any]]:
next_token = None
# Check if the file is trashed
# Returning nothing here will cause the external permissions to
# be empty which will get written to vespa (failing shut)
try:
file_metadata = add_retries(
lambda: drive_service.files()
.get(fileId=drive_file_id, fields="id, trashed")
.execute()
)()
except HttpError as e:
if e.resp.status == 404 or e.resp.status == 403:
return
logger.error(f"Failed to fetch permissions: {e}")
raise
if file_metadata.get("trashed", False):
logger.debug(f"File with ID {drive_file_id} is trashed")
return
# Get paginated permissions for the file id
while True:
try:
@@ -99,7 +79,7 @@ def _fetch_permissions_paginated(
drive_service.permissions()
.list(
fileId=drive_file_id,
fields="permissions(id, emailAddress, role, type, domain)",
fields="permissions(emailAddress, type, domain)",
supportsAllDrives=True,
pageToken=next_token,
)
@@ -107,10 +87,17 @@ def _fetch_permissions_paginated(
)
)()
except HttpError as e:
if e.resp.status == 404 or e.resp.status == 403:
if e.resp.status == 404:
logger.warning(f"Document with id {drive_file_id} not found: {e}")
break
logger.error(f"Failed to fetch permissions: {e}")
raise
elif e.resp.status == 403:
logger.warning(
f"Access denied for retrieving document permissions: {e}"
)
break
else:
logger.error(f"Failed to fetch permissions: {e}")
raise
for permission in permissions_resp.get("permissions", []):
yield permission
@@ -123,12 +110,12 @@ def _fetch_permissions_paginated(
def _fetch_google_permissions_for_document_id(
db_session: Session,
drive_file_id: str,
raw_credentials_json: dict[str, str],
credentials_json: dict[str, str],
company_google_domains: list[str],
) -> ExternalAccess:
# Authenticate and construct service
google_drive_creds, _ = get_google_drive_creds(
raw_credentials_json, scopes=FETCH_PERMISSIONS_SCOPES
credentials_json,
)
if not google_drive_creds.valid:
raise ValueError("Invalid Google Drive credentials")
@@ -187,7 +174,7 @@ def gdrive_doc_sync(
ext_access = _fetch_google_permissions_for_document_id(
db_session=db_session,
drive_file_id=doc_additional_info,
raw_credentials_json=cc_pair.credential.credential_json,
credentials_json=cc_pair.credential.credential_json,
company_google_domains=[
cast(dict[str, str], sync_details)["company_domain"]
],