mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-27 12:29:41 +02:00
google drive permission sync cleanup (#2749)
This commit is contained in:
@@ -14,7 +14,6 @@ from danswer.connectors.factory import instantiate_connector
|
|||||||
from danswer.connectors.google_drive.connector_auth import (
|
from danswer.connectors.google_drive.connector_auth import (
|
||||||
get_google_drive_creds,
|
get_google_drive_creds,
|
||||||
)
|
)
|
||||||
from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES
|
|
||||||
from danswer.connectors.interfaces import PollConnector
|
from danswer.connectors.interfaces import PollConnector
|
||||||
from danswer.connectors.models import InputType
|
from danswer.connectors.models import InputType
|
||||||
from danswer.db.models import ConnectorCredentialPair
|
from danswer.db.models import ConnectorCredentialPair
|
||||||
@@ -72,25 +71,6 @@ def _fetch_permissions_paginated(
|
|||||||
) -> Iterator[dict[str, Any]]:
|
) -> Iterator[dict[str, Any]]:
|
||||||
next_token = None
|
next_token = None
|
||||||
|
|
||||||
# Check if the file is trashed
|
|
||||||
# Returning nothing here will cause the external permissions to
|
|
||||||
# be empty which will get written to vespa (failing shut)
|
|
||||||
try:
|
|
||||||
file_metadata = add_retries(
|
|
||||||
lambda: drive_service.files()
|
|
||||||
.get(fileId=drive_file_id, fields="id, trashed")
|
|
||||||
.execute()
|
|
||||||
)()
|
|
||||||
except HttpError as e:
|
|
||||||
if e.resp.status == 404 or e.resp.status == 403:
|
|
||||||
return
|
|
||||||
logger.error(f"Failed to fetch permissions: {e}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
if file_metadata.get("trashed", False):
|
|
||||||
logger.debug(f"File with ID {drive_file_id} is trashed")
|
|
||||||
return
|
|
||||||
|
|
||||||
# Get paginated permissions for the file id
|
# Get paginated permissions for the file id
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
@@ -99,7 +79,7 @@ def _fetch_permissions_paginated(
|
|||||||
drive_service.permissions()
|
drive_service.permissions()
|
||||||
.list(
|
.list(
|
||||||
fileId=drive_file_id,
|
fileId=drive_file_id,
|
||||||
fields="permissions(id, emailAddress, role, type, domain)",
|
fields="permissions(emailAddress, type, domain)",
|
||||||
supportsAllDrives=True,
|
supportsAllDrives=True,
|
||||||
pageToken=next_token,
|
pageToken=next_token,
|
||||||
)
|
)
|
||||||
@@ -107,10 +87,17 @@ def _fetch_permissions_paginated(
|
|||||||
)
|
)
|
||||||
)()
|
)()
|
||||||
except HttpError as e:
|
except HttpError as e:
|
||||||
if e.resp.status == 404 or e.resp.status == 403:
|
if e.resp.status == 404:
|
||||||
|
logger.warning(f"Document with id {drive_file_id} not found: {e}")
|
||||||
break
|
break
|
||||||
logger.error(f"Failed to fetch permissions: {e}")
|
elif e.resp.status == 403:
|
||||||
raise
|
logger.warning(
|
||||||
|
f"Access denied for retrieving document permissions: {e}"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
logger.error(f"Failed to fetch permissions: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
for permission in permissions_resp.get("permissions", []):
|
for permission in permissions_resp.get("permissions", []):
|
||||||
yield permission
|
yield permission
|
||||||
@@ -123,12 +110,12 @@ def _fetch_permissions_paginated(
|
|||||||
def _fetch_google_permissions_for_document_id(
|
def _fetch_google_permissions_for_document_id(
|
||||||
db_session: Session,
|
db_session: Session,
|
||||||
drive_file_id: str,
|
drive_file_id: str,
|
||||||
raw_credentials_json: dict[str, str],
|
credentials_json: dict[str, str],
|
||||||
company_google_domains: list[str],
|
company_google_domains: list[str],
|
||||||
) -> ExternalAccess:
|
) -> ExternalAccess:
|
||||||
# Authenticate and construct service
|
# Authenticate and construct service
|
||||||
google_drive_creds, _ = get_google_drive_creds(
|
google_drive_creds, _ = get_google_drive_creds(
|
||||||
raw_credentials_json, scopes=FETCH_PERMISSIONS_SCOPES
|
credentials_json,
|
||||||
)
|
)
|
||||||
if not google_drive_creds.valid:
|
if not google_drive_creds.valid:
|
||||||
raise ValueError("Invalid Google Drive credentials")
|
raise ValueError("Invalid Google Drive credentials")
|
||||||
@@ -187,7 +174,7 @@ def gdrive_doc_sync(
|
|||||||
ext_access = _fetch_google_permissions_for_document_id(
|
ext_access = _fetch_google_permissions_for_document_id(
|
||||||
db_session=db_session,
|
db_session=db_session,
|
||||||
drive_file_id=doc_additional_info,
|
drive_file_id=doc_additional_info,
|
||||||
raw_credentials_json=cc_pair.credential.credential_json,
|
credentials_json=cc_pair.credential.credential_json,
|
||||||
company_google_domains=[
|
company_google_domains=[
|
||||||
cast(dict[str, str], sync_details)["company_domain"]
|
cast(dict[str, str], sync_details)["company_domain"]
|
||||||
],
|
],
|
||||||
|
Reference in New Issue
Block a user