Full Google Drive permission sync

This commit is contained in:
Evan Lohn
2025-05-19 19:31:29 -07:00
committed by Chris Weaver
parent 10bc072b4b
commit e0f5b95cfc
3 changed files with 32 additions and 13 deletions

View File

@@ -619,7 +619,6 @@ class GoogleDriveConnector(SlimConnector, CheckpointedConnector[GoogleDriveCheck
if checkpoint.completion_stage == DriveRetrievalStage.USER_EMAILS:
all_org_emails: list[str] = self._get_all_user_emails()
if not is_slim:
checkpoint.user_emails = all_org_emails
checkpoint.completion_stage = DriveRetrievalStage.DRIVE_IDS
else:
@@ -730,7 +729,6 @@ class GoogleDriveConnector(SlimConnector, CheckpointedConnector[GoogleDriveCheck
elif self.include_shared_drives:
sorted_drive_ids = sorted(all_drive_ids)
if not is_slim:
checkpoint.drive_ids_to_retrieve = sorted_drive_ids
checkpoint.folder_ids_to_retrieve = sorted_folder_ids
checkpoint.completion_stage = next_stage
@@ -908,9 +906,6 @@ class GoogleDriveConnector(SlimConnector, CheckpointedConnector[GoogleDriveCheck
start=start,
end=end,
)
if is_slim:
yield from drive_files
return
for file in drive_files:
logger.debug(
@@ -1146,13 +1141,14 @@ class GoogleDriveConnector(SlimConnector, CheckpointedConnector[GoogleDriveCheck
def _extract_slim_docs_from_google_drive(
self,
checkpoint: GoogleDriveCheckpoint,
start: SecondsSinceUnixEpoch | None = None,
end: SecondsSinceUnixEpoch | None = None,
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
slim_batch = []
for file in self._fetch_drive_items(
checkpoint=self.build_dummy_checkpoint(),
checkpoint=checkpoint,
is_slim=True,
start=start,
end=end,
@@ -1179,9 +1175,15 @@ class GoogleDriveConnector(SlimConnector, CheckpointedConnector[GoogleDriveCheck
callback: IndexingHeartbeatInterface | None = None,
) -> GenerateSlimDocumentOutput:
try:
checkpoint = self.build_dummy_checkpoint()
while checkpoint.completion_stage != DriveRetrievalStage.DONE:
yield from self._extract_slim_docs_from_google_drive(
start, end, callback=callback
checkpoint=checkpoint,
start=start,
end=end,
callback=callback,
)
except Exception as e:
if MISSING_SCOPES_ERROR_STR in str(e):
raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e

View File

@@ -30,7 +30,7 @@ FILE_FIELDS = (
)
SLIM_FILE_FIELDS = (
f"nextPageToken, files(mimeType, driveId, id, name, {PERMISSION_FULL_DESCRIPTION}, "
"permissionIds, webViewLink, owners(emailAddress))"
"permissionIds, webViewLink, owners(emailAddress), modifiedTime)"
)
FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)"

View File

@@ -154,4 +154,21 @@ def test_gdrive_perm_sync_with_real_data(
f"but is accessible to {emails_with_access}. Raw result: {doc_to_raw_result_mapping[doc_id]} "
)
# Verify that we checked every file in ACCESS_MAPPING
all_expected_files = set()
for file_ids in ACCESS_MAPPING.values():
all_expected_files.update(file_ids)
checked_file_ids = {
url_to_id_mapping[doc_id]
for doc_id in doc_to_email_mapping
if doc_id in url_to_id_mapping
}
assert all_expected_files == checked_file_ids, (
f"Not all expected files were checked. "
f"Missing files: {all_expected_files - checked_file_ids}, "
f"Extra files checked: {checked_file_ids - all_expected_files}"
)
print(f"Checked permissions for {checked_files} files from drive_id_mapping.json")