Fix Sharepoint Folder Parsing (#3791)

This commit is contained in:
Yuhong Sun
2025-01-26 16:45:24 -08:00
committed by GitHub
parent 7a64a25ff4
commit 05ab94945b
3 changed files with 21 additions and 3 deletions

View File

@@ -6,6 +6,7 @@ from datetime import datetime
from datetime import timezone
from typing import Any
from typing import Optional
from urllib.parse import unquote
import msal # type: ignore
from office365.graph_client import GraphClient # type: ignore
@@ -82,8 +83,13 @@ class SharepointConnector(LoadConnector, PollConnector):
sites_index = parts.index("sites")
site_url = "/".join(parts[: sites_index + 2])
folder = (
parts[sites_index + 2] if len(parts) > sites_index + 2 else None
"/".join(unquote(part) for part in parts[sites_index + 2 :])
if len(parts) > sites_index + 2
else None
)
# Handling for new URL structure
if folder and folder.startswith("Shared Documents/"):
folder = folder[len("Shared Documents/") :]
site_data_list.append(
SiteData(url=site_url, folder=folder, sites=[], driveitems=[])
)
@@ -111,11 +117,19 @@ class SharepointConnector(LoadConnector, PollConnector):
query = query.filter(filter_str)
driveitems = query.execute_query()
if element.folder:
expected_path = f"/root:/{element.folder}"
filtered_driveitems = [
item
for item in driveitems
if element.folder in item.parent_reference.path
if item.parent_reference.path.endswith(expected_path)
]
if len(filtered_driveitems) == 0:
all_paths = [
item.parent_reference.path for item in driveitems
]
logger.warning(
f"Nothing found for folder '{expected_path}' in any of valid paths: {all_paths}"
)
element.driveitems.extend(filtered_driveitems)
else:
element.driveitems.extend(driveitems)