Fix Sharepoint Folder Parsing (#3791)

This commit is contained in:
Yuhong Sun 2025-01-26 16:45:24 -08:00 committed by GitHub
parent 7a64a25ff4
commit 05ab94945b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 21 additions and 3 deletions

View File

@ -6,6 +6,7 @@ from datetime import datetime
from datetime import timezone from datetime import timezone
from typing import Any from typing import Any
from typing import Optional from typing import Optional
from urllib.parse import unquote
import msal # type: ignore import msal # type: ignore
from office365.graph_client import GraphClient # type: ignore from office365.graph_client import GraphClient # type: ignore
@ -82,8 +83,13 @@ class SharepointConnector(LoadConnector, PollConnector):
sites_index = parts.index("sites") sites_index = parts.index("sites")
site_url = "/".join(parts[: sites_index + 2]) site_url = "/".join(parts[: sites_index + 2])
folder = ( folder = (
parts[sites_index + 2] if len(parts) > sites_index + 2 else None "/".join(unquote(part) for part in parts[sites_index + 2 :])
if len(parts) > sites_index + 2
else None
) )
# Handling for new URL structure
if folder and folder.startswith("Shared Documents/"):
folder = folder[len("Shared Documents/") :]
site_data_list.append( site_data_list.append(
SiteData(url=site_url, folder=folder, sites=[], driveitems=[]) SiteData(url=site_url, folder=folder, sites=[], driveitems=[])
) )
@ -111,11 +117,19 @@ class SharepointConnector(LoadConnector, PollConnector):
query = query.filter(filter_str) query = query.filter(filter_str)
driveitems = query.execute_query() driveitems = query.execute_query()
if element.folder: if element.folder:
expected_path = f"/root:/{element.folder}"
filtered_driveitems = [ filtered_driveitems = [
item item
for item in driveitems for item in driveitems
if element.folder in item.parent_reference.path if item.parent_reference.path.endswith(expected_path)
] ]
if len(filtered_driveitems) == 0:
all_paths = [
item.parent_reference.path for item in driveitems
]
logger.warning(
f"Nothing found for folder '{expected_path}' in any of valid paths: {all_paths}"
)
element.driveitems.extend(filtered_driveitems) element.driveitems.extend(filtered_driveitems)
else: else:
element.driveitems.extend(driveitems) element.driveitems.extend(driveitems)

View File

@ -77,7 +77,9 @@ export function LabelWithTooltip({
} }
export function SubLabel({ children }: { children: string | JSX.Element }) { export function SubLabel({ children }: { children: string | JSX.Element }) {
return <div className="text-xs text-subtle">{children}</div>; return (
<div className="text-xs text-subtle whitespace-pre-line">{children}</div>
);
} }
export function ManualErrorMessage({ children }: { children: string }) { export function ManualErrorMessage({ children }: { children: string }) {

View File

@ -481,7 +481,9 @@ Hint: Use the singular form of the object name (e.g., 'Opportunity' instead of '
name: "sites", name: "sites",
optional: true, optional: true,
description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required). description: `• If no sites are specified, all sites in your organization will be indexed (Sites.Read.All permission required).
Specifying 'https://onyxai.sharepoint.com/sites/support' for example will only index documents within this site. Specifying 'https://onyxai.sharepoint.com/sites/support' for example will only index documents within this site.
Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example will only index documents within this folder. Specifying 'https://onyxai.sharepoint.com/sites/support/subfolder' for example will only index documents within this folder.
`, `,
}, },