From ec93ad9e6d08cc7d3021731b79c9269d2ca17aac Mon Sep 17 00:00:00 2001 From: Weves Date: Thu, 25 Jan 2024 16:26:54 -0800 Subject: [PATCH] Sharepoint fixes --- .../connectors/sharepoint/connector.py | 4 +- backend/requirements/default.txt | 2 + .../app/admin/connectors/sharepoint/page.tsx | 76 +++++++++---------- web/src/lib/sources.ts | 10 +-- web/src/lib/types.ts | 1 - 5 files changed, 46 insertions(+), 47 deletions(-) diff --git a/backend/danswer/connectors/sharepoint/connector.py b/backend/danswer/connectors/sharepoint/connector.py index f4887bd8c..56ac2829e 100644 --- a/backend/danswer/connectors/sharepoint/connector.py +++ b/backend/danswer/connectors/sharepoint/connector.py @@ -76,7 +76,7 @@ def get_text_from_txt_driveitem(driveitem_object: DriveItem) -> str: return text_string -def get_text_from_pptx_driveitem(driveitem_object: DriveItem): +def get_text_from_pptx_driveitem(driveitem_object: DriveItem) -> str: file_content = driveitem_object.get_content().execute_query().value pptx_stream = io.BytesIO(file_content) with tempfile.NamedTemporaryFile() as temp: @@ -235,7 +235,7 @@ class SharepointConnector(LoadConnector, PollConnector): elif driveitem_name.endswith(".xlsx"): driveitem_text = get_text_from_xlsx_driveitem(driveitem_object) elif driveitem_name.endswith(".pptx"): - driveitem_text = get_text_from_xlsx_driveitem(driveitem_object) + driveitem_text = get_text_from_pptx_driveitem(driveitem_object) elif is_text_file_extension(driveitem_name): driveitem_text = get_text_from_txt_driveitem(driveitem_object) diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index b48c10302..6b3880ae4 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -25,8 +25,10 @@ langchain==0.0.340 litellm==1.7.5 llama-index==0.9.8 Mako==1.2.4 +msal==1.26.0 nltk==3.8.1 docx2txt==0.8 +Office365-REST-Python-Client==2.5.4 oauthlib==3.2.2 openai==1.3.5 openpyxl==3.1.2 diff --git a/web/src/app/admin/connectors/sharepoint/page.tsx b/web/src/app/admin/connectors/sharepoint/page.tsx index e7fb1a00b..662bf1a44 100644 --- a/web/src/app/admin/connectors/sharepoint/page.tsx +++ b/web/src/app/admin/connectors/sharepoint/page.tsx @@ -72,6 +72,13 @@ const MainSection = () => { return ( <> + + The Sharepoint connector allows you to index and search through your + Sharepoint files. Once setup, your Word documents, Excel files, + PowerPoint presentations, OneNote notebooks, PDFs, and uploaded files + will be queryable within Danswer. + + Step 1: Provide Sharepoint credentials @@ -96,8 +103,16 @@ const MainSection = () => { ) : ( <> - To index Sharepoint, please provide Azure AD client ID, Client - Secret, and Directory ID. + As a first step, please provide Application (client) ID, Directory + (tenant) ID, and Client Secret. You can follow the guide{" "} + + here + {" "} + to create an Azure AD application and obtain these values. @@ -105,28 +120,28 @@ const MainSection = () => { <> } validationSchema={Yup.object().shape({ aad_client_id: Yup.string().required( - "Please enter your Azure AD Client ID" + "Please enter your Application (client) ID" ), aad_directory_id: Yup.string().required( - "Please enter your Azure AD Directory ID" + "Please enter your Directory (tenant) ID" ), aad_client_secret: Yup.string().required( - "Please enter your Azure AD Client Secret" + "Please enter your Client Secret" ), })} initialValues={{ @@ -151,13 +166,9 @@ const MainSection = () => { {sharepointConnectorIndexingStatuses.length > 0 && ( <> - We index the most recently updated tickets from each Sharepoint - instance listed below regularly. - - - The initial poll at this time retrieves tickets updated in the past - hour. All subsequent polls execute every ten minutes. This should be - configurable in the future. + The latest state of your Word documents, Excel files, PowerPoint + presentations, OneNote notebooks, PDFs, and uploaded files are + fetched every 10 minutes.
@@ -176,15 +187,6 @@ const MainSection = () => { } }} specialColumns={[ - { - header: "Sites Group Name", - key: "sites_group_name", - getValue: (ccPairStatus) => { - const connectorConfig = - ccPairStatus.connector.connector_specific_config; - return `${connectorConfig.sites_group_name}`; - }, - }, { header: "Connectors", key: "connectors", @@ -195,6 +197,7 @@ const MainSection = () => { }, }, ]} + includeName />
@@ -204,25 +207,24 @@ const MainSection = () => { nameBuilder={(values) => - `Sharepoint-${values.sites_group_name}` + values.sites && values.sites.length > 0 + ? `Sharepoint-${values.sites.join("-")}` + : "Sharepoint" } ccPairNameBuilder={(values) => - `Sharepoint ${values.sites_group_name}` + values.sites && values.sites.length > 0 + ? `Sharepoint-${values.sites.join("-")}` + : "Sharepoint" } source="sharepoint" inputType="poll" - formBody={ - <> - - - } // formBody={<>} formBodyBuilder={TextArrayFieldBuilder({ name: "sites", label: "Sites:", subtext: "Specify 0 or more sites to index. For example, specifying the site " + - "'support' for the 'danswerai' sharepoint will cause us to only index all content " + + "'support' for the 'danswerai' sharepoint will cause us to only index documents " + "within the 'https://danswerai.sharepoint.com/sites/support' site. " + "If no sites are specified, all sites in your organization will be indexed.", })} @@ -230,13 +232,9 @@ const MainSection = () => { sites: Yup.array() .of(Yup.string().required("Site names must be strings")) .required(), - sites_group_name: Yup.string().required( - "Please enter the name you would like to give this group of sites e.g. engineering " - ), })} initialValues={{ sites: [], - sites_group_name: "", }} credentialId={sharepointCredential.id} refreshFreq={10 * 60} // 10 minutes @@ -244,9 +242,9 @@ const MainSection = () => { ) : ( - Please provide all Azure info in Step 1 first! Once you're done with - that, you can then specify which Sharepoint sites you want to make - searchable. + Please provide all Azure info in Step 1 first! Once you're done + with that, you can then specify which Sharepoint sites you want to + make searchable. )} diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index d2aaab03f..7c11badcd 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -143,11 +143,6 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Google Sites", category: SourceCategory.ImportedKnowledge, }, - requesttracker: { - icon: RequestTrackerIcon, - displayName: "Request Tracker", - category: SourceCategory.AppConnection, - }, loopio: { icon: LoopioIcon, displayName: "Loopio", @@ -158,6 +153,11 @@ const SOURCE_METADATA_MAP: SourceMap = { displayName: "Sharepoint", category: SourceCategory.AppConnection, }, + requesttracker: { + icon: RequestTrackerIcon, + displayName: "Request Tracker", + category: SourceCategory.AppConnection, + }, }; function fillSourceMetadata( diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index c6841bb22..b2945d088 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -107,7 +107,6 @@ export interface JiraConfig { export interface SharepointConfig { sites?: string[]; - sites_group_name: string; } export interface ProductboardConfig {}