Sharepoint fixes

This commit is contained in:
Weves 2024-01-25 16:26:54 -08:00 committed by Chris Weaver
parent d0fa02c8dc
commit ec93ad9e6d
5 changed files with 46 additions and 47 deletions

View File

@ -76,7 +76,7 @@ def get_text_from_txt_driveitem(driveitem_object: DriveItem) -> str:
return text_string return text_string
def get_text_from_pptx_driveitem(driveitem_object: DriveItem): def get_text_from_pptx_driveitem(driveitem_object: DriveItem) -> str:
file_content = driveitem_object.get_content().execute_query().value file_content = driveitem_object.get_content().execute_query().value
pptx_stream = io.BytesIO(file_content) pptx_stream = io.BytesIO(file_content)
with tempfile.NamedTemporaryFile() as temp: with tempfile.NamedTemporaryFile() as temp:
@ -235,7 +235,7 @@ class SharepointConnector(LoadConnector, PollConnector):
elif driveitem_name.endswith(".xlsx"): elif driveitem_name.endswith(".xlsx"):
driveitem_text = get_text_from_xlsx_driveitem(driveitem_object) driveitem_text = get_text_from_xlsx_driveitem(driveitem_object)
elif driveitem_name.endswith(".pptx"): elif driveitem_name.endswith(".pptx"):
driveitem_text = get_text_from_xlsx_driveitem(driveitem_object) driveitem_text = get_text_from_pptx_driveitem(driveitem_object)
elif is_text_file_extension(driveitem_name): elif is_text_file_extension(driveitem_name):
driveitem_text = get_text_from_txt_driveitem(driveitem_object) driveitem_text = get_text_from_txt_driveitem(driveitem_object)

View File

@ -25,8 +25,10 @@ langchain==0.0.340
litellm==1.7.5 litellm==1.7.5
llama-index==0.9.8 llama-index==0.9.8
Mako==1.2.4 Mako==1.2.4
msal==1.26.0
nltk==3.8.1 nltk==3.8.1
docx2txt==0.8 docx2txt==0.8
Office365-REST-Python-Client==2.5.4
oauthlib==3.2.2 oauthlib==3.2.2
openai==1.3.5 openai==1.3.5
openpyxl==3.1.2 openpyxl==3.1.2

View File

@ -72,6 +72,13 @@ const MainSection = () => {
return ( return (
<> <>
<Text>
The Sharepoint connector allows you to index and search through your
Sharepoint files. Once setup, your Word documents, Excel files,
PowerPoint presentations, OneNote notebooks, PDFs, and uploaded files
will be queryable within Danswer.
</Text>
<Title className="mb-2 mt-6 ml-auto mr-auto"> <Title className="mb-2 mt-6 ml-auto mr-auto">
Step 1: Provide Sharepoint credentials Step 1: Provide Sharepoint credentials
</Title> </Title>
@ -96,8 +103,16 @@ const MainSection = () => {
) : ( ) : (
<> <>
<Text className="mb-2"> <Text className="mb-2">
To index Sharepoint, please provide Azure AD client ID, Client As a first step, please provide Application (client) ID, Directory
Secret, and Directory ID. (tenant) ID, and Client Secret. You can follow the guide{" "}
<a
target="_blank"
href="https://docs.danswer.dev/connectors/sharepoint"
className="text-link"
>
here
</a>{" "}
to create an Azure AD application and obtain these values.
</Text> </Text>
<Card className="mt-2"> <Card className="mt-2">
<CredentialForm<SharepointCredentialJson> <CredentialForm<SharepointCredentialJson>
@ -105,28 +120,28 @@ const MainSection = () => {
<> <>
<TextFormField <TextFormField
name="aad_client_id" name="aad_client_id"
label="Azure AD Client ID:" label="Application (client) ID:"
/> />
<TextFormField <TextFormField
name="aad_directory_id" name="aad_directory_id"
label="Azure AD Directory ID:" label="Directory (tenant) ID:"
/> />
<TextFormField <TextFormField
name="aad_client_secret" name="aad_client_secret"
label="Azure AD Client Secret:" label="Client Secret:"
type="password" type="password"
/> />
</> </>
} }
validationSchema={Yup.object().shape({ validationSchema={Yup.object().shape({
aad_client_id: Yup.string().required( aad_client_id: Yup.string().required(
"Please enter your Azure AD Client ID" "Please enter your Application (client) ID"
), ),
aad_directory_id: Yup.string().required( aad_directory_id: Yup.string().required(
"Please enter your Azure AD Directory ID" "Please enter your Directory (tenant) ID"
), ),
aad_client_secret: Yup.string().required( aad_client_secret: Yup.string().required(
"Please enter your Azure AD Client Secret" "Please enter your Client Secret"
), ),
})} })}
initialValues={{ initialValues={{
@ -151,13 +166,9 @@ const MainSection = () => {
{sharepointConnectorIndexingStatuses.length > 0 && ( {sharepointConnectorIndexingStatuses.length > 0 && (
<> <>
<Text className="mb-2"> <Text className="mb-2">
We index the most recently updated tickets from each Sharepoint The latest state of your Word documents, Excel files, PowerPoint
instance listed below regularly. presentations, OneNote notebooks, PDFs, and uploaded files are
</Text> fetched every 10 minutes.
<Text className="mb-2">
The initial poll at this time retrieves tickets updated in the past
hour. All subsequent polls execute every ten minutes. This should be
configurable in the future.
</Text> </Text>
<div className="mb-2"> <div className="mb-2">
<ConnectorsTable<SharepointConfig, SharepointCredentialJson> <ConnectorsTable<SharepointConfig, SharepointCredentialJson>
@ -176,15 +187,6 @@ const MainSection = () => {
} }
}} }}
specialColumns={[ specialColumns={[
{
header: "Sites Group Name",
key: "sites_group_name",
getValue: (ccPairStatus) => {
const connectorConfig =
ccPairStatus.connector.connector_specific_config;
return `${connectorConfig.sites_group_name}`;
},
},
{ {
header: "Connectors", header: "Connectors",
key: "connectors", key: "connectors",
@ -195,6 +197,7 @@ const MainSection = () => {
}, },
}, },
]} ]}
includeName
/> />
</div> </div>
</> </>
@ -204,25 +207,24 @@ const MainSection = () => {
<Card className="mt-4"> <Card className="mt-4">
<ConnectorForm<SharepointConfig> <ConnectorForm<SharepointConfig>
nameBuilder={(values) => nameBuilder={(values) =>
`Sharepoint-${values.sites_group_name}` values.sites && values.sites.length > 0
? `Sharepoint-${values.sites.join("-")}`
: "Sharepoint"
} }
ccPairNameBuilder={(values) => ccPairNameBuilder={(values) =>
`Sharepoint ${values.sites_group_name}` values.sites && values.sites.length > 0
? `Sharepoint-${values.sites.join("-")}`
: "Sharepoint"
} }
source="sharepoint" source="sharepoint"
inputType="poll" inputType="poll"
formBody={
<>
<TextFormField name="sites_group_name" label="Sites Group Name:" />
</>
}
// formBody={<></>} // formBody={<></>}
formBodyBuilder={TextArrayFieldBuilder({ formBodyBuilder={TextArrayFieldBuilder({
name: "sites", name: "sites",
label: "Sites:", label: "Sites:",
subtext: subtext:
"Specify 0 or more sites to index. For example, specifying the site " + "Specify 0 or more sites to index. For example, specifying the site " +
"'support' for the 'danswerai' sharepoint will cause us to only index all content " + "'support' for the 'danswerai' sharepoint will cause us to only index documents " +
"within the 'https://danswerai.sharepoint.com/sites/support' site. " + "within the 'https://danswerai.sharepoint.com/sites/support' site. " +
"If no sites are specified, all sites in your organization will be indexed.", "If no sites are specified, all sites in your organization will be indexed.",
})} })}
@ -230,13 +232,9 @@ const MainSection = () => {
sites: Yup.array() sites: Yup.array()
.of(Yup.string().required("Site names must be strings")) .of(Yup.string().required("Site names must be strings"))
.required(), .required(),
sites_group_name: Yup.string().required(
"Please enter the name you would like to give this group of sites e.g. engineering "
),
})} })}
initialValues={{ initialValues={{
sites: [], sites: [],
sites_group_name: "",
}} }}
credentialId={sharepointCredential.id} credentialId={sharepointCredential.id}
refreshFreq={10 * 60} // 10 minutes refreshFreq={10 * 60} // 10 minutes
@ -244,9 +242,9 @@ const MainSection = () => {
</Card> </Card>
) : ( ) : (
<Text> <Text>
Please provide all Azure info in Step 1 first! Once you're done with Please provide all Azure info in Step 1 first! Once you&apos;re done
that, you can then specify which Sharepoint sites you want to make with that, you can then specify which Sharepoint sites you want to
searchable. make searchable.
</Text> </Text>
)} )}
</> </>

View File

@ -143,11 +143,6 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Google Sites", displayName: "Google Sites",
category: SourceCategory.ImportedKnowledge, category: SourceCategory.ImportedKnowledge,
}, },
requesttracker: {
icon: RequestTrackerIcon,
displayName: "Request Tracker",
category: SourceCategory.AppConnection,
},
loopio: { loopio: {
icon: LoopioIcon, icon: LoopioIcon,
displayName: "Loopio", displayName: "Loopio",
@ -158,6 +153,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Sharepoint", displayName: "Sharepoint",
category: SourceCategory.AppConnection, category: SourceCategory.AppConnection,
}, },
requesttracker: {
icon: RequestTrackerIcon,
displayName: "Request Tracker",
category: SourceCategory.AppConnection,
},
}; };
function fillSourceMetadata( function fillSourceMetadata(

View File

@ -107,7 +107,6 @@ export interface JiraConfig {
export interface SharepointConfig { export interface SharepointConfig {
sites?: string[]; sites?: string[];
sites_group_name: string;
} }
export interface ProductboardConfig {} export interface ProductboardConfig {}