Merge pull request #1144 from hagen6835/add-teams-connector

added teams connector
This commit is contained in:
hagen-danswer 2024-06-10 13:25:54 -04:00 committed by GitHub
commit ead6a851cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 603 additions and 24 deletions

View File

@ -95,6 +95,7 @@ class DocumentSource(str, Enum):
LOOPIO = "loopio"
DROPBOX = "dropbox"
SHAREPOINT = "sharepoint"
TEAMS = "teams"
DISCOURSE = "discourse"
AXERO = "axero"
MEDIAWIKI = "mediawiki"

View File

@ -33,6 +33,7 @@ from danswer.connectors.sharepoint.connector import SharepointConnector
from danswer.connectors.slab.connector import SlabConnector
from danswer.connectors.slack.connector import SlackPollConnector
from danswer.connectors.slack.load_connector import SlackLoadConnector
from danswer.connectors.teams.connector import TeamsConnector
from danswer.connectors.web.connector import WebConnector
from danswer.connectors.wikipedia.connector import WikipediaConnector
from danswer.connectors.zendesk.connector import ZendeskConnector
@ -76,6 +77,7 @@ def identify_connector_class(
DocumentSource.LOOPIO: LoopioConnector,
DocumentSource.DROPBOX: DropboxConnector,
DocumentSource.SHAREPOINT: SharepointConnector,
DocumentSource.TEAMS: TeamsConnector,
DocumentSource.DISCOURSE: DiscourseConnector,
DocumentSource.AXERO: AxeroConnector,
DocumentSource.MEDIAWIKI: MediaWikiConnector,

View File

@ -164,19 +164,19 @@ class SharepointConnector(LoadConnector, PollConnector):
yield doc_batch
def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
aad_client_id = credentials["aad_client_id"]
aad_client_secret = credentials["aad_client_secret"]
aad_directory_id = credentials["aad_directory_id"]
sp_client_id = credentials["sp_client_id"]
sp_client_secret = credentials["sp_client_secret"]
sp_directory_id = credentials["sp_directory_id"]
def _acquire_token_func() -> dict[str, Any]:
"""
Acquire token via MSAL
"""
authority_url = f"https://login.microsoftonline.com/{aad_directory_id}"
authority_url = f"https://login.microsoftonline.com/{sp_directory_id}"
app = msal.ConfidentialClientApplication(
authority=authority_url,
client_id=aad_client_id,
client_credential=aad_client_secret,
client_id=sp_client_id,
client_credential=sp_client_secret,
)
token = app.acquire_token_for_client(
scopes=["https://graph.microsoft.com/.default"]
@ -202,9 +202,9 @@ if __name__ == "__main__":
connector.load_credentials(
{
"aad_client_id": os.environ["AAD_CLIENT_ID"],
"aad_client_secret": os.environ["AAD_CLIENT_SECRET"],
"aad_directory_id": os.environ["AAD_CLIENT_DIRECTORY_ID"],
"sp_client_id": os.environ["SP_CLIENT_ID"],
"sp_client_secret": os.environ["SP_CLIENT_SECRET"],
"sp_directory_id": os.environ["SP_CLIENT_DIRECTORY_ID"],
}
)
document_batches = connector.load_from_state()

View File

@ -0,0 +1,278 @@
import os
from datetime import datetime
from datetime import timezone
from typing import Any
import msal # type: ignore
from office365.graph_client import GraphClient # type: ignore
from office365.teams.channels.channel import Channel # type: ignore
from office365.teams.chats.messages.message import ChatMessage # type: ignore
from office365.teams.team import Team # type: ignore
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.file_processing.html_utils import parse_html_page_basic
from danswer.utils.logger import setup_logger
logger = setup_logger()
def get_created_datetime(chat_message: ChatMessage) -> datetime:
    """Return the creation time of ``chat_message`` as a datetime.

    The raw value is read from the message's ``createdDateTime`` property and
    parsed with ``time_str_to_utc``.
    """
    created_at_raw = chat_message.properties["createdDateTime"]
    return time_str_to_utc(created_at_raw)
def _extract_channel_members(channel: Channel) -> list[BasicExpertInfo]:
    """Fetch the members of ``channel`` as a list of ``BasicExpertInfo``.

    Each entry carries only the member's display name.
    """
    fetched_members = channel.members.get().execute_query()
    return [
        BasicExpertInfo(display_name=fetched_member.display_name)
        for fetched_member in fetched_members
    ]
def _get_threads_from_channel(
    channel: Channel,
    start: datetime | None = None,
    end: datetime | None = None,
) -> list[list[ChatMessage]]:
    """Return every thread of ``channel`` that falls inside the time window.

    A thread is a list whose first element is the top-level message, followed
    by all of its replies.

    Args:
        channel: The channel whose messages are fetched.
        start: Optional lower bound; threads whose top-level message was last
            modified before it are skipped. Naive values are treated as UTC.
        end: Optional upper bound; threads whose top-level message was last
            modified after it are skipped. Naive values are treated as UTC.

    Returns:
        One list per retained thread, in the order the API returned the
        top-level messages.
    """
    # Ensure start and end are timezone-aware so they can be compared with the
    # parsed message timestamps.
    if start and start.tzinfo is None:
        start = start.replace(tzinfo=timezone.utc)
    if end and end.tzinfo is None:
        end = end.replace(tzinfo=timezone.utc)

    # get_all() follows the paging links, exactly as is done for replies below.
    # NOTE(review): a plain get() appears to return only the first page of
    # messages, which would silently drop the rest of a busy channel.
    base_messages: list[ChatMessage] = channel.messages.get_all().execute_query()

    threads: list[list[ChatMessage]] = []
    for base_message in base_messages:
        message_datetime = time_str_to_utc(
            base_message.properties["lastModifiedDateTime"]
        )
        if start and message_datetime < start:
            continue
        if end and message_datetime > end:
            continue

        replies = base_message.replies.get_all().execute_query()

        # the thread is the base message followed by all of its replies
        threads.append([base_message, *replies])

    return threads
def _get_channels_from_teams(
    teams: list[Team],
) -> list[Channel]:
    """Query the channels of each team in ``teams`` and return them as one flat list.

    Channels keep the order in which the teams were given.
    """
    return [
        team_channel
        for current_team in teams
        for team_channel in current_team.channels.get().execute_query()
    ]
def _construct_semantic_identifier(channel: Channel, top_message: ChatMessage) -> str:
    """Build the human-readable identifier shown for a Teams thread.

    The result has the form
    ``"<poster> in <channel> about <subject>: <snippet>"``, where the snippet
    is the parsed body of ``top_message`` cut to 50 characters (plus ``...``
    when it was cut).

    Args:
        channel: The channel the thread belongs to.
        top_message: The top-level message of the thread.

    Returns:
        The identifier string. Missing or empty poster, channel name and
        subject fall back to ``"Unknown User"`` / ``"Unknown"``.
    """
    message_props = top_message.properties

    # `dict.get(key, default)` only applies the default when the key is absent.
    # A key that is present but holds None would make the chained `.get` calls
    # raise AttributeError, so every level falls back with `or` instead.
    sender = (message_props.get("from") or {}).get("user") or {}
    first_poster = sender.get("displayName") or "Unknown User"
    channel_name = channel.properties.get("displayName") or "Unknown"
    thread_subject = message_props.get("subject") or "Unknown"

    # The top message may have no body content at all; avoid calling
    # `.rstrip()` on None in that case.
    raw_body = top_message.body.content or ""
    snippet = parse_html_page_basic(raw_body.rstrip()) if raw_body else ""
    if len(snippet) > 50:
        snippet = snippet[:50] + "..."

    return f"{first_poster} in {channel_name} about {thread_subject}: {snippet}"
def _convert_thread_to_document(
    channel: Channel,
    thread: list[ChatMessage],
) -> Document | None:
    """Convert one Teams thread into a ``Document``.

    Args:
        channel: The channel the thread was posted in. Used for the semantic
            identifier and as a fallback source of owners.
        thread: The top-level message followed by its replies.

    Returns:
        A ``Document`` whose single section holds the parsed text of every
        message (one message per line), or ``None`` when the thread is empty
        or none of its messages has any text.
    """
    if not thread:
        return None

    # The thread is non-empty here, so there is always a most recent message.
    most_recent_message_datetime = max(
        get_created_datetime(message) for message in thread
    )

    top_message = thread[0]
    message_texts: list[str] = []
    sender_names: list[str] = []
    seen_senders: set[str] = set()

    for message in thread:
        if message.body.content:
            message_texts.append(parse_html_page_basic(message.body.content))

        # if it has a subject, that means it is the top level post message,
        # so its id, url, and subject are the ones used for the document
        if message.properties.get("subject"):
            top_message = message

        # "from", "user" and "displayName" may each be missing or empty;
        # only record senders that have a usable display name, once each.
        sender = (message.properties.get("from") or {}).get("user") or {}
        message_sender = sender.get("displayName")
        if message_sender and message_sender not in seen_senders:
            seen_senders.add(message_sender)
            sender_names.append(message_sender)

    # Separate messages with a newline so consecutive messages do not run
    # together in the indexed text.
    thread_text = "\n".join(text for text in message_texts if text)
    if not thread_text:
        # Bail out before the channel-member lookup below: there is nothing
        # to index, so the extra API call would be wasted.
        return None

    post_members_list = [
        BasicExpertInfo(display_name=sender_name) for sender_name in sender_names
    ]
    # if there are no found post members, grab the members from the parent channel
    if not post_members_list:
        post_members_list = _extract_channel_members(channel)

    semantic_string = _construct_semantic_identifier(channel, top_message)

    return Document(
        id=top_message.properties["id"],
        sections=[Section(link=top_message.web_url, text=thread_text)],
        source=DocumentSource.TEAMS,
        semantic_identifier=semantic_string,
        title="",  # teams threads don't really have a "title"
        doc_updated_at=most_recent_message_datetime,
        primary_owners=post_members_list,
        metadata={},
    )
class TeamsConnector(LoadConnector, PollConnector):
    """Connector that turns Microsoft Teams channel threads into documents.

    Teams, channels and messages are read through an ``office365``
    ``GraphClient`` that authenticates with an MSAL client-credentials flow.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        teams: list[str] | None = None,
    ) -> None:
        """Create an unauthenticated connector.

        Args:
            batch_size: Number of documents per yielded batch.
            teams: Display names of the teams to index. ``None`` or an empty
                list means every team returned by the API is indexed.
        """
        self.batch_size = batch_size
        # Set by load_credentials(); None until then.
        self.graph_client: GraphClient | None = None
        # Copy the caller's list so later changes to it (or a shared default)
        # can never leak into this instance.
        self.requested_team_list: list[str] = list(teams) if teams else []

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the Graph client from the given Azure application credentials.

        Args:
            credentials: Must contain ``teams_client_id``,
                ``teams_client_secret`` and ``teams_directory_id``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        teams_client_id = credentials["teams_client_id"]
        teams_client_secret = credentials["teams_client_secret"]
        teams_directory_id = credentials["teams_directory_id"]

        def _acquire_token_func() -> dict[str, Any]:
            """
            Acquire token via MSAL
            """
            authority_url = f"https://login.microsoftonline.com/{teams_directory_id}"
            app = msal.ConfidentialClientApplication(
                authority=authority_url,
                client_id=teams_client_id,
                client_credential=teams_client_secret,
            )
            token = app.acquire_token_for_client(
                scopes=["https://graph.microsoft.com/.default"]
            )
            return token

        self.graph_client = GraphClient(_acquire_token_func)
        return None

    def _get_all_teams(self) -> list[Team]:
        """Return the teams to index.

        When specific teams were requested, only teams whose display name
        matches one of them (ignoring spaces) are returned; otherwise every
        team returned by the API is.

        Raises:
            ConnectorMissingCredentialError: If credentials were not loaded.
        """
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams")

        teams = self.graph_client.teams.get().execute_query()
        if not self.requested_team_list:
            return list(teams)

        # Names are compared with all spaces removed; a set keeps each lookup O(1).
        requested_names = {
            requested_team.replace(" ", "")
            for requested_team in self.requested_team_list
        }
        return [
            team
            for team in teams
            if team.display_name.replace(" ", "") in requested_names
        ]

    def _fetch_from_teams(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield batches of documents for all threads in the selected teams.

        Args:
            start: Optional lower bound passed on to the thread lookup.
            end: Optional upper bound passed on to the thread lookup.

        Raises:
            ConnectorMissingCredentialError: If credentials were not loaded.
        """
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams")

        teams = self._get_all_teams()
        channels = _get_channels_from_teams(
            teams=teams,
        )

        # goes over channels, converts them into Document objects and then
        # yields them in batches
        doc_batch: list[Document] = []
        for channel in channels:
            thread_list = _get_threads_from_channel(channel, start=start, end=end)
            for thread in thread_list:
                converted_doc = _convert_thread_to_document(channel, thread)
                if converted_doc:
                    doc_batch.append(converted_doc)

                if len(doc_batch) >= self.batch_size:
                    yield doc_batch
                    doc_batch = []
        # The remaining batch is always yielded, even when it is empty.
        yield doc_batch

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Yield document batches for all threads, with no time restriction."""
        return self._fetch_from_teams()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Yield document batches for threads inside ``[start, end]``.

        Args:
            start: Window start, in seconds since the Unix epoch.
            end: Window end, in seconds since the Unix epoch.
        """
        # Build timezone-aware UTC datetimes directly; utcfromtimestamp()
        # returns naive values and is deprecated in recent Python versions.
        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
        return self._fetch_from_teams(start=start_datetime, end=end_datetime)
if __name__ == "__main__":
    # Manual smoke test: index the comma-separated teams named in $TEAMS and
    # print the first batch of documents.
    requested_teams = os.environ["TEAMS"].split(",")
    connector = TeamsConnector(teams=requested_teams)

    env_credentials = {
        "teams_client_id": os.environ["TEAMS_CLIENT_ID"],
        "teams_client_secret": os.environ["TEAMS_CLIENT_SECRET"],
        "teams_directory_id": os.environ["TEAMS_CLIENT_DIRECTORY_ID"],
    }
    connector.load_credentials(env_credentials)

    first_batch = next(connector.load_from_state())
    print(first_batch)

BIN
web/public/Teams.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

View File

@ -78,7 +78,7 @@ const MainSection = () => {
const sharepointCredential: Credential<SharepointCredentialJson> | undefined =
credentialsData.find(
(credential) => credential.credential_json?.aad_client_id
(credential) => credential.credential_json?.sp_client_id
);
return (
@ -98,7 +98,7 @@ const MainSection = () => {
<div className="flex mb-1 text-sm">
<Text className="my-auto">Existing Azure AD Client ID: </Text>
<Text className="ml-1 italic my-auto">
{sharepointCredential.credential_json.aad_client_id}
{sharepointCredential.credential_json.sp_client_id}
</Text>
<button
className="ml-1 hover:bg-hover rounded p-1"
@ -130,35 +130,35 @@ const MainSection = () => {
formBody={
<>
<TextFormField
name="aad_client_id"
name="sp_client_id"
label="Application (client) ID:"
/>
<TextFormField
name="aad_directory_id"
name="sp_directory_id"
label="Directory (tenant) ID:"
/>
<TextFormField
name="aad_client_secret"
name="sp_client_secret"
label="Client Secret:"
type="password"
/>
</>
}
validationSchema={Yup.object().shape({
aad_client_id: Yup.string().required(
sp_client_id: Yup.string().required(
"Please enter your Application (client) ID"
),
aad_directory_id: Yup.string().required(
sp_directory_id: Yup.string().required(
"Please enter your Directory (tenant) ID"
),
aad_client_secret: Yup.string().required(
sp_client_secret: Yup.string().required(
"Please enter your Client Secret"
),
})}
initialValues={{
aad_client_id: "",
aad_directory_id: "",
aad_client_secret: "",
sp_client_id: "",
sp_directory_id: "",
sp_client_secret: "",
}}
onSubmit={(isSuccess) => {
if (isSuccess) {
@ -186,7 +186,7 @@ const MainSection = () => {
connectorIndexingStatuses={sharepointConnectorIndexingStatuses}
liveCredential={sharepointCredential}
getCredential={(credential) =>
credential.credential_json.aad_directory_id
credential.credential_json.sp_directory_id
}
onUpdate={() =>
mutate("/api/manage/admin/connector/indexing-status")

View File

@ -0,0 +1,264 @@
"use client";
import * as Yup from "yup";
import { TrashIcon, TeamsIcon } from "@/components/icons/icons"; // Make sure you have a Document360 icon
import { fetcher } from "@/lib/fetcher";
import useSWR, { useSWRConfig } from "swr";
import { LoadingAnimation } from "@/components/Loading";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import {
TeamsConfig,
TeamsCredentialJson,
ConnectorIndexingStatus,
Credential,
} from "@/lib/types"; // Modify or create these types as required
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
TextFormField,
TextArrayFieldBuilder,
} from "@/components/admin/connectors/Field";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { usePublicCredentials } from "@/lib/hooks";
import { AdminPageTitle } from "@/components/admin/Title";
import { Card, Text, Title } from "@tremor/react";
// Builds the default connector / cc-pair name from the configured teams;
// falls back to plain "Teams" when no team is listed.
const buildTeamsConnectorName = (values: TeamsConfig): string =>
  values.teams && values.teams.length > 0
    ? `Teams-${values.teams.join("-")}`
    : "Teams";

/**
 * Body of the Teams connector admin page.
 *
 * Step 1 shows the stored Teams credential (with a delete button) or, when
 * none exists, a form to create one. Step 2 lists the existing Teams
 * connectors and, once a credential exists, a form to create a new one.
 */
const MainSection = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );
  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while a request is in flight and has no data yet.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  // Only the connectors whose source is "teams" are shown on this page.
  const teamsConnectorIndexingStatuses: ConnectorIndexingStatus<
    TeamsConfig,
    TeamsCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "teams"
  );

  // A credential counts as a Teams credential when it has a teams_client_id.
  const teamsCredential: Credential<TeamsCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.teams_client_id
    );

  return (
    <>
      <Text>
        The Teams connector allows you to index and search through your Teams
        channels. Once setup, all messages from the channels contained in the
        specified teams will be queryable within Danswer.
      </Text>

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide Teams credentials
      </Title>
      {teamsCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <Text className="my-auto">Existing Azure AD Client ID: </Text>
            <Text className="ml-1 italic my-auto">
              {teamsCredential.credential_json.teams_client_id}
            </Text>
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                await adminDeleteCredential(teamsCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text className="mb-2">
            As a first step, please provide Application (client) ID, Directory
            (tenant) ID, and Client Secret. You can follow the guide{" "}
            {/* rel keeps the newly opened tab from reaching back into this page */}
            <a
              target="_blank"
              rel="noopener noreferrer"
              href="https://docs.danswer.dev/connectors/teams"
              className="text-link"
            >
              here
            </a>{" "}
            to create an Azure AD application and obtain these values.
          </Text>
          <Card className="mt-2">
            <CredentialForm<TeamsCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="teams_client_id"
                    label="Application (client) ID:"
                  />
                  <TextFormField
                    name="teams_directory_id"
                    label="Directory (tenant) ID:"
                  />
                  <TextFormField
                    name="teams_client_secret"
                    label="Client Secret:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                teams_client_id: Yup.string().required(
                  "Please enter your Application (client) ID"
                ),
                teams_directory_id: Yup.string().required(
                  "Please enter your Directory (tenant) ID"
                ),
                teams_client_secret: Yup.string().required(
                  "Please enter your Client Secret"
                ),
              })}
              initialValues={{
                teams_client_id: "",
                teams_directory_id: "",
                teams_client_secret: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </Card>
        </>
      )}

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 2: Manage Teams Connector
      </Title>

      {teamsConnectorIndexingStatuses.length > 0 && (
        <>
          <Text className="mb-2">
            The latest messages from the specified teams are fetched every 10
            minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<TeamsConfig, TeamsCredentialJson>
              connectorIndexingStatuses={teamsConnectorIndexingStatuses}
              liveCredential={teamsCredential}
              getCredential={(credential) =>
                credential.credential_json.teams_directory_id
              }
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
              onCredentialLink={async (connectorId) => {
                if (teamsCredential) {
                  await linkCredential(connectorId, teamsCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              specialColumns={[
                {
                  header: "Connectors",
                  key: "connectors",
                  getValue: (ccPairStatus) => {
                    const connectorConfig =
                      ccPairStatus.connector.connector_specific_config;
                    return `${connectorConfig.teams}`;
                  },
                },
              ]}
              includeName
            />
          </div>
        </>
      )}

      {teamsCredential ? (
        <Card className="mt-4">
          <ConnectorForm<TeamsConfig>
            nameBuilder={buildTeamsConnectorName}
            ccPairNameBuilder={buildTeamsConnectorName}
            source="teams"
            inputType="poll"
            formBodyBuilder={TextArrayFieldBuilder({
              name: "teams",
              label: "Teams:",
              subtext:
                "Specify 0 or more Teams to index. " +
                "For example, specifying the Team 'Support' for the 'danswerai' Org will cause " +
                "us to only index messages sent in channels belonging to the 'Support' Team. " +
                "If no Teams are specified, all Teams in your organization will be indexed.",
            })}
            validationSchema={Yup.object().shape({
              teams: Yup.array()
                .of(Yup.string().required("Team names must be strings"))
                .required(),
            })}
            initialValues={{
              teams: [],
            }}
            credentialId={teamsCredential.id}
            refreshFreq={10 * 60} // 10 minutes
          />
        </Card>
      ) : (
        <Text>
          Please provide all Azure info in Step 1 first! Once you&apos;re done
          with that, you can then specify which teams you want to make
          searchable.
        </Text>
      )}
    </>
  );
};
/**
 * Admin page for the Teams connector: health-check banner, page title with
 * the Teams icon, then the credential / connector management section.
 */
export default function Page() {
  const titleIcon = <TeamsIcon size={32} />;

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>

      <AdminPageTitle icon={titleIcon} title="Teams" />

      <MainSection />
    </div>
  );
}

View File

@ -53,6 +53,7 @@ import googleSitesIcon from "../../../public/GoogleSites.png";
import zendeskIcon from "../../../public/Zendesk.svg";
import dropboxIcon from "../../../public/Dropbox.png";
import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
import mediawikiIcon from "../../../public/MediaWiki.svg";
import wikipediaIcon from "../../../public/Wikipedia.svg";
import discourseIcon from "../../../public/Discourse.png";
@ -538,6 +539,18 @@ export const SharepointIcon = ({
</div>
);
// Teams logo rendered inside a square wrapper of `size` pixels.
export const TeamsIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  // The same pixel length is used for the inline style and the class names.
  const edge = `${size}px`;
  const wrapperClasses = `w-[${edge}] h-[${edge}] ${className}`;

  return (
    <div style={{ width: edge, height: edge }} className={wrapperClasses}>
      <Image src={teamsIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
export const GongIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -22,6 +22,7 @@ import {
ProductboardIcon,
RequestTrackerIcon,
SharepointIcon,
TeamsIcon,
SlabIcon,
SlackIcon,
ZendeskIcon,
@ -164,6 +165,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Sharepoint",
category: SourceCategory.AppConnection,
},
teams: {
icon: TeamsIcon,
displayName: "Teams",
category: SourceCategory.AppConnection,
},
discourse: {
icon: DiscourseIcon,
displayName: "Discourse",

View File

@ -44,6 +44,7 @@ export type ValidSources =
| "loopio"
| "dropbox"
| "sharepoint"
| "teams"
| "zendesk"
| "discourse"
| "axero"
@ -127,6 +128,10 @@ export interface SharepointConfig {
sites?: string[];
}
// Connector-specific configuration for a Teams connector.
export interface TeamsConfig {
// Names of the teams to index; optional.
teams?: string[];
}
export interface DiscourseConfig {
base_url: string;
categories?: string[];
@ -136,6 +141,10 @@ export interface AxeroConfig {
spaces?: string[];
}
// Connector-specific configuration for a Teams connector.
// NOTE(review): this looks like a second, identical declaration of the
// TeamsConfig interface that also appears earlier in this file. Confirm and
// keep only one of them.
export interface TeamsConfig {
// Names of the teams to index; optional.
teams?: string[];
}
export interface ProductboardConfig {}
export interface SlackConfig {
@ -366,9 +375,15 @@ export interface DropboxCredentialJson {
}
export interface SharepointCredentialJson {
aad_client_id: string;
aad_client_secret: string;
aad_directory_id: string;
sp_client_id: string;
sp_client_secret: string;
sp_directory_id: string;
}
// Shape of the credential JSON stored for the Teams connector.
export interface TeamsCredentialJson {
// Application (client) ID of the Azure AD application.
teams_client_id: string;
// Client secret of the Azure AD application.
teams_client_secret: string;
// Directory (tenant) ID of the Azure AD application.
teams_directory_id: string;
}
export interface DiscourseCredentialJson {