added teams connector

This commit is contained in:
Hagenoneill 2024-02-29 12:36:05 -05:00
parent 514e7f6e41
commit 73b063b66c
9 changed files with 520 additions and 0 deletions

View File

@ -83,6 +83,7 @@ class DocumentSource(str, Enum):
ZENDESK = "zendesk"
LOOPIO = "loopio"
SHAREPOINT = "sharepoint"
TEAMS = "teams"
class DocumentIndexType(str, Enum):

View File

@ -26,6 +26,7 @@ from danswer.connectors.notion.connector import NotionConnector
from danswer.connectors.productboard.connector import ProductboardConnector
from danswer.connectors.requesttracker.connector import RequestTrackerConnector
from danswer.connectors.sharepoint.connector import SharepointConnector
from danswer.connectors.teams.connector import TeamsConnector
from danswer.connectors.slab.connector import SlabConnector
from danswer.connectors.slack.connector import SlackPollConnector
from danswer.connectors.slack.load_connector import SlackLoadConnector
@ -70,6 +71,7 @@ def identify_connector_class(
DocumentSource.ZENDESK: ZendeskConnector,
DocumentSource.LOOPIO: LoopioConnector,
DocumentSource.SHAREPOINT: SharepointConnector,
DocumentSource.TEAMS: TeamsConnector,
}
connector_by_source = connector_map.get(source, {})

View File

@ -0,0 +1,223 @@
import io
import os
import tempfile
from datetime import datetime
from datetime import timezone
from typing import Any
from html.parser import HTMLParser
import docx # type: ignore
import msal # type: ignore
import openpyxl # type: ignore
# import pptx # type: ignore
from office365.graph_client import GraphClient # type: ignore
from office365.teams.team import Team
from office365.teams.channels.channel import Channel
from office365.teams.chats.messages.message import ChatMessage
from office365.outlook.mail.item_body import ItemBody
from danswer.configs.app_configs import INDEX_BATCH_SIZE
from danswer.configs.constants import DocumentSource
from danswer.connectors.cross_connector_utils.file_utils import is_text_file_extension
from danswer.connectors.cross_connector_utils.file_utils import read_pdf_file
from danswer.connectors.interfaces import GenerateDocumentsOutput
from danswer.connectors.interfaces import LoadConnector
from danswer.connectors.interfaces import PollConnector
from danswer.connectors.interfaces import SecondsSinceUnixEpoch
from danswer.connectors.models import BasicExpertInfo
from danswer.connectors.models import ConnectorMissingCredentialError
from danswer.connectors.models import Document
from danswer.connectors.models import Section
from danswer.utils.logger import setup_logger
# Empty-string constant; the idea is copied from the Google Drive connector.
# NOTE(review): presumably the stand-in text for unsupported file types, but it is
# not referenced anywhere in the visible part of this module; confirm it is needed.
UNSUPPORTED_FILE_TYPE_CONTENT = "" # idea copied from the google drive side of things
# Module-level logger obtained from the project's logging helper.
logger = setup_logger()
class HTMLFilter(HTMLParser):
    """HTML parser that keeps only the character data of the markup fed to it.

    After one or more ``feed()`` calls, ``text`` holds every text node seen so
    far, concatenated in document order with nothing inserted between them.
    """

    # Class-level default; the first ``handle_data`` call rebinds ``text`` on
    # the instance, so separate parsers never share accumulated text.
    text = ""

    def handle_data(self, data):
        """Append one text node reported by the parser to ``text``."""
        self.text = f"{self.text}{data}"
def get_created_datetime(obj: "ChatMessage") -> datetime:
    """Return the creation time of a chat message as a timezone-aware datetime.

    Args:
        obj: Message whose ``properties`` mapping holds an ISO-8601
            ``createdDateTime`` string such as ``2024-02-29T17:36:05.123Z``.

    Returns:
        The parsed, timezone-aware ``datetime``.

    Raises:
        KeyError: If ``createdDateTime`` is missing from ``properties``.
        ValueError: If the value matches neither supported timestamp layout.
    """
    created_datetime_str = obj.properties["createdDateTime"]
    try:
        return datetime.strptime(created_datetime_str, "%Y-%m-%dT%H:%M:%S.%f%z")
    except ValueError:
        # ``%f`` requires a fractional part, so a whole-second timestamp
        # (e.g. ``2024-02-29T17:36:05Z``) needs the layout without it.
        return datetime.strptime(created_datetime_str, "%Y-%m-%dT%H:%M:%S%z")
class TeamsConnector(LoadConnector, PollConnector):
    """Index Microsoft Teams channels through the Microsoft Graph client.

    Each channel of the selected teams becomes a single ``Document`` whose
    text is the concatenation of the channel's message bodies with the HTML
    markup stripped.
    """

    def __init__(
        self,
        batch_size: int = INDEX_BATCH_SIZE,
        teams: list[str] | None = None,
    ) -> None:
        """Create a connector without credentials.

        Args:
            batch_size: Maximum number of documents per yielded batch.
            teams: Display names of the teams to index. ``None`` or an empty
                list selects every team returned by the Graph client.
        """
        self.batch_size = batch_size
        self.graph_client: GraphClient | None = None
        # Copy the argument so the connector never shares a list object with
        # the caller (or with a default value reused across instances).
        self.requested_team_list: list[str] = list(teams) if teams else []

    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
        """Build the Graph client from Azure AD application credentials.

        Args:
            credentials: Must contain ``aad_client_id``, ``aad_client_secret``
                and ``aad_directory_id``.

        Returns:
            Always ``None``.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        aad_client_id = credentials["aad_client_id"]
        aad_client_secret = credentials["aad_client_secret"]
        aad_directory_id = credentials["aad_directory_id"]

        def _acquire_token_func() -> dict[str, Any]:
            """Acquire a Graph token for the application via MSAL."""
            authority_url = f"https://login.microsoftonline.com/{aad_directory_id}"
            app = msal.ConfidentialClientApplication(
                authority=authority_url,
                client_id=aad_client_id,
                client_credential=aad_client_secret,
            )
            return app.acquire_token_for_client(
                scopes=["https://graph.microsoft.com/.default"]
            )

        self.graph_client = GraphClient(_acquire_token_func)
        return None

    def get_message_list_from_channel(
        self, channel_object: Channel
    ) -> list[ChatMessage]:
        """Return the messages the Graph client yields for ``channel_object``."""
        # NOTE(review): only what a single ``get().execute_query()`` returns is
        # collected; confirm whether further result pages need to be requested.
        return list(channel_object.messages.get().execute_query())

    def get_all_channels(
        self,
        team_object_list: list[Team],
        start: datetime | None = None,
        end: datetime | None = None,
    ) -> list[Channel]:
        """Collect the channels of every team in ``team_object_list``.

        Args:
            team_object_list: Teams whose channels should be listed.
            start: Lower bound of the modification window.
            end: Upper bound of the modification window.

        Returns:
            The channels of all given teams. The time filter is applied only
            when both ``start`` and ``end`` are provided.
        """
        filter_str = ""
        if start is not None and end is not None:
            # NOTE(review): confirm that the channel listing accepts a filter on
            # ``last_modified_datetime`` and this timestamp format.
            filter_str = f"last_modified_datetime ge {start.isoformat()} and last_modified_datetime le {end.isoformat()}"

        channel_list: list[Channel] = []
        for team_object in team_object_list:
            query = team_object.channels.get()
            if filter_str:
                query = query.filter(filter_str)
            channel_list.extend(query.execute_query())
        return channel_list

    def get_all_teams_objects(self) -> list[Team]:
        """Return the teams to index.

        Team names are compared with all spaces removed. When no team was
        requested, every team returned by the Graph client is selected.

        Raises:
            ConnectorMissingCredentialError: If credentials were never loaded.
        """
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams")

        teams_object = self.graph_client.teams.get().execute_query()
        if not self.requested_team_list:
            return list(teams_object)

        # A set makes each team match at most once, so duplicate entries in the
        # requested list cannot produce duplicate documents.
        requested_names = {
            requested_team.replace(" ", "")
            for requested_team in self.requested_team_list
        }
        return [
            team_object
            for team_object in teams_object
            # ``display_name`` may be unset; treat it as an empty name.
            if (team_object.display_name or "").replace(" ", "") in requested_names
        ]

    def _fetch_from_teams(
        self, start: datetime | None = None, end: datetime | None = None
    ) -> GenerateDocumentsOutput:
        """Yield channel documents in batches of at most ``batch_size``.

        Args:
            start: Optional lower bound forwarded to ``get_all_channels``.
            end: Optional upper bound forwarded to ``get_all_channels``.

        Raises:
            ConnectorMissingCredentialError: On first iteration, if credentials
                were never loaded.
        """
        if self.graph_client is None:
            raise ConnectorMissingCredentialError("Teams")

        team_object_list = self.get_all_teams_objects()
        channel_list = self.get_all_channels(
            team_object_list=team_object_list,
            start=start,
            end=end,
        )

        doc_batch: list[Document] = []
        for channel_object in channel_list:
            doc_batch.append(self.convert_channel_object_to_document(channel_object))
            if len(doc_batch) >= self.batch_size:
                yield doc_batch
                doc_batch = []

        # Flush the remainder, but never emit an empty trailing batch.
        if doc_batch:
            yield doc_batch

    def convert_channel_object_to_document(
        self,
        channel_object: Channel,
    ) -> Document:
        """Build the ``Document`` for one channel.

        The document carries the channel's combined message text, its members
        as primary owners and the creation time of its newest message as
        ``doc_updated_at``.
        """
        channel_text, most_recent_message_datetime = (
            self.extract_channel_text_and_latest_datetime(channel_object)
        )
        channel_members = self.extract_channel_members(channel_object)
        return Document(
            id=channel_object.id,
            sections=[Section(link=channel_object.web_url, text=channel_text)],
            source=DocumentSource.TEAMS,
            semantic_identifier=channel_object.properties["displayName"],
            doc_updated_at=most_recent_message_datetime,
            primary_owners=channel_members,
            metadata={},
        )

    def extract_channel_members(
        self, channel_object: Channel
    ) -> list[BasicExpertInfo]:
        """Return the channel's members, identified by display name only."""
        member_objects = channel_object.members.get().execute_query()
        return [
            BasicExpertInfo(display_name=member_object.display_name)
            for member_object in member_objects
        ]

    def extract_channel_text_and_latest_datetime(
        self, channel_object: Channel
    ) -> tuple[str, datetime | None]:
        """Return the channel's plain text and the time of its newest message.

        Returns:
            ``(text, latest)``. ``text`` is every non-empty message body with
            the HTML stripped, concatenated in the order the messages were
            returned. ``latest`` is the newest ``createdDateTime``, or ``None``
            when the channel has no messages.
        """
        message_list = self.get_message_list_from_channel(channel_object)

        # Only the newest timestamp is needed, so a single pass replaces a sort.
        most_recent_datetime: datetime | None = max(
            (get_created_datetime(message) for message in message_list),
            default=None,
        )

        text_parts: list[str] = []
        for message in message_list:
            if message.body.content:
                html_parser = HTMLFilter()
                html_parser.feed(message.body.content)
                text_parts.append(html_parser.text)

        return "".join(text_parts), most_recent_datetime

    def load_from_state(self) -> GenerateDocumentsOutput:
        """Index every selected channel without a time window."""
        return self._fetch_from_teams()

    def poll_source(
        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
    ) -> GenerateDocumentsOutput:
        """Index channels for the window ``[start, end]``.

        Args:
            start: Window start in seconds since the Unix epoch.
            end: Window end in seconds since the Unix epoch.
        """
        # Both values are naive datetimes expressed in UTC.
        start_datetime = datetime.utcfromtimestamp(start)
        end_datetime = datetime.utcfromtimestamp(end)
        return self._fetch_from_teams(start=start_datetime, end=end_datetime)
if __name__ == "__main__":
    # Manual smoke test: reads a comma-separated list of team names from TEAMS
    # and the Azure AD application credentials from the environment, then
    # prints the first batch of documents.
    # ``TeamsConnector`` accepts ``teams``; the previous ``sites=`` keyword
    # raised a TypeError before anything was fetched.
    connector = TeamsConnector(teams=os.environ["TEAMS"].split(","))
    connector.load_credentials(
        {
            "aad_client_id": os.environ["AAD_CLIENT_ID"],
            "aad_client_secret": os.environ["AAD_CLIENT_SECRET"],
            "aad_directory_id": os.environ["AAD_CLIENT_DIRECTORY_ID"],
        }
    )
    document_batches = connector.load_from_state()
    # The default keeps the script from ending in StopIteration when the
    # generator produces no batch at all.
    print(next(document_batches, None))

BIN
web/public/Teams.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 124 KiB

View File

@ -0,0 +1,264 @@
"use client";
import * as Yup from "yup";
import { TrashIcon, TeamsIcon } from "@/components/icons/icons"; // TeamsIcon renders public/Teams.png
import { fetcher } from "@/lib/fetcher";
import useSWR, { useSWRConfig } from "swr";
import { LoadingAnimation } from "@/components/Loading";
import { HealthCheckBanner } from "@/components/health/healthcheck";
import {
TeamsConfig,
TeamsCredentialJson,
ConnectorIndexingStatus,
Credential,
} from "@/lib/types"; // Modify or create these types as required
import { adminDeleteCredential, linkCredential } from "@/lib/credential";
import { CredentialForm } from "@/components/admin/connectors/CredentialForm";
import {
TextFormField,
TextArrayFieldBuilder,
} from "@/components/admin/connectors/Field";
import { ConnectorsTable } from "@/components/admin/connectors/table/ConnectorsTable";
import { ConnectorForm } from "@/components/admin/connectors/ConnectorForm";
import { usePublicCredentials } from "@/lib/hooks";
import { AdminPageTitle } from "@/components/admin/Title";
import { Card, Text, Title } from "@tremor/react";
/**
 * Body of the Teams admin page.
 *
 * Step 1 shows the stored Azure AD credential (with a delete button) or a
 * form to create one. Step 2 lists the existing Teams connectors and, once a
 * credential exists, shows the form for creating a new connector.
 */
const MainSection = () => {
  const { mutate } = useSWRConfig();
  const {
    data: connectorIndexingStatuses,
    isLoading: isConnectorIndexingStatusesLoading,
    error: isConnectorIndexingStatusesError,
  } = useSWR<ConnectorIndexingStatus<any, any>[]>(
    "/api/manage/admin/connector/indexing-status",
    fetcher
  );

  const {
    data: credentialsData,
    isLoading: isCredentialsLoading,
    error: isCredentialsError,
    refreshCredentials,
  } = usePublicCredentials();

  // Show the spinner only while there is no data yet to display.
  if (
    (!connectorIndexingStatuses && isConnectorIndexingStatusesLoading) ||
    (!credentialsData && isCredentialsLoading)
  ) {
    return <LoadingAnimation text="Loading" />;
  }

  if (isConnectorIndexingStatusesError || !connectorIndexingStatuses) {
    return <div>Failed to load connectors</div>;
  }

  if (isCredentialsError || !credentialsData) {
    return <div>Failed to load credentials</div>;
  }

  const teamsConnectorIndexingStatuses: ConnectorIndexingStatus<
    TeamsConfig,
    TeamsCredentialJson
  >[] = connectorIndexingStatuses.filter(
    (connectorIndexingStatus) =>
      connectorIndexingStatus.connector.source === "teams"
  );

  // NOTE(review): the first credential carrying an `aad_client_id` is treated
  // as the Teams credential. Sharepoint credentials appear to use the same
  // field names, so one of those may be picked up here; confirm.
  const teamsCredential: Credential<TeamsCredentialJson> | undefined =
    credentialsData.find(
      (credential) => credential.credential_json?.aad_client_id
    );

  return (
    <>
      <Text>
        The Teams connector allows you to index and search through your
        Teams channels. Once setup, all messages from the channels contained
        in the specified teams will be queryable within Danswer.
      </Text>

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 1: Provide Teams credentials
      </Title>
      {teamsCredential ? (
        <>
          <div className="flex mb-1 text-sm">
            <Text className="my-auto">Existing Azure AD Client ID: </Text>
            <Text className="ml-1 italic my-auto">
              {teamsCredential.credential_json.aad_client_id}
            </Text>
            <button
              className="ml-1 hover:bg-hover rounded p-1"
              onClick={async () => {
                await adminDeleteCredential(teamsCredential.id);
                refreshCredentials();
              }}
            >
              <TrashIcon />
            </button>
          </div>
        </>
      ) : (
        <>
          <Text className="mb-2">
            As a first step, please provide Application (client) ID, Directory
            (tenant) ID, and Client Secret. You can follow the guide{" "}
            {/* rel keeps the page opened in the new tab from reaching window.opener */}
            <a
              target="_blank"
              rel="noopener noreferrer"
              href="https://docs.danswer.dev/connectors/teams"
              className="text-link"
            >
              here
            </a>{" "}
            to create an Azure AD application and obtain these values.
          </Text>
          <Card className="mt-2">
            <CredentialForm<TeamsCredentialJson>
              formBody={
                <>
                  <TextFormField
                    name="aad_client_id"
                    label="Application (client) ID:"
                  />
                  <TextFormField
                    name="aad_directory_id"
                    label="Directory (tenant) ID:"
                  />
                  <TextFormField
                    name="aad_client_secret"
                    label="Client Secret:"
                    type="password"
                  />
                </>
              }
              validationSchema={Yup.object().shape({
                aad_client_id: Yup.string().required(
                  "Please enter your Application (client) ID"
                ),
                aad_directory_id: Yup.string().required(
                  "Please enter your Directory (tenant) ID"
                ),
                aad_client_secret: Yup.string().required(
                  "Please enter your Client Secret"
                ),
              })}
              initialValues={{
                aad_client_id: "",
                aad_directory_id: "",
                aad_client_secret: "",
              }}
              onSubmit={(isSuccess) => {
                if (isSuccess) {
                  refreshCredentials();
                }
              }}
            />
          </Card>
        </>
      )}

      <Title className="mb-2 mt-6 ml-auto mr-auto">
        Step 2: Manage Teams Connector
      </Title>

      {teamsConnectorIndexingStatuses.length > 0 && (
        <>
          <Text className="mb-2">
            The latest messages from the specified teams are
            fetched every 10 minutes.
          </Text>
          <div className="mb-2">
            <ConnectorsTable<TeamsConfig, TeamsCredentialJson>
              connectorIndexingStatuses={teamsConnectorIndexingStatuses}
              liveCredential={teamsCredential}
              getCredential={(credential) =>
                credential.credential_json.aad_directory_id
              }
              onUpdate={() =>
                mutate("/api/manage/admin/connector/indexing-status")
              }
              onCredentialLink={async (connectorId) => {
                if (teamsCredential) {
                  await linkCredential(connectorId, teamsCredential.id);
                  mutate("/api/manage/admin/connector/indexing-status");
                }
              }}
              specialColumns={[
                {
                  header: "Connectors",
                  key: "connectors",
                  getValue: (ccPairStatus) => {
                    const connectorConfig =
                      ccPairStatus.connector.connector_specific_config;
                    // `teams` is optional: render an empty cell rather than
                    // the literal text "undefined" when it is missing.
                    return (connectorConfig.teams ?? []).join(", ");
                  },
                },
              ]}
              includeName
            />
          </div>
        </>
      )}

      {teamsCredential ? (
        <Card className="mt-4">
          <ConnectorForm<TeamsConfig>
            nameBuilder={(values) =>
              values.teams && values.teams.length > 0
                ? `Teams-${values.teams.join("-")}`
                : "Teams"
            }
            ccPairNameBuilder={(values) =>
              values.teams && values.teams.length > 0
                ? `Teams-${values.teams.join("-")}`
                : "Teams"
            }
            source="teams"
            inputType="poll"
            // formBody={<></>}
            formBodyBuilder={TextArrayFieldBuilder({
              name: "teams",
              label: "Teams:",
              subtext:
                "Specify 0 or more Teams to index. " +
                "For example, specifying the Team 'Support' for the 'danswerai' Org will cause " +
                "us to only index messages sent in channels belonging to the 'Support' Team. " +
                "If no Teams are specified, all Teams in your organization will be indexed.",
            })}
            validationSchema={Yup.object().shape({
              teams: Yup.array()
                .of(Yup.string().required("Team names must be strings"))
                .required(),
            })}
            initialValues={{
              teams: [],
            }}
            credentialId={teamsCredential.id}
            refreshFreq={10 * 60} // 10 minutes
          />
        </Card>
      ) : (
        <Text>
          Please provide all Azure info in Step 1 first! Once you&apos;re done
          with that, you can then specify which teams you want to
          make searchable.
        </Text>
      )}
    </>
  );
};
/** Teams admin page: health banner, page title and the connector management section. */
export default function Page() {
  // Icon displayed next to the page title
  const titleIcon = <TeamsIcon size={32} />;

  return (
    <div className="mx-auto container">
      <div className="mb-4">
        <HealthCheckBanner />
      </div>

      <AdminPageTitle icon={titleIcon} title="Teams" />

      <MainSection />
    </div>
  );
}

View File

@ -51,6 +51,7 @@ import document360Icon from "../../../public/Document360.png";
import googleSitesIcon from "../../../public/GoogleSites.png";
import zendeskIcon from "../../../public/Zendesk.svg";
import sharepointIcon from "../../../public/Sharepoint.png";
import teamsIcon from "../../../public/Teams.png";
import { FaRobot } from "react-icons/fa";
interface IconProps {
@ -526,6 +527,18 @@ export const SharepointIcon = ({
</div>
);
/** Teams logo rendered inside a square wrapper that is `size` pixels on each side. */
export const TeamsIcon = ({
  size = 16,
  className = defaultTailwindCSS,
}: IconProps) => {
  // The same pixel length drives both the inline style and the class names
  const side = `${size}px`;

  return (
    <div
      style={{ width: side, height: side }}
      className={`w-[${side}] h-[${side}] ` + className}
    >
      <Image src={teamsIcon} alt="Logo" width="96" height="96" />
    </div>
  );
};
export const GongIcon = ({
size = 16,
className = defaultTailwindCSS,

View File

@ -19,6 +19,7 @@ import {
ProductboardIcon,
RequestTrackerIcon,
SharepointIcon,
TeamsIcon,
SlabIcon,
SlackIcon,
ZendeskIcon,
@ -154,6 +155,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
displayName: "Sharepoint",
category: SourceCategory.AppConnection,
},
teams: {
icon: TeamsIcon,
displayName: "Teams",
category: SourceCategory.AppConnection,
},
requesttracker: {
icon: RequestTrackerIcon,
displayName: "Request Tracker",

View File

@ -33,6 +33,7 @@ export type ValidSources =
| "google_sites"
| "loopio"
| "sharepoint"
| "teams"
| "zendesk";
export type ValidInputTypes = "load_state" | "poll" | "event";
@ -110,6 +111,10 @@ export interface SharepointConfig {
sites?: string[];
}
/** Connector-specific settings of a Teams connector. */
export interface TeamsConfig {
// Names of the Teams to index; when none are given, every Team in the organization is indexed.
teams?: string[];
}
export interface ProductboardConfig {}
export interface SlackConfig {
@ -314,6 +319,12 @@ export interface SharepointCredentialJson {
aad_directory_id: string;
}
/** Azure AD application credentials used by the Teams connector. */
export interface TeamsCredentialJson {
// Application (client) ID of the Azure AD application.
aad_client_id: string;
// Client secret of the Azure AD application.
aad_client_secret: string;
// Directory (tenant) ID of the Azure AD application.
aad_directory_id: string;
}
// DELETION
export interface DeletionAttemptSnapshot {