diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py
index 5f06a4acc..58a782541 100644
--- a/backend/danswer/configs/constants.py
+++ b/backend/danswer/configs/constants.py
@@ -95,6 +95,7 @@ class DocumentSource(str, Enum):
     LOOPIO = "loopio"
     DROPBOX = "dropbox"
     SHAREPOINT = "sharepoint"
+    TEAMS = "teams"
     DISCOURSE = "discourse"
     AXERO = "axero"
     MEDIAWIKI = "mediawiki"
diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py
index 4873147e4..41072eb74 100644
--- a/backend/danswer/connectors/factory.py
+++ b/backend/danswer/connectors/factory.py
@@ -33,6 +33,7 @@ from danswer.connectors.sharepoint.connector import SharepointConnector
 from danswer.connectors.slab.connector import SlabConnector
 from danswer.connectors.slack.connector import SlackPollConnector
 from danswer.connectors.slack.load_connector import SlackLoadConnector
+from danswer.connectors.teams.connector import TeamsConnector
 from danswer.connectors.web.connector import WebConnector
 from danswer.connectors.wikipedia.connector import WikipediaConnector
 from danswer.connectors.zendesk.connector import ZendeskConnector
@@ -76,6 +77,7 @@ def identify_connector_class(
         DocumentSource.LOOPIO: LoopioConnector,
         DocumentSource.DROPBOX: DropboxConnector,
         DocumentSource.SHAREPOINT: SharepointConnector,
+        DocumentSource.TEAMS: TeamsConnector,
         DocumentSource.DISCOURSE: DiscourseConnector,
         DocumentSource.AXERO: AxeroConnector,
         DocumentSource.MEDIAWIKI: MediaWikiConnector,
diff --git a/backend/danswer/connectors/sharepoint/connector.py b/backend/danswer/connectors/sharepoint/connector.py
index 45d5d7793..b66c010d7 100644
--- a/backend/danswer/connectors/sharepoint/connector.py
+++ b/backend/danswer/connectors/sharepoint/connector.py
@@ -164,19 +164,19 @@ class SharepointConnector(LoadConnector, PollConnector):
         yield doc_batch
 
     def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
-        aad_client_id = credentials["aad_client_id"]
-        aad_client_secret = credentials["aad_client_secret"]
-        aad_directory_id = credentials["aad_directory_id"]
+        sp_client_id = credentials["sp_client_id"]
+        sp_client_secret = credentials["sp_client_secret"]
+        sp_directory_id = credentials["sp_directory_id"]
 
         def _acquire_token_func() -> dict[str, Any]:
             """
             Acquire token via MSAL
             """
-            authority_url = f"https://login.microsoftonline.com/{aad_directory_id}"
+            authority_url = f"https://login.microsoftonline.com/{sp_directory_id}"
             app = msal.ConfidentialClientApplication(
                 authority=authority_url,
-                client_id=aad_client_id,
-                client_credential=aad_client_secret,
+                client_id=sp_client_id,
+                client_credential=sp_client_secret,
             )
             token = app.acquire_token_for_client(
                 scopes=["https://graph.microsoft.com/.default"]
@@ -202,9 +202,9 @@ if __name__ == "__main__":
 
     connector.load_credentials(
         {
-            "aad_client_id": os.environ["AAD_CLIENT_ID"],
-            "aad_client_secret": os.environ["AAD_CLIENT_SECRET"],
-            "aad_directory_id": os.environ["AAD_CLIENT_DIRECTORY_ID"],
+            "sp_client_id": os.environ["SP_CLIENT_ID"],
+            "sp_client_secret": os.environ["SP_CLIENT_SECRET"],
+            "sp_directory_id": os.environ["SP_CLIENT_DIRECTORY_ID"],
         }
     )
     document_batches = connector.load_from_state()
diff --git a/backend/danswer/connectors/teams/__init__.py b/backend/danswer/connectors/teams/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/danswer/connectors/teams/connector.py b/backend/danswer/connectors/teams/connector.py
new file mode 100644
index 000000000..3b9340878
--- /dev/null
+++ b/backend/danswer/connectors/teams/connector.py
@@ -0,0 +1,301 @@
+import os
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+
+import msal  # type: ignore
+from office365.graph_client import GraphClient  # type: ignore
+from office365.teams.channels.channel import Channel  # type: ignore
+from office365.teams.chats.messages.message import ChatMessage  # type: ignore
+from office365.teams.team import Team  # type: ignore
+
+from danswer.configs.app_configs import INDEX_BATCH_SIZE
+from danswer.configs.constants import DocumentSource
+from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc
+from danswer.connectors.interfaces import GenerateDocumentsOutput
+from danswer.connectors.interfaces import LoadConnector
+from danswer.connectors.interfaces import PollConnector
+from danswer.connectors.interfaces import SecondsSinceUnixEpoch
+from danswer.connectors.models import BasicExpertInfo
+from danswer.connectors.models import ConnectorMissingCredentialError
+from danswer.connectors.models import Document
+from danswer.connectors.models import Section
+from danswer.file_processing.html_utils import parse_html_page_basic
+from danswer.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def get_created_datetime(chat_message: ChatMessage) -> datetime:
+    """Return the message's `createdDateTime` property parsed into a datetime."""
+    return time_str_to_utc(chat_message.properties["createdDateTime"])
+
+
+def _get_sender_display_name(message: ChatMessage) -> str | None:
+    """Return the display name of the user that sent `message`, or None.
+
+    Any level of `from` -> `user` -> `displayName` may be missing or null, so
+    each step falls back to an empty mapping instead of indexing directly.
+    """
+    sender = message.properties.get("from") or {}
+    user = sender.get("user") or {}
+    return user.get("displayName") or None
+
+
+def _extract_channel_members(channel: Channel) -> list[BasicExpertInfo]:
+    """Fetch the members of `channel` as a list of BasicExpertInfo."""
+    members = channel.members.get().execute_query()
+    return [BasicExpertInfo(display_name=member.display_name) for member in members]
+
+
+def _get_threads_from_channel(
+    channel: Channel,
+    start: datetime | None = None,
+    end: datetime | None = None,
+) -> list[list[ChatMessage]]:
+    """Return the threads of `channel` that were last modified within [start, end].
+
+    Each thread is a list holding the top-level message followed by its replies.
+    A bound that is None is not applied.
+    """
+    # Ensure start and end are timezone-aware (naive values are taken to be UTC)
+    if start and start.tzinfo is None:
+        start = start.replace(tzinfo=timezone.utc)
+    if end and end.tzinfo is None:
+        end = end.replace(tzinfo=timezone.utc)
+
+    # get_all() pages through the whole collection, matching how replies are
+    # fetched below; get() alone only retrieves a single page of results
+    base_messages: list[ChatMessage] = channel.messages.get_all().execute_query()
+
+    threads: list[list[ChatMessage]] = []
+    for base_message in base_messages:
+        message_datetime = time_str_to_utc(
+            base_message.properties["lastModifiedDateTime"]
+        )
+
+        if start and message_datetime < start:
+            continue
+        if end and message_datetime > end:
+            continue
+
+        replies = base_message.replies.get_all().execute_query()
+
+        # a thread is the base message followed by all of its replies
+        threads.append([base_message, *replies])
+
+    return threads
+
+
+def _get_channels_from_teams(
+    teams: list[Team],
+) -> list[Channel]:
+    """Return the channels of every team in `teams` as one flat list."""
+    channels_list: list[Channel] = []
+    for team in teams:
+        channels_list.extend(team.channels.get().execute_query())
+
+    return channels_list
+
+
+def _construct_semantic_identifier(channel: Channel, top_message: ChatMessage) -> str:
+    """Build the label "<poster> in <channel> about <subject>: <snippet>"."""
+    # `or` also covers keys that are present but null or empty, which a
+    # .get() default would not
+    first_poster = _get_sender_display_name(top_message) or "Unknown User"
+    channel_name = channel.properties.get("displayName") or "Unknown"
+    thread_subject = top_message.properties.get("subject") or "Unknown"
+
+    # the body may be empty for the top message; fall back to an empty snippet
+    snippet = parse_html_page_basic((top_message.body.content or "").rstrip())
+    snippet = snippet[:50] + "..." if len(snippet) > 50 else snippet
+
+    return f"{first_poster} in {channel_name} about {thread_subject}: {snippet}"
+
+
+def _convert_thread_to_document(
+    channel: Channel,
+    thread: list[ChatMessage],
+) -> Document | None:
+    """Convert one thread (base message plus replies) into a Document.
+
+    Returns None when the thread is empty or none of its messages has text.
+    """
+    if not thread:
+        return None
+
+    most_recent_message_datetime = max(
+        get_created_datetime(message) for message in thread
+    )
+
+    top_message = thread[0]
+    message_texts: list[str] = []
+    seen_senders: set[str] = set()
+    post_members_list: list[BasicExpertInfo] = []
+
+    for message in thread:
+        if message.body.content:
+            message_text = parse_html_page_basic(message.body.content)
+            if message_text:
+                message_texts.append(message_text)
+
+        # if it has a subject, that means its the top level post message,
+        # so its id, url, and subject are used for the document
+        if message.properties.get("subject"):
+            top_message = message
+
+        # record every sender with a valid display name once
+        message_sender = _get_sender_display_name(message)
+        if message_sender and message_sender not in seen_senders:
+            seen_senders.add(message_sender)
+            post_members_list.append(BasicExpertInfo(display_name=message_sender))
+
+    # checked before the member lookup below, which costs an extra request
+    if not message_texts:
+        return None
+
+    # one message per line, so adjacent messages do not run together
+    thread_text = "\n".join(message_texts)
+
+    # if there are no found post members, grab the members from the parent channel
+    if not post_members_list:
+        post_members_list = _extract_channel_members(channel)
+
+    semantic_string = _construct_semantic_identifier(channel, top_message)
+
+    post_id = top_message.properties["id"]
+    web_url = top_message.web_url
+
+    return Document(
+        id=post_id,
+        sections=[Section(link=web_url, text=thread_text)],
+        source=DocumentSource.TEAMS,
+        semantic_identifier=semantic_string,
+        title="",  # teams threads don't really have a "title"
+        doc_updated_at=most_recent_message_datetime,
+        primary_owners=post_members_list,
+        metadata={},
+    )
+
+
+class TeamsConnector(LoadConnector, PollConnector):
+    """Connector that turns Microsoft Teams channel threads into Documents.
+
+    `teams` optionally restricts indexing to the teams with the given display
+    names (compared with spaces removed); when it is empty, every team
+    returned by the Graph client is indexed.
+    """
+
+    def __init__(
+        self,
+        batch_size: int = INDEX_BATCH_SIZE,
+        teams: list[str] | None = None,
+    ) -> None:
+        self.batch_size = batch_size
+        self.graph_client: GraphClient | None = None
+        # copied so instances never share a mutable default or the caller's list
+        self.requested_team_list: list[str] = list(teams) if teams else []
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        """Create the Graph client from the Azure AD app credentials.
+
+        Reads `teams_client_id`, `teams_client_secret` and `teams_directory_id`
+        (KeyError if one is missing) and always returns None.
+        """
+        teams_client_id = credentials["teams_client_id"]
+        teams_client_secret = credentials["teams_client_secret"]
+        teams_directory_id = credentials["teams_directory_id"]
+
+        def _acquire_token_func() -> dict[str, Any]:
+            """
+            Acquire token via MSAL
+            """
+            authority_url = f"https://login.microsoftonline.com/{teams_directory_id}"
+            app = msal.ConfidentialClientApplication(
+                authority=authority_url,
+                client_id=teams_client_id,
+                client_credential=teams_client_secret,
+            )
+            token = app.acquire_token_for_client(
+                scopes=["https://graph.microsoft.com/.default"]
+            )
+            return token
+
+        self.graph_client = GraphClient(_acquire_token_func)
+        return None
+
+    def _get_all_teams(self) -> list[Team]:
+        """Return the teams to index, filtered by `requested_team_list` if set."""
+        if self.graph_client is None:
+            raise ConnectorMissingCredentialError("Teams")
+
+        teams = self.graph_client.teams.get().execute_query()
+
+        if not self.requested_team_list:
+            return list(teams)
+
+        # team names are compared with all spaces removed
+        adjusted_request_strings = {
+            requested_team.replace(" ", "")
+            for requested_team in self.requested_team_list
+        }
+        return [
+            team
+            for team in teams
+            if team.display_name.replace(" ", "") in adjusted_request_strings
+        ]
+
+    def _fetch_from_teams(
+        self, start: datetime | None = None, end: datetime | None = None
+    ) -> GenerateDocumentsOutput:
+        """Yield batches of up to `batch_size` Documents for the selected teams."""
+        if self.graph_client is None:
+            raise ConnectorMissingCredentialError("Teams")
+
+        teams = self._get_all_teams()
+
+        channels = _get_channels_from_teams(
+            teams=teams,
+        )
+
+        # goes over channels, converts them into Document objects and then yields them in batches
+        doc_batch: list[Document] = []
+        for channel in channels:
+            thread_list = _get_threads_from_channel(channel, start=start, end=end)
+            for thread in thread_list:
+                converted_doc = _convert_thread_to_document(channel, thread)
+                if converted_doc:
+                    doc_batch.append(converted_doc)
+
+                if len(doc_batch) >= self.batch_size:
+                    yield doc_batch
+                    doc_batch = []
+        yield doc_batch
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        """Index every thread of the selected teams, without a time filter."""
+        return self._fetch_from_teams()
+
+    def poll_source(
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+    ) -> GenerateDocumentsOutput:
+        """Index threads last modified between `start` and `end` (epoch seconds)."""
+        # utcfromtimestamp() returns naive datetimes and is deprecated since
+        # Python 3.12; build timezone-aware UTC values directly instead
+        start_datetime = datetime.fromtimestamp(start, tz=timezone.utc)
+        end_datetime = datetime.fromtimestamp(end, tz=timezone.utc)
+        return self._fetch_from_teams(start=start_datetime, end=end_datetime)
+
+
+if __name__ == "__main__":
+    connector = TeamsConnector(teams=os.environ["TEAMS"].split(","))
+
+    connector.load_credentials(
+        {
+            "teams_client_id": os.environ["TEAMS_CLIENT_ID"],
+            "teams_client_secret": os.environ["TEAMS_CLIENT_SECRET"],
+            "teams_directory_id": os.environ["TEAMS_CLIENT_DIRECTORY_ID"],
+        }
+    )
+    document_batches = connector.load_from_state()
+    print(next(document_batches))
diff --git a/web/public/Teams.png b/web/public/Teams.png
new file mode 100644
index 000000000..c8479db69
Binary files /dev/null and b/web/public/Teams.png differ
diff --git a/web/src/app/admin/connectors/sharepoint/page.tsx b/web/src/app/admin/connectors/sharepoint/page.tsx
index 39628b6a0..bf9704154 100644
--- a/web/src/app/admin/connectors/sharepoint/page.tsx
+++ b/web/src/app/admin/connectors/sharepoint/page.tsx
@@ -78,7 +78,7 @@ const MainSection = () => {
 
   const sharepointCredential: Credential | undefined =
     credentialsData.find(
-      (credential) => credential.credential_json?.aad_client_id
+      (credential) => credential.credential_json?.sp_client_id
     );
 
   return (
@@ -98,7 +98,7 @@ const MainSection = () => {
Existing Azure AD Client ID: - {sharepointCredential.credential_json.aad_client_id} + {sharepointCredential.credential_json.sp_client_id} +
+ + ) : ( + <> + + As a first step, please provide Application (client) ID, Directory + (tenant) ID, and Client Secret. You can follow the guide{" "} + + here + {" "} + to create an Azure AD application and obtain these values. + + + + formBody={ + <> + + + + + } + validationSchema={Yup.object().shape({ + teams_client_id: Yup.string().required( + "Please enter your Application (client) ID" + ), + teams_directory_id: Yup.string().required( + "Please enter your Directory (tenant) ID" + ), + teams_client_secret: Yup.string().required( + "Please enter your Client Secret" + ), + })} + initialValues={{ + teams_client_id: "", + teams_directory_id: "", + teams_client_secret: "", + }} + onSubmit={(isSuccess) => { + if (isSuccess) { + refreshCredentials(); + } + }} + /> + + + )} + + + Step 2: Manage Teams Connector + + + {teamsConnectorIndexingStatuses.length > 0 && ( + <> + + The latest messages from the specified teams are fetched every 10 + minutes. + +
+ + connectorIndexingStatuses={teamsConnectorIndexingStatuses} + liveCredential={teamsCredential} + getCredential={(credential) => + credential.credential_json.teams_directory_id + } + onUpdate={() => + mutate("/api/manage/admin/connector/indexing-status") + } + onCredentialLink={async (connectorId) => { + if (teamsCredential) { + await linkCredential(connectorId, teamsCredential.id); + mutate("/api/manage/admin/connector/indexing-status"); + } + }} + specialColumns={[ + { + header: "Connectors", + key: "connectors", + getValue: (ccPairStatus) => { + const connectorConfig = + ccPairStatus.connector.connector_specific_config; + return `${connectorConfig.teams}`; + }, + }, + ]} + includeName + /> +
+ + )} + + {teamsCredential ? ( + + + nameBuilder={(values) => + values.teams && values.teams.length > 0 + ? `Teams-${values.teams.join("-")}` + : "Teams" + } + ccPairNameBuilder={(values) => + values.teams && values.teams.length > 0 + ? `Teams-${values.teams.join("-")}` + : "Teams" + } + source="teams" + inputType="poll" + // formBody={<>} + formBodyBuilder={TextArrayFieldBuilder({ + name: "teams", + label: "Teams:", + subtext: + "Specify 0 or more Teams to index. " + + "For example, specifying the Team 'Support' for the 'danswerai' Org will cause " + + "us to only index messages sent in channels belonging to the 'Support' Team. " + + "If no Teams are specified, all Teams in your organization will be indexed.", + })} + validationSchema={Yup.object().shape({ + teams: Yup.array() + .of(Yup.string().required("Team names must be strings")) + .required(), + })} + initialValues={{ + teams: [], + }} + credentialId={teamsCredential.id} + refreshFreq={10 * 60} // 10 minutes + /> + + ) : ( + + Please provide all Azure info in Step 1 first! Once you're done + with that, you can then specify which teams you want to make + searchable. + + )} + + ); +}; + +export default function Page() { + return ( +
+
+ +
+ + } title="Teams" /> + + +
+ ); +} diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 2c9928e31..04d003a59 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -53,6 +53,7 @@ import googleSitesIcon from "../../../public/GoogleSites.png"; import zendeskIcon from "../../../public/Zendesk.svg"; import dropboxIcon from "../../../public/Dropbox.png"; import sharepointIcon from "../../../public/Sharepoint.png"; +import teamsIcon from "../../../public/Teams.png"; import mediawikiIcon from "../../../public/MediaWiki.svg"; import wikipediaIcon from "../../../public/Wikipedia.svg"; import discourseIcon from "../../../public/Discourse.png"; @@ -538,6 +539,18 @@ export const SharepointIcon = ({ ); +export const TeamsIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => ( +
+ Logo +
+);
+
 export const GongIcon = ({
   size = 16,
   className = defaultTailwindCSS,
diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts
index af173e614..597f45f4d 100644
--- a/web/src/lib/sources.ts
+++ b/web/src/lib/sources.ts
@@ -22,6 +22,7 @@ import {
   ProductboardIcon,
   RequestTrackerIcon,
   SharepointIcon,
+  TeamsIcon,
   SlabIcon,
   SlackIcon,
   ZendeskIcon,
@@ -164,6 +165,11 @@ const SOURCE_METADATA_MAP: SourceMap = {
     displayName: "Sharepoint",
     category: SourceCategory.AppConnection,
   },
+  teams: {
+    icon: TeamsIcon,
+    displayName: "Teams",
+    category: SourceCategory.AppConnection,
+  },
   discourse: {
     icon: DiscourseIcon,
     displayName: "Discourse",
diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts
index a451b6897..3f8910230 100644
--- a/web/src/lib/types.ts
+++ b/web/src/lib/types.ts
@@ -44,6 +44,7 @@ export type ValidSources =
   | "loopio"
   | "dropbox"
   | "sharepoint"
+  | "teams"
   | "zendesk"
   | "discourse"
   | "axero"
@@ -127,6 +128,10 @@ export interface SharepointConfig {
   sites?: string[];
 }
 
+export interface TeamsConfig {
+  teams?: string[];
+}
+
 export interface DiscourseConfig {
   base_url: string;
   categories?: string[];
@@ -366,9 +371,15 @@ export interface DropboxCredentialJson {
 }
 
 export interface SharepointCredentialJson {
-  aad_client_id: string;
-  aad_client_secret: string;
-  aad_directory_id: string;
+  sp_client_id: string;
+  sp_client_secret: string;
+  sp_directory_id: string;
+}
+
+export interface TeamsCredentialJson {
+  teams_client_id: string;
+  teams_client_secret: string;
+  teams_directory_id: string;
 }
 
 export interface DiscourseCredentialJson {