From ceb34a41d949972b111b073009fb7ec48fe6b34c Mon Sep 17 00:00:00 2001 From: Ishankoradia <39583356+Ishankoradia@users.noreply.github.com> Date: Tue, 7 Jan 2025 04:24:22 +0530 Subject: [PATCH] discord connector (#3023) * discord: frontend and backend poll connector * added requirements for discord installation * fixed the mypy errors * process messages not part of any thread * minor change * updated the connector; this logic works & am able to docs when i print * minor change * ability to enter a start date to pull docs from and refactor * added the load connector and fixed mypy errors * local commit test done! * minor refactor and properly commented everything * updated the logic to handle permissions and index active/archived threads * basic discord test template * cleanup * going away with the danswer discord client class ; using an async context manager * moved to proper folder * minor fixes * needs improvement * fixed discord icon --------- Co-authored-by: hagen-danswer --- backend/onyx/configs/constants.py | 1 + backend/onyx/connectors/discord/__init__.py | 0 backend/onyx/connectors/discord/connector.py | 320 ++++++++++++++++++ backend/onyx/connectors/factory.py | 2 + backend/requirements/default.txt | 1 + .../discord/test_discord_connector.py | 49 +++ web/public/discord.png | Bin 0 -> 10150 bytes web/src/components/icons/icons.tsx | 15 + web/src/lib/connectors/connectors.tsx | 30 ++ web/src/lib/connectors/credentials.ts | 8 + web/src/lib/sources.ts | 7 + web/src/lib/types.ts | 1 + 12 files changed, 434 insertions(+) create mode 100644 backend/onyx/connectors/discord/__init__.py create mode 100644 backend/onyx/connectors/discord/connector.py create mode 100644 backend/tests/daily/connectors/discord/test_discord_connector.py create mode 100644 web/public/discord.png diff --git a/backend/onyx/configs/constants.py b/backend/onyx/configs/constants.py index 622dc4eadd..5a6ba4c6ed 100644 --- a/backend/onyx/configs/constants.py +++ 
b/backend/onyx/configs/constants.py
@@ -142,6 +142,7 @@ class DocumentSource(str, Enum):
     OCI_STORAGE = "oci_storage"
     XENFORO = "xenforo"
     NOT_APPLICABLE = "not_applicable"
+    DISCORD = "discord"
     FRESHDESK = "freshdesk"
     FIREFLIES = "fireflies"
     EGNYTE = "egnyte"
diff --git a/backend/onyx/connectors/discord/__init__.py b/backend/onyx/connectors/discord/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/backend/onyx/connectors/discord/connector.py b/backend/onyx/connectors/discord/connector.py
new file mode 100644
index 0000000000..5bc0031f54
--- /dev/null
+++ b/backend/onyx/connectors/discord/connector.py
@@ -0,0 +1,345 @@
+import asyncio
+from collections.abc import AsyncGenerator
+from collections.abc import AsyncIterable
+from collections.abc import Iterable
+from datetime import datetime
+from datetime import timezone
+from typing import Any
+
+from discord import Client
+from discord.channel import TextChannel
+from discord.channel import Thread
+from discord.enums import MessageType
+from discord.flags import Intents
+from discord.message import Message as DiscordMessage
+
+from onyx.configs.app_configs import INDEX_BATCH_SIZE
+from onyx.configs.constants import DocumentSource
+from onyx.connectors.interfaces import GenerateDocumentsOutput
+from onyx.connectors.interfaces import LoadConnector
+from onyx.connectors.interfaces import PollConnector
+from onyx.connectors.interfaces import SecondsSinceUnixEpoch
+from onyx.connectors.models import ConnectorMissingCredentialError
+from onyx.connectors.models import Document
+from onyx.connectors.models import Section
+from onyx.utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+_DISCORD_DOC_ID_PREFIX = "DISCORD_"
+_SNIPPET_LENGTH = 30
+
+
+def _convert_message_to_document(
+    message: DiscordMessage,
+    sections: list[Section],
+) -> Document:
+    """
+    Convert a discord message to a document
+    Sections are built by the caller and passed through unchanged; only the
+    metadata, title and semantic identifier are derived from the message here
+    """
+
+    metadata: dict[str, str | list[str]] = {}
+    semantic_substring = ""
+
+    # Messages posted directly in a text channel are tagged with the channel name
+    if isinstance(message.channel, TextChannel) and (
+        channel_name := message.channel.name
+    ):
+        metadata["Channel"] = channel_name
+        semantic_substring += f" in Channel: #{channel_name}"
+
+    # Single messages dont have a title
+    title = ""
+
+    # Messages posted in a thread use the thread name as title and metadata
+    if isinstance(message.channel, Thread):
+        title = message.channel.name
+        metadata["Thread"] = title
+        semantic_substring += f" in Thread: {title}"
+
+    snippet: str = (
+        message.content[:_SNIPPET_LENGTH].rstrip() + "..."
+        if len(message.content) > _SNIPPET_LENGTH
+        else message.content
+    )
+
+    semantic_identifier = f"{message.author.name} said{semantic_substring}: {snippet}"
+
+    return Document(
+        id=f"{_DISCORD_DOC_ID_PREFIX}{message.id}",
+        source=DocumentSource.DISCORD,
+        semantic_identifier=semantic_identifier,
+        # edited_at is None for messages that were never edited, so fall back to
+        # the creation time so that every document carries a timestamp
+        doc_updated_at=message.edited_at or message.created_at,
+        title=title,
+        sections=sections,
+        metadata=metadata,
+    )
+
+
+async def _fetch_filtered_channels(
+    discord_client: Client,
+    server_ids: list[int] | None,
+    channel_names: list[str] | None,
+) -> list[TextChannel]:
+    """
+    Return the text channels whose message history the bot may read, optionally
+    restricted to the given server ids and channel names (None or empty means
+    no restriction)
+    """
+    filtered_channels: list[TextChannel] = []
+
+    for channel in discord_client.get_all_channels():
+        # Check the type first so only text channels are inspected any further
+        if not isinstance(channel, TextChannel):
+            continue
+        if not channel.permissions_for(channel.guild.me).read_message_history:
+            continue
+        if server_ids and channel.guild.id not in server_ids:
+            continue
+        if channel_names and channel.name not in channel_names:
+            continue
+        filtered_channels.append(channel)
+
+    logger.info(f"Found {len(filtered_channels)} channels for the authenticated user")
+    return filtered_channels
+
+
+async def _fetch_documents_from_history(
+    source: TextChannel | Thread,
+    start_time: datetime | None,
+    end_time: datetime | None,
+) -> AsyncIterable[Document]:
+    """
+    Yield one document per default-type message in a channel or thread history
+    """
+    # NOTE: limit=None is needed to page through the whole history; with the
+    # discord.py default (limit=100) only the first 100 messages are returned
+    async for message in source.history(
+        limit=None,
+        after=start_time,
+        before=end_time,
+    ):
+        # Skip messages that are not the default type
+        if message.type != MessageType.default:
+            continue
+
+        sections: list[Section] = [
+            Section(
+                text=message.content,
+                link=message.jump_url,
+            )
+        ]
+
+        yield _convert_message_to_document(message, sections)
+
+
+async def _fetch_documents_from_channel(
+    channel: TextChannel,
+    start_time: datetime | None,
+    end_time: datetime | None,
+) -> AsyncIterable[Document]:
+    """
+    Yield documents for the messages of a channel, then for the messages of its
+    active threads and finally for the messages of its archived threads
+    """
+    # Discord's epoch starts at 2015-01-01
+    discord_epoch = datetime(2015, 1, 1, tzinfo=timezone.utc)
+    if start_time and start_time < discord_epoch:
+        start_time = discord_epoch
+
+    async for doc in _fetch_documents_from_history(channel, start_time, end_time):
+        yield doc
+
+    for active_thread in channel.threads:
+        async for doc in _fetch_documents_from_history(
+            active_thread, start_time, end_time
+        ):
+            yield doc
+
+    # NOTE: limit=None so that all archived threads are listed, not only the
+    # first 100 (the discord.py default)
+    async for archived_thread in channel.archived_threads(limit=None):
+        async for doc in _fetch_documents_from_history(
+            archived_thread, start_time, end_time
+        ):
+            yield doc
+
+
+def _manage_async_retrieval(
+    token: str,
+    requested_start_date_string: str,
+    channel_names: list[str],
+    server_ids: list[int],
+    start: datetime | None = None,
+    end: datetime | None = None,
+) -> Iterable[Document]:
+    """
+    Run the async discord client from synchronous code and lazily yield the
+    documents it produces, one at a time
+    """
+    # parse requested_start_date_string to datetime
+    pull_date: datetime | None = (
+        datetime.strptime(requested_start_date_string, "%Y-%m-%d").replace(
+            tzinfo=timezone.utc
+        )
+        if requested_start_date_string
+        else None
+    )
+
+    # Set start_time to the later of start and pull_date, or whichever is provided
+    start_time = max(filter(None, [start, pull_date])) if start or pull_date else None
+
+    end_time: datetime | None = end
+
+    async def _async_fetch() -> AsyncGenerator[Document, None]:
+        intents = Intents.default()
+        intents.message_content = True
+        async with Client(intents=intents) as discord_client:
+            # Keep references to the tasks so they are not garbage collected
+            # while they are still running
+            start_task = asyncio.create_task(discord_client.start(token))
+            ready_task = asyncio.create_task(discord_client.wait_until_ready())
+
+            # If start() finishes first (e.g. the login failed) the client never
+            # becomes ready, so raise instead of waiting forever
+            done, _ = await asyncio.wait(
+                {start_task, ready_task}, return_when=asyncio.FIRST_COMPLETED
+            )
+            if ready_task not in done:
+                ready_task.cancel()
+                # Re-raises the error that made start() stop, if there was one
+                start_task.result()
+                raise RuntimeError("Discord client stopped before it was ready")
+            ready_task.result()
+
+            filtered_channels: list[TextChannel] = await _fetch_filtered_channels(
+                discord_client=discord_client,
+                server_ids=server_ids,
+                channel_names=channel_names,
+            )
+
+            for channel in filtered_channels:
+                async for doc in _fetch_documents_from_channel(
+                    channel=channel,
+                    start_time=start_time,
+                    end_time=end_time,
+                ):
+                    yield doc
+
+    def run_and_yield() -> Iterable[Document]:
+        loop = asyncio.new_event_loop()
+        # Get the async generator
+        async_gen = _async_fetch()
+        try:
+            while True:
+                try:
+                    # Run the loop until the next document is available
+                    doc = loop.run_until_complete(anext(async_gen))
+                except StopAsyncIteration:
+                    break
+                yield doc
+        finally:
+            # Close the async generator inside the loop so the discord client is
+            # shut down even if the consumer stops early or an error is raised
+            try:
+                loop.run_until_complete(async_gen.aclose())
+            finally:
+                loop.close()
+
+    return run_and_yield()
+
+
+class DiscordConnector(PollConnector, LoadConnector):
+    """
+    Connector that indexes the messages of discord text channels and their threads
+    """
+
+    def __init__(
+        self,
+        server_ids: list[str] | None = None,
+        channel_names: list[str] | None = None,
+        start_date: str | None = None,  # YYYY-MM-DD
+        batch_size: int = INDEX_BATCH_SIZE,
+    ):
+        self.batch_size = batch_size
+        self.channel_names: list[str] = channel_names if channel_names else []
+        self.server_ids: list[int] = (
+            [int(server_id) for server_id in server_ids] if server_ids else []
+        )
+        self._discord_bot_token: str | None = None
+        self.requested_start_date_string: str = start_date or ""
+
+    @property
+    def discord_bot_token(self) -> str:
+        if self._discord_bot_token is None:
+            raise ConnectorMissingCredentialError("Discord")
+        return self._discord_bot_token
+
+    def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None:
+        self._discord_bot_token = credentials["discord_bot_token"]
+        return None
+
+    def _manage_doc_batching(
+        self,
+        start: datetime | None = None,
+        end: datetime | None = None,
+    ) -> GenerateDocumentsOutput:
+        """Group the retrieved documents into batches of at most batch_size"""
+        doc_batch: list[Document] = []
+        for doc in _manage_async_retrieval(
+            token=self.discord_bot_token,
+            requested_start_date_string=self.requested_start_date_string,
+            channel_names=self.channel_names,
+            server_ids=self.server_ids,
+            start=start,
+            end=end,
+        ):
+            doc_batch.append(doc)
+            if len(doc_batch) >= self.batch_size:
+                yield doc_batch
+                doc_batch = []
+
+        if doc_batch:
+            yield doc_batch
+
+    def poll_source(
+        self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
+    ) -> GenerateDocumentsOutput:
+        return self._manage_doc_batching(
+            datetime.fromtimestamp(start, tz=timezone.utc),
+            datetime.fromtimestamp(end, tz=timezone.utc),
+        )
+
+    def load_from_state(self) -> GenerateDocumentsOutput:
+        return self._manage_doc_batching(None, None)
+
+
+if __name__ == "__main__":
+    import os
+    import time
+
+    end = time.time()
+    # 1 day
+    start = end - 24 * 60 * 60 * 1
+    # "1,2,3"
+    server_ids: str | None = os.environ.get("server_ids", None)
+    # "channel1,channel2"
+    channel_names: str | None = os.environ.get("channel_names", None)
+
+    connector = DiscordConnector(
+        server_ids=server_ids.split(",") if server_ids else [],
+        channel_names=channel_names.split(",") if channel_names else [],
+        start_date=os.environ.get("start_date", None),
+    )
+    connector.load_credentials(
+        {"discord_bot_token": os.environ.get("discord_bot_token")}
+    )
+
+    for doc_batch in connector.poll_source(start, end):
+        for doc in doc_batch:
+            print(doc)
diff --git a/backend/onyx/connectors/factory.py b/backend/onyx/connectors/factory.py
index 8a08689c74..c7bbc3d708 100644
--- a/backend/onyx/connectors/factory.py
+++
b/backend/onyx/connectors/factory.py @@ -12,6 +12,7 @@ from onyx.connectors.blob.connector import BlobStorageConnector from onyx.connectors.bookstack.connector import BookstackConnector from onyx.connectors.clickup.connector import ClickupConnector from onyx.connectors.confluence.connector import ConfluenceConnector +from onyx.connectors.discord.connector import DiscordConnector from onyx.connectors.discourse.connector import DiscourseConnector from onyx.connectors.document360.connector import Document360Connector from onyx.connectors.dropbox.connector import DropboxConnector @@ -101,6 +102,7 @@ def identify_connector_class( DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector, DocumentSource.OCI_STORAGE: BlobStorageConnector, DocumentSource.XENFORO: XenforoConnector, + DocumentSource.DISCORD: DiscordConnector, DocumentSource.FRESHDESK: FreshdeskConnector, DocumentSource.FIREFLIES: FirefliesConnector, DocumentSource.EGNYTE: EgnyteConnector, diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index d4bd46c6e9..3bd82522f8 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -8,6 +8,7 @@ celery==5.5.0b4 chardet==5.2.0 dask==2023.8.1 ddtrace==2.6.5 +discord.py==2.4.0 distributed==2023.8.1 fastapi==0.109.2 fastapi-users==12.1.3 diff --git a/backend/tests/daily/connectors/discord/test_discord_connector.py b/backend/tests/daily/connectors/discord/test_discord_connector.py new file mode 100644 index 0000000000..98d66e49ed --- /dev/null +++ b/backend/tests/daily/connectors/discord/test_discord_connector.py @@ -0,0 +1,49 @@ +import os +import time + +import pytest + +from onyx.connectors.discord.connector import DiscordConnector +from onyx.connectors.models import Document + + +@pytest.fixture +def discord_connector() -> DiscordConnector: + server_ids: str | None = os.environ.get("server_ids", None) + channel_names: str | None = os.environ.get("channel_names", None) + + connector = DiscordConnector( + 
server_ids=server_ids.split(",") if server_ids else [], + channel_names=channel_names.split(",") if channel_names else [], + start_date=os.environ.get("start_date", None), + ) + connector.load_credentials( + { + "discord_bot_token": os.environ.get("DISCORD_BOT_TOKEN"), + } + ) + return connector + + +@pytest.mark.skip(reason="Test Discord is not setup yet!") +def test_discord_poll_connector(discord_connector: DiscordConnector) -> None: + end = time.time() + start = end - 24 * 60 * 60 * 15 # 1 day + + all_docs: list[Document] = [] + channels: set[str] = set() + threads: set[str] = set() + for doc_batch in discord_connector.poll_source(start, end): + for doc in doc_batch: + if "Channel" in doc.metadata: + assert isinstance(doc.metadata["Channel"], str) + channels.add(doc.metadata["Channel"]) + if "Thread" in doc.metadata: + assert isinstance(doc.metadata["Thread"], str) + threads.add(doc.metadata["Thread"]) + all_docs.append(doc) + + # might change based on the channels and servers being used + assert len(all_docs) == 10 + assert len(channels) == 2 + assert len(threads) == 2 diff --git a/web/public/discord.png b/web/public/discord.png new file mode 100644 index 0000000000000000000000000000000000000000..fb39b322781be28568131b8a840cc6a12446a3fa GIT binary patch literal 10150 zcmd6NXH*m0zcwmIELahdswf2MFo{$VDT0BagGdvBNdPG-p;r+FDN+*JP?Zv@AiWAl zdLR@*I*JH}Lzgc74(Gq_ec$)P`~7CEti6Bbd7j_iGka!dCc)a8s%K8IouZ?oJA=Ki ztV>6CEdKCIe+=;`(iFmx=9<-gT@5-qPZS;9i&u1X+lcGMKXi01Vsvz~kLl=S5CF@w zq-vepNPt-z)W3(c2L}h!)6=Qsm4~>M`-ZECXHA0lE7BZs*45ObyGt9w9~zeGs>*b4 z{^Gok7!1x>je86;M~+`&LnS5tWTc}*nPHU`^xa1ml26C}luT^-8H)- zCkKw6>ev}?e^?G4oeAQ4uRXN&?I1~7n3mdgm*tcB!Om5L@_)tWu_(v!kSBK^7w6}8 z@LFGK?HH!#z9AV){E55VzpPZYVBI2CEpP62=k$@PA_w>rrMe)|+hVuZ<=YN5mE9!P z$W%YCCVyTVS1z`n7j(kdNA`4kk^=?v+DXi|cqLjmSKf-Tj6Wqaj5-`m{W*mDri9~L zO%crf$sbbZ{$Lnwtp|k8rln}GITvK_dz{X2cxtWS!D~iaPWLgC#v~Y}ZLe|#=KvzQ zD{7UXUj4whPAZ(H%k=pxxtuxDumQ)_zo{twtedI75PlY27gJoUnLsVj@0v0jx1qh6 
zO|5$IHCs<*088@FSryp%j1AA;r>a5%qq-^sCrL&^G3_bcLJGN=k=wnn+*}job}n-a zUPV+XvVI_Kwni~>E80{zMkyWUZu5IEsq6agx&nDGqx9;p`}^DR9AVxaWrV28k*HU zvdB5X*jq)}6-cgF(+MVRbrc4cNgDvuBvsPZ8y$1s1ZwEyv#Aekvf|SuLz47y+tbrp zQ8Sncbc~i89`@Cs*-h-S0-=#?uJ0yds$|4vO{C`fx+7<9m3kj^uC|+|#wROPNETcp zv@?pvuBU&Q3X#sBc)uD++7v;P9}YCCTvLH9G|0^bZx;;l`b8K_5)j(<*uV!LYb(z` zugXuV{~3ZiAZT$-=>d69DjItqua z=r2czi}}mb+VZ!k2rvXiDD@Z(1&amBr(00yC+$DJ;eAE|s6WpcF^{HKLEECBXsgfn zIrX|3 zI6P8KWFBo_%(C9=+zZ`4;Ao#&aPr+b)uciDd(d>D1?rWIlThl?x-q0~&fzjDr1n1IxGQ&!-(oy8oV+QM`JXxMQibbL-bfV}CRBxkXoBf8wcv8xsk*msriyX&u-+ zb3M6Z+tD+RrY0v7Wv&>cey*l4QfR;Y>T}2Yc!g3GK*PL(yfc`XN?Ax_+TZCmA;Q^n zXkxaw?-mokUl@M>F&cvwt6%!uM{g`+1mjQtq)&tl?{FdKbyJ#RIik3 zMP#it>veg8>&P(wJ%U~^CK#I!OpU!EGVe+rbqj{Lh>@UsgMItg$ zn3qsm?Ar|HLm!W8y+1v|ly2M9*0iQ|+Y++e=3_JIJ{6fmux>tU^Qui*Sl$0Tl5z5i z#LTZa#M{KB?REjFiujJVtw|Sk#z&4_5K$e9*d@j9zn|d3`Qx9n^~qzhRbEgJoEPnf zPSSFA#t^_oi?ce`vBAc%VW=AFZ^`n{>}GO_MyTT`C|eH?S6aLZFOW|(L_IN513D1Z zH!{Csg8ff&&!!k4IT;uuoMi+r|73W_?L%>3v66b7=@^q`!hLvy)ayGH(^QQS&t?4h zJ0^^Xz{H~lyHzO4By3od!F>Y~L1O-0;@)~TH+ z!o_1GWo1)FP9X$)UB6LeHJji!^X;OqxN|mW58MwtRd+>3AC*9LrZ;Eg6wsUAZ;>Ti zATuGsonVM6xPlFqS&coU1RUoqiNVaI3`{=o1w!G59RKEFN@VP{tZ)&J0Wq*`6}nxS z1YhaAxoV!gGrGu8!!lw*-fNPms*E-}Zs^C4`|;B`ai?Z6<)h%J5@fk0C}$58iHb^F z7?uaMA;_c&*JEd4nN7pJBNz~o0M0uf_`X1x8R@^(U%kde@L*h9N)l`PT2=o~#5;?q z;9sJbjW5GCfRWf+yyyS4GMj_D+&0G%8`fsb`nv}LF8q#vS%L6?*E&SU!1V(kQ zGn(W@VCa;m1MZZjD$sHFd;W4%Fp9*P45A8v$JRC&J!Cd0-PQu|!+2Wj+>z_&5CQa$ zb-Jh^kXmg0&QJHYKM1l;{XoA(7^N%uqJm$~2>q!={Ssl?OBIT*v7W)DQK&CriD&=0 zZLDHJR_L?o($s958M$k(%=adjR)BOzlYu7u+ntSSgwh138}@9)tPgeGkEI?b#0K*R zn2!B}n;jdUY5lp6iO)M!R*LFqidK*q8}}f@1n+A}e2p zRc2)HegqwLFZL4fJT8--?IIk15OpLNp;PWze{ljS=f=@E!_Yw-(s^0HpnQ#KRUUZe zUgmCqqWQggcM?iN3(}YE)34`J{Rn<1#eV!ox(GVsH>)Vq-c|nmdcL~ylwgz|V`oyj zs*?t)t3FggwWA3yB0gmJ{rVw)pbgBsEpMsqXQhr`d`eemgIt0SZUo;ytkAOd+)aJd zW%OU-8!r@bh}MZ6o97=vXFTuo2x4$EsDse~#LoFn>ywnu!QjkzQD|H3K}&1C%Uc# zM_YvUNuh<$m)zmJjwAC9 zZm+i4uQT<{162%dO-IDuV~!k>hMbKI?#kEJpAlv9QhAl8xe82QZtX$*)!U+)>1e^2 
zntJ5^gcnnq6h;!Jj>57HCMegEP+V(y9X#(MD(-dhRk#V3ERde+r*|fU*gSBP{HTHQBD@^8@I+1%iC%$I&c4B9XIzGkL zRqq51_n?0Nw`#vZQSWbl zOVy=*an&Wu%5z@~@ZnKdFOl3^@Lzcv{rY)8o>ic0pB6q4@tSp%``SrZm64riu9a+e zKGiexfmQ}NpL}pBrkHyhWwC_+bDK{db%|lH+4lPfkKE-B z2@*KwW8eK9fD_3rMyREM+89G;GRd~2;Q5(Jeu@buv)$giq?3mfIFxb}TDh#TAVhLu zynP8Yhq0mfNv;+>Vwp|C8Ba$*ItZQwNQKl_@|7Excu0u1QOOZ7N|yf(#znSQfambjVjI(Fi)37h#W?suE0vN$C*fxq|Q5uDB_7CksIf3s7q_}Ny4z-qeaRke+eC6 zEA}QC*)OQ==rl`{zEp}l*5qRX-wjKL{m33#DPZsOIcKGaGGQ2H1gs#`L*+qkWIS80 z98&F2sA?;ei0zFFSa0MN#FkU-lAx8&eaRw#5_EJDeD1Dqzn(7!qicw_iBozaTwX=i zMDO){5?KBWzj1n-C#S=|qo^=O$-=a1`X)&*0VOhe(Vo|ki+ygIXu57c$3wE~y8CSs zcQIn_G8gKZW-)Gj_#SFd%WknL+mKlPWle@xoHUhdoeZJ=S<*_kLJk(T!@ruTEk9mU zAUjW;GL?X~xd{;^64cNAsN$_LfJV$!_(=0lq%=?@+k5V2s6qN37(^LlUVc{A022AnHMi+w zhVk!mzb--mOs=Rm4_*-=*>=5qPr@4Hn9V?di}fI`xJiMZWb=^4d4}Rl%#p1TA?5J7 zW|6R8#m=`w?M+LPClIA=9VA`X#oDUTU2VnL+7Rc|f8JUV#t7^8xG1BsB5&3p6cND_!UuRZIT4L5Tf;5hyoJQ|BzX?iwZ0}kwY5P;zg z+JY%+uI2$Dw>-hnU0UQwoUISahvC>0cOKg!rAux_FCGyrRkw3CJ%i=n#I2#**F zQ^S6qQCG@TKJ}03bISvO)r%Op)up0N8$B|?0-G9sSTINFQA~*x7kf4!*u5rb0$E9) zJXRpbQb`lhwkc03g%s-BSgPCMA)UST5HTss>4zpV zlaQl6URsni$I4&{U*cUY)qt}+V0*XbzV_XsQ5EI3u>-DaWbC5v)sZgf;#9i?Zs0U9 zoPv2DgYEhHl#ut!k}{SJhMao+kc@Aq41RA71<5DqQ*Lp`QZ^$0)Pi*t)L)dbsn72> zWkf3VM)5KIJ>-Gk+I_oxAHjdcsh1 zpad9JeD_HvkS2LwfMiG9`LMZMo3fQCNMCYcW@d@)IuR2mlZ>3mnob&->vJE?{WgtI zqf3-|Gh|s5XJ`tL9MV6JS~+(kG}mIWF@ny#Bs&3zN2u{7ioM%A5|%JNAES#hxubRy z&H(&oHF}Pp1o)S1ME&MD7uL1k&tp+K$>ftbrv<0{owwnAz;J>?Z`6L7&nCs z@BGcHIZ>NG&s$GV-r*ym43i=^FJJk!s^e{tbOKJ$M19e3-Qk~#&;GNr?9!Bi#cS}9 zay4tBC{uIPu8yfNg=7k(+i&X}@qQlzug9pNhO0Thp_{!4W2?Y$Ec%R_01U}16c7H8 zmMx+2{o(0T(FF=L*b@rtN12j%OkVkn&R)I1Y-K%=-{|{#nTPb$qjuj@OxhS_B(Cij z46Qgg<&Qry=-?&P=8G7k%*5Y~By<`#2H%7yCnJi0lc5UgJS54h){qlt8E55}HXfnl z`lD2-9eMoVYzElHg*@8|i^2XmoiT!M899jJ*89?yA#@y5=(!>Ro4Ek|q%zMdUU!QC z{?H?xUNvE%aM=~u?G&8R{3Emd)Z35>%BAaln(WH$yM}srD&IW{Q zlu;-lAd>`m`02W4u=17IV3m650gpuL;0RAD)dF#mDz9Yg)q=Ebjr$Z1m4xR&u&Und 
z(^_!RbXQObFE15L!g4z88lRm@`#4dR>}T8!ghwcYi{`V+6f3;en$pRQX`$etC2tXR99I*z6`Fp{v5+xsK? zVO+f@8Mktf4NAX>g0D^6%dvAXZ~m>UNx`h!hfyy{4=PirI@dCyL6^5tFwf(@@SqN8 zsiQ9COq(%a$9ZG5{?d^=yD)I0&VJ8H;+h>Lzn@^ECqZNCu~U;^M`$mmA}Ga4qoucOS#XQSZK!uJK-!0IJx+y@bJ0BumR;d4a` zw%uWqxr-;iH2&rdnk^)YbZvVMZ?`P)Il; zl=<${b{iFTOKpJeI7&T>f@?R?@3a+0kM z`VVbF(KJS^Z0S^Mt%$ig`Aq&}dHRhrL3?aqaw>y{G%KOO!)o`^F3u*|6?N1z zJQFWdr8qBoFx>w9Ni|xyd#rRp;Csk1IxA6%n=p5m7XE_Dc?6a{3b@fjiznK~E<{${ zxJFRDOY_zK?;et}_q!KwDMy&J5z<^bVb4B%?k4PY6xckbMoYPySanq!L2q>a3ztgy zc;nDSwC+;=WqWqYUiI;FPlWRSXf;Jsg_e2)+!3WQZi3{^e{~U>O{9n*3elsds`01- z0&tHF?g)?!|0hfA#Z{yn6pnWG)GKj*y&?6#{ywvckx-*Os!W3xkNB((U31&-+dc;FQDm!fu7PO~i%LO4e*0&W2b(OXX! zKi)m@pCM;AMpONq7c1}53IEF^C(h3_{X;T1kh(_)%clAN+x)yov%1ANXzWNNCm1KG zP{w%r5FJTMDVQ`f%Cm(l5Azc|mSnz^VnVLwZz=3}+so3D&V>fH>J~oa@0WUy$FXE~ z^I9EPsh~!?v5D(nBa%nvO>k0h_AQk`J|p&txgG7fk2|QgFmK;@MR(lQ zv#p#X_D@lIvc7iYdMP~sE-#%ci5VWA+HNs*7Wn`cgp}vefOh>s%V>{3Wrdm_o`?no zfn-mFzk91@XcC{+VgXf-r~4 zNUPcgcgav_=YzN)L%3e5Za9xx*e`Y#t}Zs;vSVJX3=J&#B=-zVy7|;~KtrE$ts!7{#_ksH z>a$9F&!%C6q982HF}H{8bE+pW=)Z=ct{5=z`{1YeC=V$|m!_Pxbum8l-p}ACc!;Za zvu@2plWe4d-JfRaxfNG3{>Fl>CCniUcNn@{=ULR8TmH$QdAA$AwJDzbk2K84?0`Oy z6^SsJ>(AXV?OZ>47PtL0J)yA{3=Dbj4Uc<(TQACK7(PoN5k9?Gs|U?QC{Vms($bCr zIrp2U{C!x!^%1Rb&EbKUj8HrBt9Ep*mMT&e2lV?Mv zVYA{$~%+~eQIdww?iv+ zQ>?myAIxIAEE0z+o)@(_TL4J6@TGS)hh2Cu=z42@eL^Jq#*QGP|0E{^k8 z(!SmLke_J?+>DdeJ4?nDbHx19K{K{aJKtNlz}E4L>rW)~+%^?>9z!Y7cvh{x5j!8m zpKv1~bKY)lRW~xipDwh6_JfakCMNc*RK&~?EsJ1-*pZ>W-%1!-@kkU zcAW3T+*3(5%c<4>H+mpIfoc6-Ko-Cxen%Of*JQ9i!TjBxyjJQ*3J43!x?$V>#uau>;{ z(@~VK_6f{hs&LS3+km^P-*mZz|8C6GNZ3HaX{MLZ3MSD9{R36fU0$#Z*6ZbRddd~( zJg>q?mmR(z39sLq=IOQ}teoiA1zeWrW#lUQJg;am3|Br0#Y^YjgM~sfo*_^_&ds0UU*y8#Q;b;u|5hK)<>n#gX(<-*|(-<@)To2+ef<{>+1`#n`T$ z=4id31_eCD-RgbubciB!r@y*?%QC|_IKXg+MJ71T>PP(+FU4au6@HWzMHxQH#-Jw- z>?n&%KA6wdAOGCyOf0IKm66xfk)}ZN^!>9WW&8^CIgWSo;+-feaS3nLwyPpb4L{s? 
z_vVr=nK5wp+k3gZZP!7yPJB5ctUBmKoXk}SsFqeOD^1x-5~`9>=(N1@k_ZZ_(Oy0p z%r?b237^f*<1p*Mrd0HO{dp(qF$|j6b~`zL;DJRyE>LM!`^9e@ZdU(Tj6|EPPvq3=WyC^NH~~1QsZnEm+;RXK@q94TD=ABzxz;)|yc?^_4`wT+P)&s9J?kDMVDjSI-H4<*&I~ z59Nk^9ISL|1IxJvPW_&Y0ZSPrzVplp|NDY(j2MhMcXYFprcQ>x*s(9YU5^?3xn=@T zru?gsz_ufxY6G+&k zJ3Q-$Qyb4W=C4nI$U^{p>@y!57SbJ=mt=i|6wH%vJxYM|qX2$~kY&VVPr4ab^2HX{ z%Xz*g^w434>2#bP19IAFA7j*CAya_GNEvyCkhD-=9F1F7GggQgK|bN;r|?}=XPL^| zYx0k&!BoCehx!@>?phU*0KL|qO=oz1@7QhYj z^)_K16f62$$!T}+nA)cmk@i}bD%)^sTz3)8~wqwy^iRdZ+%0HG)44y z=51m-Bz9f#KG^#~-Y%g-qg`mMTH(z4cqhquiLq4=`ol+8pcBDZ zk#UoOw<3K(QKq!^I=PxHqc@S+Q@V1avxF)J_}4_ScE3b1!p5>)V!op!Axf}tdYX^R zJQ1wc)nBql|J@Pl(pt-I;4Ly&GjOS51o~{qMw^#w$aI(qhxYf1KbhwpnEBh@gC>9TwT(Ei}wxyM~aEcNU zHLPQ*Y00RK5_O#bLc#wPf5`f`h@XGtlkVYyz(-aSh|TMK@1Zkf;f#~9a>5}E9UunW z5*8B|7L(8yza@halL5p9#l&R9#8^5F-u*uSho_J&-sArdz(+1{Ab^|yqu^}ofFn3t iI6V8mF#w_L{@^1lH7w{kxK literal 0 HcmV?d00001 diff --git a/web/src/components/icons/icons.tsx b/web/src/components/icons/icons.tsx index 484704e156..fb4dd96458 100644 --- a/web/src/components/icons/icons.tsx +++ b/web/src/components/icons/icons.tsx @@ -68,6 +68,7 @@ import zendeskIcon from "../../../public/Zendesk.svg"; import dropboxIcon from "../../../public/Dropbox.png"; import egnyteIcon from "../../../public/Egnyte.png"; import slackIcon from "../../../public/Slack.png"; +import discordIcon from "../../../public/Discord.png"; import airtableIcon from "../../../public/Airtable.svg"; import s3Icon from "../../../public/S3.png"; @@ -258,6 +259,20 @@ export const ColorSlackIcon = ({ ); }; +export const ColorDiscordIcon = ({ + size = 16, + className = defaultTailwindCSS, +}: IconProps) => { + return ( +
+ Logo +
+ ); +}; + export const LiteLLMIcon = ({ size = 16, className = defaultTailwindCSS, diff --git a/web/src/lib/connectors/connectors.tsx b/web/src/lib/connectors/connectors.tsx index d02504566a..84b8a71506 100644 --- a/web/src/lib/connectors/connectors.tsx +++ b/web/src/lib/connectors/connectors.tsx @@ -1031,6 +1031,36 @@ For example, specifying .*-support.* as a "channel" will cause the connector to ], advanced_values: [], }, + discord: { + description: "Configure Discord connector", + values: [], + advanced_values: [ + { + type: "list", + query: "Enter Server IDs to include:", + label: "Server IDs", + name: "server_ids", + description: `Specify 0 or more server ids to include. Only channels inside them will be used for indexing`, + optional: true, + }, + { + type: "list", + query: "Enter channel names to include:", + label: "Channels", + name: "channel_names", + description: `Specify 0 or more channels to index. For example, specifying the channel "support" will cause us to only index all content within the "#support" channel. If no channels are specified, all channels the bot has access to will be indexed.`, + optional: true, + }, + { + type: "text", + query: "Enter the Start Date:", + label: "Start Date", + name: "start_date", + description: `Only messages after this date will be indexed. 
Format: YYYY-MM-DD`, + optional: true, + }, + ], + }, freshdesk: { description: "Configure Freshdesk connector", values: [], diff --git a/web/src/lib/connectors/credentials.ts b/web/src/lib/connectors/credentials.ts index b1d1a18d89..bd9c5dfc85 100644 --- a/web/src/lib/connectors/credentials.ts +++ b/web/src/lib/connectors/credentials.ts @@ -195,6 +195,10 @@ export interface AxeroCredentialJson { axero_api_token: string; } +export interface DiscordCredentialJson { + discord_bot_token: string; +} + export interface FreshdeskCredentialJson { freshdesk_domain: string; freshdesk_password: string; @@ -335,6 +339,7 @@ export const credentialTemplates: Record = { web: null, not_applicable: null, ingestion_api: null, + discord: { discord_bot_token: "" } as DiscordCredentialJson, // NOTE: These are Special Cases google_drive: { google_tokens: "" } as GoogleDriveCredentialJson, @@ -368,6 +373,9 @@ export const credentialDisplayNames: Record = { // Slack slack_bot_token: "Slack Bot Token", + // Discord + discord_bot_token: "Discord Bot Token", + // Gmail and Google Drive google_tokens: "Google Oauth Tokens", google_service_account_key: "Google Service Account Key", diff --git a/web/src/lib/sources.ts b/web/src/lib/sources.ts index 664ffc839a..a9a323c09f 100644 --- a/web/src/lib/sources.ts +++ b/web/src/lib/sources.ts @@ -36,6 +36,7 @@ import { GoogleStorageIcon, ColorSlackIcon, XenforoIcon, + ColorDiscordIcon, FreshdeskIcon, FirefliesIcon, EgnyteIcon, @@ -80,6 +81,12 @@ export const SOURCE_METADATA_MAP: SourceMap = { docs: "https://docs.onyx.app/connectors/slack", oauthSupported: true, }, + discord: { + icon: ColorDiscordIcon, + displayName: "Discord", + category: SourceCategory.Messaging, + docs: "https://docs.onyx.app/connectors/discord", + }, gmail: { icon: GmailIcon, displayName: "Gmail", diff --git a/web/src/lib/types.ts b/web/src/lib/types.ts index a50e6f8f1d..cab013985d 100644 --- a/web/src/lib/types.ts +++ b/web/src/lib/types.ts @@ -314,6 +314,7 @@ export enum 
ValidSources { GoogleSites = "google_sites", Loopio = "loopio", Dropbox = "dropbox", + Discord = "discord", Salesforce = "salesforce", Sharepoint = "sharepoint", Teams = "teams",