Add discord daily test (#3676)

* Add discord daily test

* Fix mypy error
This commit is contained in:
skylares 2025-01-15 10:50:33 -05:00 committed by GitHub
parent c3481c7356
commit 993a1a6caf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 82 additions and 21 deletions

View File

@ -1,39 +1,47 @@
import json
import os
import time
from pathlib import Path
from typing import Any
import pytest
from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
def load_test_data(file_name: str = "test_discord_data.json") -> dict[str, Any]:
    """Load a JSON fixture stored in the same directory as this module.

    Args:
        file_name: Name of the fixture file, resolved relative to the
            directory containing this module.

    Returns:
        The decoded JSON content of the fixture.

    Raises:
        FileNotFoundError: If the fixture file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    fixture_path = Path(__file__).parent / file_name
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    return json.loads(fixture_path.read_text(encoding="utf-8"))
# Pytest fixture that builds a DiscordConnector, loads credentials into it and
# returns it.
# NOTE(review): this span comes from a flattened diff hunk (`@ -1,39 +1,47 @`)
# that carries no +/- markers, so it appears to interleave the pre-change and
# post-change fixture bodies: `connector` is assigned twice and credentials are
# loaded twice. Confirm against the committed file before relying on it.
@pytest.fixture
def discord_connector() -> DiscordConnector:
# First construction (presumably the removed, pre-change lines): the server and
# channel filters are read from the `server_ids` / `channel_names` environment
# variables as comma-separated strings, falling back to empty lists, and
# `start_date` is passed through from the environment unchanged.
server_ids: str | None = os.environ.get("server_ids", None)
channel_names: str | None = os.environ.get("channel_names", None)
connector = DiscordConnector(
server_ids=server_ids.split(",") if server_ids else [],
channel_names=channel_names.split(",") if channel_names else [],
start_date=os.environ.get("start_date", None),
)
# The token is read with os.environ.get, so None is passed when the variable is
# missing.
connector.load_credentials(
{
"discord_bot_token": os.environ.get("DISCORD_BOT_TOKEN"),
}
)
# Second construction (presumably the added, post-change lines): a connector
# built with no arguments, with credentials supplied by get_credentials(),
# which raises ValueError when DISCORD_BOT_TOKEN is not set.
connector = DiscordConnector()
connector.load_credentials(get_credentials())
return connector
# Skipped poll test: computes the [start, end] polling window as Unix timestamps.
# NOTE(review): presumably the pre-change test header from the diff; the rest
# of its body is not visible as a separate function in this view.
@pytest.mark.skip(reason="Test Discord is not setup yet!")
def test_discord_poll_connector(discord_connector: DiscordConnector) -> None:
end = time.time()
start = end - 24 * 60 * 60 * 15 # 15 days (24 * 60 * 60 seconds per day, times 15)
def get_credentials() -> dict[str, str]:
    """Build the Discord credentials dict from the environment.

    Returns:
        A dict with the single key ``"discord_bot_token"`` holding the value
        of the ``DISCORD_BOT_TOKEN`` environment variable.

    Raises:
        ValueError: If ``DISCORD_BOT_TOKEN`` is unset or empty.
    """
    token = os.environ.get("DISCORD_BOT_TOKEN")
    # CI systems commonly export an unset secret as an empty string rather
    # than leaving the variable undefined; fail here with a clear message
    # instead of handing an unusable token to the connector.
    if not token:
        raise ValueError("DISCORD_BOT_TOKEN is not set or is empty")
    return {"discord_bot_token": token}
# End-to-end check of the Discord connector against the JSON fixture returned
# by load_test_data(): polls the source, collects every document together with
# the channel and thread names found in metadata, then compares the results
# with the fixture's expected values.
# NOTE(review): this span comes from a flattened diff without +/- markers, so
# some statements below appear in both their pre-change and post-change form.
def test_discord_connector_basic(discord_connector: DiscordConnector) -> None:
test_data = load_test_data()
target_doc_id = test_data["target_doc"]["id"]
target_doc: Document | None = None
all_docs: list[Document] = []
channels: set[str] = set()
threads: set[str] = set()
# NOTE(review): `start` and `end` are not defined in this function (they are
# assigned in test_discord_poll_connector); this header is presumably the
# removed diff line superseded by the one that follows.
for doc_batch in discord_connector.poll_source(start, end):
# Poll from timestamp 0 up to the current time.
for doc_batch in discord_connector.poll_source(0, time.time()):
for doc in doc_batch:
if "Channel" in doc.metadata:
assert isinstance(doc.metadata["Channel"], str)
# The next line is a diff hunk header, not Python. It marks lines elided from
# this view; the statement that adds to `channels` is presumably among them
# (compare the `threads.add` call below) — TODO confirm.
@ -41,9 +49,30 @@ def test_discord_poll_connector(discord_connector: DiscordConnector) -> None:
if "Thread" in doc.metadata:
assert isinstance(doc.metadata["Thread"], str)
threads.add(doc.metadata["Thread"])
# Remember the document whose id matches the fixture's target_doc.
if doc.id == target_doc_id:
target_doc = doc
all_docs.append(doc)
# might change based on the channels and servers being used
# NOTE(review): this count contradicts the `== 8` assert below; presumably the
# removed pre-change expectation.
assert len(all_docs) == 10
# Check all docs are returned, with the correct number of channels and threads
# (the fixture in this commit lists 8 links/texts, 2 channels and 1 thread)
assert len(all_docs) == 8
assert len(channels) == 2
# NOTE(review): contradicts the `== 1` assert on the following line; presumably
# the removed pre-change expectation.
assert len(threads) == 2
assert len(threads) == 1
# Check that all the channels and threads are returned
assert channels == set(test_data["channels"])
assert threads == set(test_data["threads"])
# Check the target doc
assert target_doc is not None
assert target_doc.id == target_doc_id
assert target_doc.source == DocumentSource.DISCORD
assert target_doc.metadata["Thread"] == test_data["target_doc"]["Thread"]
# Only the first section of the target document is compared with the fixture.
assert target_doc.sections[0].link == test_data["target_doc"]["link"]
assert target_doc.sections[0].text == test_data["target_doc"]["text"]
assert (
target_doc.semantic_identifier == test_data["target_doc"]["semantic_identifier"]
)
# Ensure all the docs section data is returned correctly
# (compared as sets, so order and duplicates are not checked)
assert {doc.sections[0].text for doc in all_docs} == set(test_data["texts"])
assert {doc.sections[0].link for doc in all_docs} == set(test_data["links"])

View File

@ -0,0 +1,32 @@
{
"target_doc": {
"id": "DISCORD_1328834962551603201",
"text": "Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
"link": "https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
"semantic_identifier": "onyx_test_user_1 said in Thread: Anyone figure out the perpetual motion: Yes I have figured it out! Sha...",
"Thread": "Anyone figure out the perpetual motion"
},
"channels": ["rules", "general"],
"threads": ["Anyone figure out the perpetual motion"],
"links": [
"https://discord.com/channels/1328437578021736479/1328437578021736482/1328443181779652718",
"https://discord.com/channels/1328437578021736479/1328437578021736482/1328443280685400105",
"https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
"https://discord.com/channels/1328437578021736479/1328443280685400105/1328835359529766974",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828040821604432",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828065874444359",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828097956548732",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828102536855644"
],
"texts": [
"Hello all! How's everyone doing today?",
"Anyone figure out the perpetual motion machine yet? I think we're getting close!",
"Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
"Absolutely! let's have continued dialog in a space such that none other than ourselves may be privy to the information revealed.",
"1.) Keep opinions to a minimum, we want only to ascertain the truth.",
"2.) Answer questions quickly and accurately.",
"3.) Respect one another.",
"4.) Have fun!"
]
}