Add discord daily test (#3676)

* Add discord daily test

* Fix mypy error
This commit is contained in:
skylares 2025-01-15 10:50:33 -05:00 committed by GitHub
parent c3481c7356
commit 993a1a6caf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 82 additions and 21 deletions

View File

@ -1,39 +1,47 @@
import json
import os import os
import time import time
from pathlib import Path
from typing import Any
import pytest import pytest
from onyx.connectors.discord.connector import DiscordConnector from onyx.connectors.discord.connector import DiscordConnector
from onyx.connectors.models import Document from onyx.connectors.models import Document
from onyx.connectors.models import DocumentSource
def load_test_data(file_name: str = "test_discord_data.json") -> dict[str, Any]:
    """Load a JSON fixture located next to this test module.

    Args:
        file_name: Name of the fixture file, resolved relative to the
            directory that contains this module.

    Returns:
        The parsed JSON content of the fixture file.

    Raises:
        FileNotFoundError: If the fixture file does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    current_dir = Path(__file__).parent
    # Decode explicitly as UTF-8 so the parsed fixture does not depend on
    # the platform's default locale encoding.
    with open(current_dir / file_name, "r", encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture @pytest.fixture
def discord_connector() -> DiscordConnector: def discord_connector() -> DiscordConnector:
server_ids: str | None = os.environ.get("server_ids", None) connector = DiscordConnector()
channel_names: str | None = os.environ.get("channel_names", None) connector.load_credentials(get_credentials())
connector = DiscordConnector(
server_ids=server_ids.split(",") if server_ids else [],
channel_names=channel_names.split(",") if channel_names else [],
start_date=os.environ.get("start_date", None),
)
connector.load_credentials(
{
"discord_bot_token": os.environ.get("DISCORD_BOT_TOKEN"),
}
)
return connector return connector
@pytest.mark.skip(reason="Test Discord is not setup yet!") def get_credentials() -> dict[str, str]:
def test_discord_poll_connector(discord_connector: DiscordConnector) -> None: token = os.environ.get("DISCORD_BOT_TOKEN")
end = time.time() if token is None:
start = end - 24 * 60 * 60 * 15 # 1 day raise ValueError("DISCORD_BOT_TOKEN is not set")
return {"discord_bot_token": token}
def test_discord_connector_basic(discord_connector: DiscordConnector) -> None:
test_data = load_test_data()
target_doc_id = test_data["target_doc"]["id"]
target_doc: Document | None = None
all_docs: list[Document] = [] all_docs: list[Document] = []
channels: set[str] = set() channels: set[str] = set()
threads: set[str] = set() threads: set[str] = set()
for doc_batch in discord_connector.poll_source(start, end):
for doc_batch in discord_connector.poll_source(0, time.time()):
for doc in doc_batch: for doc in doc_batch:
if "Channel" in doc.metadata: if "Channel" in doc.metadata:
assert isinstance(doc.metadata["Channel"], str) assert isinstance(doc.metadata["Channel"], str)
@ -41,9 +49,30 @@ def test_discord_poll_connector(discord_connector: DiscordConnector) -> None:
if "Thread" in doc.metadata: if "Thread" in doc.metadata:
assert isinstance(doc.metadata["Thread"], str) assert isinstance(doc.metadata["Thread"], str)
threads.add(doc.metadata["Thread"]) threads.add(doc.metadata["Thread"])
if doc.id == target_doc_id:
target_doc = doc
all_docs.append(doc) all_docs.append(doc)
# might change based on the channels and servers being used # Check all docs are returned, with the correct number of channels and threads
assert len(all_docs) == 10 assert len(all_docs) == 8
assert len(channels) == 2 assert len(channels) == 2
assert len(threads) == 2 assert len(threads) == 1
# Check that all the channels and threads are returned
assert channels == set(test_data["channels"])
assert threads == set(test_data["threads"])
# Check the target doc
assert target_doc is not None
assert target_doc.id == target_doc_id
assert target_doc.source == DocumentSource.DISCORD
assert target_doc.metadata["Thread"] == test_data["target_doc"]["Thread"]
assert target_doc.sections[0].link == test_data["target_doc"]["link"]
assert target_doc.sections[0].text == test_data["target_doc"]["text"]
assert (
target_doc.semantic_identifier == test_data["target_doc"]["semantic_identifier"]
)
# Ensure all the docs section data is returned correctly
assert {doc.sections[0].text for doc in all_docs} == set(test_data["texts"])
assert {doc.sections[0].link for doc in all_docs} == set(test_data["links"])

View File

@ -0,0 +1,32 @@
{
"target_doc": {
"id": "DISCORD_1328834962551603201",
"text": "Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
"link": "https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
"semantic_identifier": "onyx_test_user_1 said in Thread: Anyone figure out the perpetual motion: Yes I have figured it out! Sha...",
"Thread": "Anyone figure out the perpetual motion"
},
"channels": ["rules", "general"],
"threads": ["Anyone figure out the perpetual motion"],
"links": [
"https://discord.com/channels/1328437578021736479/1328437578021736482/1328443181779652718",
"https://discord.com/channels/1328437578021736479/1328437578021736482/1328443280685400105",
"https://discord.com/channels/1328437578021736479/1328443280685400105/1328834962551603201",
"https://discord.com/channels/1328437578021736479/1328443280685400105/1328835359529766974",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828040821604432",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828065874444359",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828097956548732",
"https://discord.com/channels/1328437578021736479/1328442536066416760/1328828102536855644"
],
"texts": [
"Hello all! How's everyone doing today?",
"Anyone figure out the perpetual motion machine yet? I think we're getting close!",
"Yes I have figured it out! Shall we discuss further in private? I have much to tell you.",
"Absolutely! let's have continued dialog in a space such that none other than ourselves may be privy to the information revealed.",
"1.) Keep opinions to a minimum, we want only to ascertain the truth.",
"2.) Answer questions quickly and accurately.",
"3.) Respect one another.",
"4.) Have fun!"
]
}