Fireflies daily test (#3663)

* Init test files for fireflies

* Finish creating daily test and update parsing of sections

* Added comment
This commit is contained in:
skylares
2025-01-15 09:40:31 -05:00
committed by GitHub
parent 3b7695539f
commit c3481c7356
3 changed files with 164 additions and 20 deletions

View File

@@ -30,13 +30,14 @@ _FIREFLIES_API_QUERY = """
transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) { transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
id id
title title
host_email organizer_email
participants participants
date date
transcript_url transcript_url
sentences { sentences {
text text
speaker_name speaker_name
start_time
} }
} }
} }
@@ -44,16 +45,34 @@ _FIREFLIES_API_QUERY = """
def _create_doc_from_transcript(transcript: dict) -> Document | None: def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_text = "" sections: List[Section] = []
sentences = transcript.get("sentences", []) current_speaker_name = None
if sentences: current_link = ""
for sentence in sentences: current_text = ""
meeting_text += sentence.get("speaker_name") or "Unknown Speaker"
meeting_text += ": " + sentence.get("text", "") + "\n\n"
else:
return None
meeting_link = transcript["transcript_url"] for sentence in transcript["sentences"]:
if sentence["speaker_name"] != current_speaker_name:
if current_speaker_name is not None:
sections.append(
Section(
link=current_link,
text=current_text.strip(),
)
)
current_speaker_name = sentence.get("speaker_name") or "Unknown Speaker"
current_link = f"{transcript['transcript_url']}?t={sentence['start_time']}"
current_text = f"{current_speaker_name}: "
cleaned_text = sentence["text"].replace("\xa0", " ")
current_text += f"{cleaned_text} "
# Links with a timestamp sometimes do not work; this is a bug in Fireflies.
sections.append(
Section(
link=current_link,
text=current_text.strip(),
)
)
fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"] fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
@@ -62,27 +81,22 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_date_unix = transcript["date"] meeting_date_unix = transcript["date"]
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc) meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
meeting_host_email = transcript["host_email"] meeting_organizer_email = transcript["organizer_email"]
host_email_user_info = [BasicExpertInfo(email=meeting_host_email)] organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]
meeting_participants_email_list = [] meeting_participants_email_list = []
for participant in transcript.get("participants", []): for participant in transcript.get("participants", []):
if participant != meeting_host_email and participant: if participant != meeting_organizer_email and participant:
meeting_participants_email_list.append(BasicExpertInfo(email=participant)) meeting_participants_email_list.append(BasicExpertInfo(email=participant))
return Document( return Document(
id=fireflies_id, id=fireflies_id,
sections=[ sections=sections,
Section(
link=meeting_link,
text=meeting_text,
)
],
source=DocumentSource.FIREFLIES, source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title, semantic_identifier=meeting_title,
metadata={}, metadata={},
doc_updated_at=meeting_date, doc_updated_at=meeting_date,
primary_owners=host_email_user_info, primary_owners=organizer_email_user_info,
secondary_owners=meeting_participants_email_list, secondary_owners=meeting_participants_email_list,
) )

View File

@@ -0,0 +1,62 @@
import json
import os
import time
from pathlib import Path
from typing import Any
import pytest
from onyx.configs.constants import DocumentSource
from onyx.connectors.fireflies.connector import FirefliesConnector
from onyx.connectors.models import Document
def load_test_data(file_name: str = "test_fireflies_data.json") -> dict[str, Any]:
    """Load a JSON fixture stored in the same directory as this module.

    Args:
        file_name: Name of the fixture file, resolved relative to the
            directory containing this file.

    Returns:
        The decoded JSON content of the fixture.
    """
    current_dir = Path(__file__).parent
    # Pin the encoding so decoding does not depend on the platform's default
    # locale (non-ASCII transcript text would otherwise break on some hosts).
    with open(current_dir / file_name, "r", encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture
def fireflies_connector() -> FirefliesConnector:
    """Provide a FirefliesConnector that has the credentials from get_credentials() loaded."""
    credentialed_connector = FirefliesConnector()
    api_credentials = get_credentials()
    credentialed_connector.load_credentials(api_credentials)
    return credentialed_connector
def get_credentials() -> dict[str, str]:
    """Build the credential mapping passed to the connector.

    The API key is read from the FIREFLIES_API_KEY environment variable;
    a missing variable raises KeyError.
    """
    api_key = os.environ["FIREFLIES_API_KEY"]
    return {"fireflies_api_key": api_key}
def test_fireflies_connector_basic(fireflies_connector: FirefliesConnector) -> None:
    """Compare the first document of the first polled batch with the JSON fixture.

    Checks the id, semantic identifier, owners, source, metadata, and the
    text and link of every section.
    NOTE(review): assumes the fixture transcript is the first document the
    connector returns for the test account; confirm against the account data.
    """
    test_data = load_test_data()

    # Passing a default to next() turns an exhausted generator into a readable
    # assertion failure instead of a bare StopIteration.
    connector_return_data: list[Document] | None = next(
        fireflies_connector.poll_source(0, time.time()), None
    )
    # Guard before indexing: an empty batch must fail with this message,
    # not with an IndexError on the lookup below.
    assert connector_return_data, "No documents were retrieved from the connector"

    target_doc: Document = connector_return_data[0]
    assert (
        target_doc.primary_owners is not None
    ), "No primary owners were retrieved from the connector"

    assert target_doc.id == test_data["id"]
    assert target_doc.semantic_identifier == test_data["semantic_identifier"]
    assert target_doc.primary_owners[0].email == test_data["primary_owners"]
    assert target_doc.secondary_owners == test_data["secondary_owners"]
    assert (
        target_doc.source == DocumentSource.FIREFLIES
    ), "Document source is not fireflies"
    assert target_doc.metadata == {}

    # Check that the test data and the connector data contain the same section data
    assert {section.text for section in target_doc.sections} == {
        section["text"] for section in test_data["sections"]
    }
    assert {section.link for section in target_doc.sections} == {
        section["link"] for section in test_data["sections"]
    }

View File

@@ -0,0 +1,68 @@
{
"id": "FIREFLIES_VcBdZpuV82rImQCA",
"semantic_identifier": "Lead Generation Efforts",
"primary_owners": "admin@onyx-test.com",
"secondary_owners": [],
"sections": [
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=153.1",
"text": "test_user_1 1: Hey, David, thanks for taking the time today."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=158.14",
"text": "Test Admin Admin: Of course Sarah, It's nice to see you. Whenever you're ready."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=165.1",
"text": "test_user_1 1: All right then, David, let's jump right in. How are the lead generation efforts for the new product launch looking?"
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=171.084",
"text": "Test Admin Admin: So far we've seen a good initial response, but we're facing a slight challenge with qualifying leads. The sales team is getting inquiries. Some aren't quite aligned with our ideal customer profile."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=191.86",
"text": "test_user_1 1: That makes sense. Do you think we need to adjust our marketing messaging to better target the right audience?"
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=202.26",
"text": "Test Admin Admin: Absolutely. Maybe we could emphasize the key features that are most relevant to our target market in the marketing materials. What are your thoughts on refining the lead capture to gather more specific information?"
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=225.99",
"text": "test_user_1 1: I think that's a great idea. We could add additional qualifying questions to ensure we're capturing leads with the right needs."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=238.56",
"text": "Test Admin Admin: On another note, how are the social media campaigns performing? Are we seeing good engagement with the new product launch post?"
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=257.2",
"text": "test_user_1 1: The engagement is positive, but we could potentially increase increase reach further with targeted ad campaigns and key platforms."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=268.91",
"text": "Test Admin Admin: Agreed. Let's discuss a strategy to develop targeted ads that focus on the pain points our ideal customers are facing and how our product solves them."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=270.27",
"text": "test_user_1 1: We can collaborate on creating specific ad copy that highlights these benefits."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=289.06",
"text": "Test Admin Admin: All right, so to summarize, let's prioritize refining the lead capture form, develop targeted social media ads, and make sure our marketing method clearly aligns with our ideal customer profile."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=303.38",
"text": "test_user_1 1: Yep. And let's schedule a follow up meeting in a week, review progress and discuss any adjustments."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=310.9",
"text": "Test Admin Admin: Sounds good. I'll send you address updated lead form by the end of the day. Thanks, Sarah."
},
{
"link": "https://app.fireflies.ai/view/VcBdZpuV82rImQCA?t=319.19",
"text": "test_user_1 1: Thank you David."
}
]
}