Fireflies daily test (#3663)

* Init test files for fireflies

* Finish creating daily test and update parsing of sections

* Added comment
This commit is contained in:
skylares
2025-01-15 09:40:31 -05:00
committed by GitHub
parent 3b7695539f
commit c3481c7356
3 changed files with 164 additions and 20 deletions

View File

@@ -30,13 +30,14 @@ _FIREFLIES_API_QUERY = """
transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
id
title
host_email
organizer_email
participants
date
transcript_url
sentences {
text
speaker_name
start_time
}
}
}
@@ -44,16 +45,34 @@ _FIREFLIES_API_QUERY = """
def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_text = ""
sentences = transcript.get("sentences", [])
if sentences:
for sentence in sentences:
meeting_text += sentence.get("speaker_name") or "Unknown Speaker"
meeting_text += ": " + sentence.get("text", "") + "\n\n"
else:
return None
sections: List[Section] = []
current_speaker_name = None
current_link = ""
current_text = ""
meeting_link = transcript["transcript_url"]
for sentence in transcript["sentences"]:
if sentence["speaker_name"] != current_speaker_name:
if current_speaker_name is not None:
sections.append(
Section(
link=current_link,
text=current_text.strip(),
)
)
current_speaker_name = sentence.get("speaker_name") or "Unknown Speaker"
current_link = f"{transcript['transcript_url']}?t={sentence['start_time']}"
current_text = f"{current_speaker_name}: "
cleaned_text = sentence["text"].replace("\xa0", " ")
current_text += f"{cleaned_text} "
# Sometimes these links (links with a timestamp) do not work, it is a bug with Fireflies.
sections.append(
Section(
link=current_link,
text=current_text.strip(),
)
)
fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
@@ -62,27 +81,22 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
meeting_date_unix = transcript["date"]
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
meeting_host_email = transcript["host_email"]
host_email_user_info = [BasicExpertInfo(email=meeting_host_email)]
meeting_organizer_email = transcript["organizer_email"]
organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]
meeting_participants_email_list = []
for participant in transcript.get("participants", []):
if participant != meeting_host_email and participant:
if participant != meeting_organizer_email and participant:
meeting_participants_email_list.append(BasicExpertInfo(email=participant))
return Document(
id=fireflies_id,
sections=[
Section(
link=meeting_link,
text=meeting_text,
)
],
sections=sections,
source=DocumentSource.FIREFLIES,
semantic_identifier=meeting_title,
metadata={},
doc_updated_at=meeting_date,
primary_owners=host_email_user_info,
primary_owners=organizer_email_user_info,
secondary_owners=meeting_participants_email_list,
)