mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-09-19 12:03:54 +02:00
Fireflies daily test (#3663)
* Init test files for fireflies * Finish creating daily test and update parsing of sections * Added comment
This commit is contained in:
@@ -30,13 +30,14 @@ _FIREFLIES_API_QUERY = """
|
||||
transcripts(fromDate: $fromDate, toDate: $toDate, limit: $limit, skip: $skip) {
|
||||
id
|
||||
title
|
||||
host_email
|
||||
organizer_email
|
||||
participants
|
||||
date
|
||||
transcript_url
|
||||
sentences {
|
||||
text
|
||||
speaker_name
|
||||
start_time
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -44,16 +45,34 @@ _FIREFLIES_API_QUERY = """
|
||||
|
||||
|
||||
def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
||||
meeting_text = ""
|
||||
sentences = transcript.get("sentences", [])
|
||||
if sentences:
|
||||
for sentence in sentences:
|
||||
meeting_text += sentence.get("speaker_name") or "Unknown Speaker"
|
||||
meeting_text += ": " + sentence.get("text", "") + "\n\n"
|
||||
else:
|
||||
return None
|
||||
sections: List[Section] = []
|
||||
current_speaker_name = None
|
||||
current_link = ""
|
||||
current_text = ""
|
||||
|
||||
meeting_link = transcript["transcript_url"]
|
||||
for sentence in transcript["sentences"]:
|
||||
if sentence["speaker_name"] != current_speaker_name:
|
||||
if current_speaker_name is not None:
|
||||
sections.append(
|
||||
Section(
|
||||
link=current_link,
|
||||
text=current_text.strip(),
|
||||
)
|
||||
)
|
||||
current_speaker_name = sentence.get("speaker_name") or "Unknown Speaker"
|
||||
current_link = f"{transcript['transcript_url']}?t={sentence['start_time']}"
|
||||
current_text = f"{current_speaker_name}: "
|
||||
|
||||
cleaned_text = sentence["text"].replace("\xa0", " ")
|
||||
current_text += f"{cleaned_text} "
|
||||
|
||||
# Sometimes these links (links with a timestamp) do not work, it is a bug with Fireflies.
|
||||
sections.append(
|
||||
Section(
|
||||
link=current_link,
|
||||
text=current_text.strip(),
|
||||
)
|
||||
)
|
||||
|
||||
fireflies_id = _FIREFLIES_ID_PREFIX + transcript["id"]
|
||||
|
||||
@@ -62,27 +81,22 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
||||
meeting_date_unix = transcript["date"]
|
||||
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
|
||||
|
||||
meeting_host_email = transcript["host_email"]
|
||||
host_email_user_info = [BasicExpertInfo(email=meeting_host_email)]
|
||||
meeting_organizer_email = transcript["organizer_email"]
|
||||
organizer_email_user_info = [BasicExpertInfo(email=meeting_organizer_email)]
|
||||
|
||||
meeting_participants_email_list = []
|
||||
for participant in transcript.get("participants", []):
|
||||
if participant != meeting_host_email and participant:
|
||||
if participant != meeting_organizer_email and participant:
|
||||
meeting_participants_email_list.append(BasicExpertInfo(email=participant))
|
||||
|
||||
return Document(
|
||||
id=fireflies_id,
|
||||
sections=[
|
||||
Section(
|
||||
link=meeting_link,
|
||||
text=meeting_text,
|
||||
)
|
||||
],
|
||||
sections=sections,
|
||||
source=DocumentSource.FIREFLIES,
|
||||
semantic_identifier=meeting_title,
|
||||
metadata={},
|
||||
doc_updated_at=meeting_date,
|
||||
primary_owners=host_email_user_info,
|
||||
primary_owners=organizer_email_user_info,
|
||||
secondary_owners=meeting_participants_email_list,
|
||||
)
|
||||
|
||||
|
Reference in New Issue
Block a user