mirror of
https://github.com/danswer-ai/danswer.git
synced 2025-05-30 09:40:35 +02:00
Clean up connector
This commit is contained in:
parent
ee4b334a0a
commit
95d9b33c1a
@ -23,7 +23,24 @@ _FIREFLIES_ID_PREFIX = "FIREFLIES_"
|
|||||||
|
|
||||||
_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"
|
_FIREFLIES_API_URL = "https://api.fireflies.ai/graphql"
|
||||||
|
|
||||||
_FIREFLIES_TRANSCRIPT_PAGE_SIZE = 30
|
_FIREFLIES_TRANSCRIPT_QUERY_SIZE = 50 # Max page size is 50
|
||||||
|
|
||||||
|
_FIREFLIES_API_QUERY = """
|
||||||
|
query Transcripts($fromDate: DateTime, $toDate: DateTime) {
|
||||||
|
transcripts(fromDate: $fromDate, toDate: $toDate) {
|
||||||
|
id
|
||||||
|
title
|
||||||
|
host_email
|
||||||
|
participants
|
||||||
|
date
|
||||||
|
transcript_url
|
||||||
|
sentences {
|
||||||
|
text
|
||||||
|
speaker_name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
||||||
@ -45,11 +62,13 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
|||||||
meeting_date_unix = transcript.get("date", "")
|
meeting_date_unix = transcript.get("date", "")
|
||||||
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
|
meeting_date = datetime.fromtimestamp(meeting_date_unix / 1000, tz=timezone.utc)
|
||||||
|
|
||||||
meeting_host_email = [BasicExpertInfo(email=transcript.get("host_email", ""))]
|
meeting_host_email = transcript.get("host_email", "")
|
||||||
|
host_email_user_info = BasicExpertInfo(email=meeting_host_email)
|
||||||
|
|
||||||
meeting_participants_emails = []
|
meeting_participants_email_list = []
|
||||||
for participant in transcript.get("participants", []):
|
for participant in transcript.get("participants", []):
|
||||||
meeting_participants_emails.append(BasicExpertInfo(email=participant))
|
if participant != meeting_host_email:
|
||||||
|
meeting_participants_email_list.append(BasicExpertInfo(email=participant))
|
||||||
|
|
||||||
return Document(
|
return Document(
|
||||||
id=fireflies_id,
|
id=fireflies_id,
|
||||||
@ -63,8 +82,8 @@ def _create_doc_from_transcript(transcript: dict) -> Document | None:
|
|||||||
semantic_identifier=meeting_title,
|
semantic_identifier=meeting_title,
|
||||||
metadata={},
|
metadata={},
|
||||||
doc_updated_at=meeting_date,
|
doc_updated_at=meeting_date,
|
||||||
primary_owners=meeting_host_email,
|
primary_owners=host_email_user_info,
|
||||||
secondary_owners=meeting_participants_emails,
|
secondary_owners=meeting_participants_email_list,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -72,7 +91,7 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
|||||||
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None:
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
|
|
||||||
def load_credentials(self, credentials: dict[str, str | int]) -> None:
|
def load_credentials(self, credentials: dict[str, str]) -> None:
|
||||||
api_key = credentials.get("fireflies_api_key")
|
api_key = credentials.get("fireflies_api_key")
|
||||||
|
|
||||||
if not isinstance(api_key, str):
|
if not isinstance(api_key, str):
|
||||||
@ -82,8 +101,10 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
|||||||
|
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def _fetch_transcripts(
|
def _fetch_transcripts(
|
||||||
self, start: str | None = None, end: str | None = None
|
self, start_datetime: str | None = None, end_datetime: str | None = None
|
||||||
) -> Iterator[List[dict]]:
|
) -> Iterator[List[dict]]:
|
||||||
if self.api_key is None:
|
if self.api_key is None:
|
||||||
raise ConnectorMissingCredentialError("Missing API key")
|
raise ConnectorMissingCredentialError("Missing API key")
|
||||||
@ -95,36 +116,19 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
|||||||
|
|
||||||
skip = 0
|
skip = 0
|
||||||
variables: dict[str, int | str] = {
|
variables: dict[str, int | str] = {
|
||||||
"limit": _FIREFLIES_TRANSCRIPT_PAGE_SIZE,
|
"limit": _FIREFLIES_TRANSCRIPT_QUERY_SIZE,
|
||||||
}
|
}
|
||||||
|
|
||||||
if start:
|
if start_datetime:
|
||||||
variables["fromDate"] = start
|
variables["fromDate"] = start_datetime
|
||||||
if end:
|
if end_datetime:
|
||||||
variables["toDate"] = end
|
variables["toDate"] = end_datetime
|
||||||
|
|
||||||
query = """
|
|
||||||
query Transcripts($fromDate: DateTime, $toDate: DateTime) {
|
|
||||||
transcripts(fromDate: $fromDate, toDate: $toDate) {
|
|
||||||
id
|
|
||||||
title
|
|
||||||
host_email
|
|
||||||
participants
|
|
||||||
date
|
|
||||||
transcript_url
|
|
||||||
sentences {
|
|
||||||
text
|
|
||||||
speaker_name
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
_FIREFLIES_API_URL,
|
_FIREFLIES_API_URL,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
json={"query": query, "variables": variables},
|
json={"query": _FIREFLIES_API_QUERY, "variables": variables},
|
||||||
)
|
)
|
||||||
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
@ -137,15 +141,12 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
|||||||
"transcripts", []
|
"transcripts", []
|
||||||
)
|
)
|
||||||
|
|
||||||
if not parsed_transcripts:
|
|
||||||
break
|
|
||||||
|
|
||||||
yield parsed_transcripts
|
yield parsed_transcripts
|
||||||
|
|
||||||
if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_PAGE_SIZE:
|
if len(parsed_transcripts) < _FIREFLIES_TRANSCRIPT_QUERY_SIZE:
|
||||||
break
|
break
|
||||||
|
|
||||||
skip += _FIREFLIES_TRANSCRIPT_PAGE_SIZE
|
skip += _FIREFLIES_TRANSCRIPT_QUERY_SIZE
|
||||||
variables["skip"] = skip
|
variables["skip"] = skip
|
||||||
|
|
||||||
def _process_transcripts(
|
def _process_transcripts(
|
||||||
@ -169,12 +170,12 @@ class FirefliesConnector(PollConnector, LoadConnector):
|
|||||||
return self._process_transcripts()
|
return self._process_transcripts()
|
||||||
|
|
||||||
def poll_source(
|
def poll_source(
|
||||||
self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch
|
self, start_unixtime: SecondsSinceUnixEpoch, end_unixtime: SecondsSinceUnixEpoch
|
||||||
) -> GenerateDocumentsOutput:
|
) -> GenerateDocumentsOutput:
|
||||||
start_datetime = datetime.fromtimestamp(start, tz=timezone.utc).strftime(
|
start_datetime = datetime.fromtimestamp(
|
||||||
"%Y-%m-%dT%H:%M:%S.000Z"
|
start_unixtime, tz=timezone.utc
|
||||||
)
|
).strftime("%Y-%m-%dT%H:%M:%S.000Z")
|
||||||
end_datetime = datetime.fromtimestamp(end, tz=timezone.utc).strftime(
|
end_datetime = datetime.fromtimestamp(end_unixtime, tz=timezone.utc).strftime(
|
||||||
"%Y-%m-%dT%H:%M:%S.000Z"
|
"%Y-%m-%dT%H:%M:%S.000Z"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -293,7 +293,7 @@ const SOURCE_METADATA_MAP: SourceMap = {
|
|||||||
fireflies: {
|
fireflies: {
|
||||||
icon: FirefliesIcon,
|
icon: FirefliesIcon,
|
||||||
displayName: "Fireflies",
|
displayName: "Fireflies",
|
||||||
category: SourceCategory.CustomerSupport,
|
category: SourceCategory.Other,
|
||||||
docs: "https://docs.danswer.dev/connectors/fireflies",
|
docs: "https://docs.danswer.dev/connectors/fireflies",
|
||||||
},
|
},
|
||||||
// currently used for the Internet Search tool docs, which is why
|
// currently used for the Internet Search tool docs, which is why
|
||||||
|
Loading…
x
Reference in New Issue
Block a user